Install

In [1]:
!pip -q uninstall -y timm torchvision fastai bitsandbytes
!pip -q install -U torch==2.10.0 torchaudio==2.10.0
!pip -q install -U "smolagents[transformers]" transformers accelerate
!pip -q install -U beautifulsoup4 requests lxml
!pip -q install -U diffusers safetensors pydub soundfile

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m915.7/915.7 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.2/12.2 MB[0m [31m94.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.1/139.1 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.3/188.3 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cuda-python 12.9.5 requires cuda-bindings~=12.9.5, but you have cuda-bindings 12.9.4 which is incompatible.[0m[31m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m566.4/566.4 kB[0m [31m43.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m111.6 MB/s[0

Imports + Helpers

In [2]:
import os, re, json
import requests
from bs4 import BeautifulSoup

OUT_DIR = "/content/fbi_story_output"
os.makedirs(OUT_DIR, exist_ok=True)

HEADERS = {"User-Agent": "Mozilla/5.0 (educational-colab-project; +https://colab.research.google.com)"}

def clean_text(t: str) -> str:
    return re.sub(r"\s+", " ", t).strip()

def safe_filename(name: str) -> str:
    name = re.sub(r"[^a-zA-Z0-9_\- ]+", "", name).strip().replace(" ", "_")
    return name[:80] if name else "case"


Tools

In [3]:
from smolagents import Tool

class ListFBICasesTool(Tool):
    name = "list_fbi_cases"
    description = "Fetch FBI famous cases list page and return JSON array of {title, url}."
    inputs = {}
    output_type = "string"

    def forward(self) -> str:
        url = "https://www.fbi.gov/history/famous-cases"
        r = requests.get(url, headers=HEADERS, timeout=30)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "lxml")

        items = []
        for h3 in soup.select("h3"):
            a = h3.find("a", href=True)
            if not a:
                continue
            title = clean_text(a.get_text(" ", strip=True))
            href = a["href"].strip()
            if href.startswith("/"):
                href = "https://www.fbi.gov" + href
            if "fbi.gov/history/famous-cases/" in href and title:
                items.append({"title": title, "url": href})

        seen = set()
        dedup = []
        for it in items:
            if it["url"] not in seen:
                seen.add(it["url"])
                dedup.append(it)

        return json.dumps(dedup, indent=2)


class FetchCasePageTool(Tool):
    name = "fetch_case_page_text"
    description = "Fetch a specific FBI case page and return main text (plain text)."
    inputs = {"url": {"type": "string", "description": "Full FBI case URL (fbi.gov)."}}
    output_type = "string"

    def forward(self, url: str) -> str:
        if "fbi.gov" not in url:
            raise ValueError("URL must be on fbi.gov")

        r = requests.get(url, headers=HEADERS, timeout=30)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "lxml")

        main = soup.select_one("div.field--name-body") or soup.select_one("article") or soup
        for tag in main.select("script, style, nav, footer, header"):
            tag.decompose()

        text = clean_text(main.get_text(" ", strip=True))
        return text[:9000]


tools = [ListFBICasesTool(), FetchCasePageTool()]
print("Tools ready:", [t.name for t in tools])


✅ Tools ready: ['list_fbi_cases', 'fetch_case_page_text']


Model and Agent

In [4]:
import torch
try:
    from smolagents import TransformersModel
except ImportError:
    from smolagents.models import TransformersModel

from smolagents import ToolCallingAgent

LLM_NAME = "Qwen/Qwen2.5-3B-Instruct"

model = TransformersModel(
    model_id=LLM_NAME,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    load_in_4bit=False,
)

agent = ToolCallingAgent(
    tools=tools,
    model=model,
    max_steps=10,
    add_base_tools=False,
    use_structured_outputs_internally=True,
)

print("Agent ready:", LLM_NAME)


config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

TypeError: MultiStepAgent.__init__() got an unexpected keyword argument 'use_structured_outputs_internally'

Run Agent, Choose Case, Write Story, Split Into Sections

In [None]:
GOAL = """
Use the tools to:
1) Call list_fbi_cases.
2) Choose ONE major case that will make a strong story.
3) Call fetch_case_page_text on that chosen case URL.

Then, using ONLY the fetched case text (no new facts), write a compelling non-fiction narrative:
- 900–1400 words
- readable (high-school level)
- faithful to the facts (no invented people/events)

Then split the story into exactly 6 sections. Each section must include:
- section_title
- section_text (120–220 words)
- image_prompt (cinematic, descriptive, no text in image)

FINAL ANSWER:
Return ONLY valid JSON (no markdown, no code blocks) exactly:

{
  "case_title": "...",
  "case_url": "...",
  "story_text": "...",
  "sections": [
    {"section_title": "...", "section_text": "...", "image_prompt": "..."}
  ]
}
"""

result = agent.run(GOAL)
print(str(result)[:1200])


Parse JSON

In [None]:
text = str(result)
s = text.find("{")
e = text.rfind("}")
if s == -1 or e == -1:
    raise ValueError("Agent did not return JSON. Try increasing max_steps or using Qwen2.5-7B if you have VRAM.")

data = json.loads(text[s:e+1])
print("Chosen case:", data["case_title"])
print("Case URL:", data["case_url"])
print("Sections:", len(data["sections"]))


Image Generation

In [None]:
import torch
from diffusers import AutoPipelineForText2Image

IMG_MODEL = "stabilityai/sd-turbo"
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = AutoPipelineForText2Image.from_pretrained(
    IMG_MODEL,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    variant="fp16" if device == "cuda" else None
).to(device)

def generate_image(prompt: str, out_path: str):
    img = pipe(prompt=prompt, num_inference_steps=4, guidance_scale=0.0).images[0]
    img.save(out_path)
    return out_path


Narration

In [None]:
import soundfile as sf
from transformers import pipeline
from pydub import AudioSegment

tts = pipeline(
    "text-to-speech",
    model="facebook/mms-tts-eng",
    device=0 if torch.cuda.is_available() else -1
)

def tts_to_wav(text: str, wav_path: str):
    out = tts(text)
    sf.write(wav_path, out["audio"], out["sampling_rate"])
    return wav_path

def concat_wavs(wav_paths, out_path):
    combined = AudioSegment.silent(duration=200)
    for wp in wav_paths:
        combined += AudioSegment.from_wav(wp) + AudioSegment.silent(duration=200)
    combined.export(out_path, format="wav")
    return out_path


Save Outputs

In [None]:

case_title = data.get("case_title", "FBI_Case")
case_url = data.get("case_url", "")
case_slug = safe_filename(case_title)

case_dir = os.path.join(OUT_DIR, case_slug)
os.makedirs(case_dir, exist_ok=True)

# Save JSON
json_path = os.path.join(case_dir, "story_data.json")
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(data, f, indent=2, ensure_ascii=False)

# Generate images + section narration
wav_paths = []
for i, sec in enumerate(data["sections"], start=1):
    img_path = os.path.join(case_dir, f"section_{i:02d}.png")
    wav_path = os.path.join(case_dir, f"section_{i:02d}.wav")
    generate_image(sec["image_prompt"], img_path)
    tts_to_wav(sec["section_text"], wav_path)
    wav_paths.append(wav_path)

# Full narration
full_audio = os.path.join(case_dir, "full_narration.wav")
concat_wavs(wav_paths, full_audio)

# HTML storybook
html_path = os.path.join(case_dir, "storybook.html")
parts = []
parts.append(f"<h1>{case_title}</h1>")
if case_url:
    parts.append(f'<p><a href="{case_url}" target="_blank">Source: FBI case page</a></p>')

for i, sec in enumerate(data["sections"], start=1):
    parts.append(f"<hr><h2>{i}. {sec['section_title']}</h2>")
    parts.append(f'<img src="section_{i:02d}.png" style="max-width:900px;width:100%;border-radius:12px;">')
    parts.append(f"<p style='font-size:18px;line-height:1.5'>{sec['section_text']}</p>")
    parts.append(f"<p><i>Prompt:</i> {sec['image_prompt']}</p>")

parts.append("<hr><h3>Full narration audio</h3>")
parts.append("<audio controls src='full_narration.wav'></audio>")

with open(html_path, "w", encoding="utf-8") as f:
    f.write("\n".join(parts))

print("Saved to:", case_dir)
print("JSON:", json_path)
print("HTML:", html_path)
print("Audio:", full_audio)
