Building a recommendation system that scrapes online comic book aggregators and suggests other relevant titles. 

Plan:
1. PlannerAgent: decomposes the user request (“recommend me titles similar to X”) into steps.
2. CrawlerAgent: uses WebFetchTool + ParseTool (+ a SiteAdapter) to get raw lists (title, genres, rating, summary).
3. ClassifierAgent: normalizes genres, cleans ratings, embeds descriptions
4. RecommenderAgent: Given a title, find top-k similar titles

In [12]:
from pydantic import BaseModel
from typing import List, Optional

ANILIST_API = "https://graphql.anilist.co"

class Book(BaseModel):
    id: int
    title: str
    url: str
    genres: List[str]
    avg_score: Optional[float] = None
    popularity: Optional[int] = None
    desc: Optional[str] = None

In [19]:
from smolagents import tool
import httpx

ANILIST_QUERY = """
query ($page: Int = 1, $perPage: Int = 10, $search: String) {
  Page(page: $page, perPage: $perPage) {
    media(
      type: MANGA
      countryOfOrigin: KR
      search: $search
      sort: POPULARITY_DESC
    ) {
      id
      title { romaji english native }
      genres
      averageScore
      popularity
      siteUrl
      description(asHtml: false)
      format
    }
  }
}
"""

@tool
def search_manhwa(search: str, page: int = 1, per_page: int = 20) -> list[dict]:
    """
    Query AniList for Korean manhwa via GraphQL and return structured results.

    Args:
        search (str): Free-text title filter (can be empty for broad discovery).
        page (int): Page number for pagination.
        per_page (int): Number of results per page.

    Returns:
        list[dict]: Plain-dict representation of manhwa objects
    """

    gql_vars = {
        "page": page,
        "perPage": per_page,
        "search": None if not search.strip() else search
    }

    payload = {"query": ANILIST_QUERY, "variables": gql_vars}

    with httpx.Client(timeout=20) as client:
        r = client.post(ANILIST_API, json=payload)
        r.raise_for_status()
        media = r.json()["data"]["Page"]["media"]

    results = []
    for m in media:
        results.append(
            dict(
                id=m["id"],
                title=m["title"]["english"] or m["title"]["romaji"] or m["title"]["native"],
                url=m["siteUrl"],
                genres=m.get("genres", []),
                average_score=m.get("averageScore"),
                popularity=m.get("popularity"),
                description=m.get("description"),
            )
        )
    return results

In [20]:
BOOK_DB: list[Book] = []

@tool
def upsert_manhwa(items: list[dict]) -> int :
    """
    Insert (naively) a list of Manhwa into memory.

    Args:
        items (list[Manhwa]): Parsed manhwa objects.

    Returns:
        int: Number of manhwas inserted.
    """
    for rec in items:
        BOOK_DB.append(
            Book(**rec)  # convert back to Pydantic for internal use
        )


    return len(items)

@tool
def get_by_genre(genre: str, min_score: int = 70, top_k: int = 10, allow_no_score: bool = True) -> list[Book]:
    """
    Returns up to top_k manhwa matching genre and above a minimum AniList score. 

    Args:
        genre (str): Genre to match (case-insensitive substring).
        min_score (int): Minimum averageScore on AniList (0-100).
        top_k (int): Max number of results.
        allow_no_score (bool): allow unscored manhwa in search.

    Returns:
        list[Manhwa]: Filtered, score-descending list.
    """
    g = genre.lower()
    matches = [m for m in BOOK_DB if any(g in gg.lower() for gg in m.genres) and (m.average_score or 0) >= min_score]

    if allow_no_score:
        no_score = [
            m for m in BOOK_DB
            if any(g in gg.lower() for gg in m.genres)
            and m.average_score is None
        ]
        matches.extend(no_score)


    matches.sort(key=lambda x: (x.avg_score or 0, x.popularity or 0), reverse=True)
    return matches[:top_k]

In [21]:
import os
from smolagents import LiteLLMModel, ToolCallingAgent

model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=os.getenv("GEMINI_API_KEY"))

fetch_agent = ToolCallingAgent(
    name="fetch_agent",
    description="pulls manhwa from AniList and stores them.",
    tools=[search_manhwa, upsert_manhwa],
    model=model
)

reco_agent = ToolCallingAgent(
    name="recommender_agent",
    description="given a genre (and optional min score), recommends top manhwa",
    tools=[get_by_genre],
    model=model
)

In [23]:
def planner(user_msg: str) -> str:
    user_msg_low = user_msg.lower()
    if "crawl" in user_msg_low or "index" in user_msg_low or "search" in user_msg_low:
        return fetch_agent.run(
            "Use anilist_search_manhwa(search='null') for a broad discovery, page=1, per_page=50. "
            "Then call upsert_manhwa with the results and return how many were inserted."
        )

    return reco_agent.run(
        f"Call get_by_genre with genre inferred from: '{user_msg}'. "
        "If a minimum score is not specified, default to 75. Then present the top 5 cleanly."
    )

In [24]:
print(planner("crawl and index action manhwa"))
print(planner("Recommend me high-score action manhwa"))

9


No action manhwa were found with a score of 75 or higher.
