In [6]:
import os
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from langchain.chat_models import init_chat_model
from langchain.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_huggingface import HuggingFaceEmbeddings 

  from .autonotebook import tqdm as notebook_tqdm


Load the .env file contents

In [7]:
load_dotenv()

True

In [8]:
GROQ_API_KEY=os.getenv("GROQ_API_KEY")
os.environ["GROQ_API_KEY"]=GROQ_API_KEY

Test whether model is working

In [9]:
model=init_chat_model(model='groq:qwen/qwen3-32b')
response=model.invoke("Tell me about the city of Pune")
response

AIMessage(content="<think>\nOkay, the user wants to know about Pune. Let me start by recalling what I know. Pune is a city in Maharashtra, India. It's also known as Poona, right? I should mention that. It's the second-largest city in Maharashtra after Mumbai. \n\nFirst, the history. Pune has a rich history. It was a part of the Maratha Empire, so I should talk about that. Shivaji Maharaj founded it in the 17th century. The Peshwas, the prime ministers of the Marathas, later made it their capital. That's an important point. The city has a lot of historical sites like the Shaniwar Wada and the forts like Sinhagad. Maybe the user is interested in cultural aspects too. The Marathi culture is prominent there, and the city is known for its educational institutions. IIT Bombay, IISER, and several universities are there. \n\nEconomically, Pune is a major hub. It has a mix of traditional industries and IT. IT parks like Hinjewadi and ITI are significant. Also, the automotive industry is big wit

Data Ingestion Piupeline


Data Loading

In [10]:
class AnimeDataLoader:
    def __init__(self,original_csv:str,processed_csv:str):
        self.original_csv=original_csv
        self.processed_csv=processed_csv

    def load_and_process(self):
        df=pd.read_csv(self.original_csv,
                       encoding='utf-8',
                       on_bad_lines='skip').dropna()
        
        required_cols={'Name','Genres','sypnopsis'}
        if not required_cols.issubset(df.columns):
            raise ValueError("Missing required columns in CSV")
        
        df['combined_info']=(
            "Title: "+df['Name']+ "Overview: " + df['sypnopsis'] + "Genres: " + df['Genres']
        )

        df[['combined_info']].to_csv(self.processed_csv,index=False,encoding='utf-8')

        return self.processed_csv

RAG Pipeline

Split and STore data


In [11]:
class VectorStoreBuilder:
    def __init__(self,csv_path:str,persist_dir:str="chroma_db"):
        self.csv_path=csv_path
        self.persist_dir=persist_dir
        self.embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")



    def build_and_save_vectorstore(self):
        loader=CSVLoader(file_path=self.csv_path,encoding='utf-8',metadata_columns=[])
        
        # Parsing Document
        documents=loader.load()
        
        #Splitting/Chunking
        splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=0)

        chunks=splitter.split_documents(documents)

        # Database Creation and saving vectors. The embedding is done within the function using th Huggingface embeddings.
        db=Chroma.from_documents(chunks,self.embedding,persist_directory=self.persist_dir)
        db.persist()

    def load_vector_store(self):
        return Chroma(persist_directory=self.persist_dir,embedding_function=self.embedding)

In [12]:
print('Started processing')

original_csv='data/anime_with_synopsis.csv'
processed_csv='data/anime_updated.csv'
persist_dir="chroma_db"

loader=AnimeDataLoader(original_csv,processed_csv)
processed_csv_path=loader.load_and_process()

vector_builder=VectorStoreBuilder(processed_csv_path,persist_dir=persist_dir)
vector_builder.build_and_save_vectorstore()

print('Vector Database is Ready!')

Started processing


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Error while downloading from https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/model.safetensors: HTTPSConnectionPool(host='cas-bridge.xethub.hf.co', port=443): Read timed out.
Trying to resume download...


Vector Database is Ready!


  db.persist()


Retirever

In [13]:
vector_builder=VectorStoreBuilder(csv_path="",persist_dir="chroma_db")

retriever=vector_builder.load_vector_store().as_retriever()

  return Chroma(persist_directory=self.persist_dir,embedding_function=self.embedding)


Convert Retriever into a tool

In [14]:
@tool
def anime_retriever_tool(query:str)->str:
    """ Use this tool to search the database

    Always call tool for anime related searches like recommendations, similarity search, genres, or plot summaries"AnimeDataLoader

    Input: query
    outpot: Information retrieved from database
    """
    docs=retriever.invoke(query)
    return "\n\n".join(doc.page_content for doc in docs)

Initializing model

In [16]:
model=init_chat_model(model='groq:qwen/qwen3-32b')
model_with_tools=model.bind_tools([anime_retriever_tool])

testing the model

In [17]:
messages=[{'role':'user','content':'Can you suggest me some anime similar to Demon Slayer?'}]

# Step 1 : Model decides tool usage
ai_msg=model_with_tools.invoke(messages)
messages.append(ai_msg)

# Step 2 : Execute tools
for tool_call in ai_msg.tool_calls:
    tool_result=anime_retriever_tool.invoke(tool_call)
    messages.append(tool_result)

# Step 3 : Final Response
final_response=model_with_tools.invoke(messages)
print(final_response.text)

Here are some anime similar to *Demon Slayer* based on the retrieved information:

1. **Saiyuuki Reload**  
   - **Genres**: Action, Adventure, Comedy, Demons, Drama, Josei, Supernatural  
   - **Overview**: Follows Priest Genjo Sanzo and his companions on a perilous journey to stop the demon Gyoumao. Features intense battles, group dynamics, and supernatural themes.  

2. **Bouken Ou Beet (Adventure King Beet)**  
   - **Genres**: Adventure, Fantasy, Shounen, Supernatural  
   - **Overview**: A young hero embarks on a quest to end a dark era ruled by demons. Combines epic battles, a determined protagonist, and a team-driven narrative.  

The other results ("new realm," "of fate") appear incomplete or potentially inaccurate. If you'd like more recommendations or specific genres/themes, let me know!


The model is complete and returns results based on the movies in the database