In [1]:
45

45

In [2]:
import os
from dotenv import load_dotenv

import pandas as pd
from langchain.chat_models import init_chat_model
from langchain.tools import tool
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_huggingface import HuggingFaceEmbeddings

load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [3]:
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [4]:
import os
from langchain.chat_models import init_chat_model

model = init_chat_model("groq:llama-3.3-70b-versatile")
response = model.invoke("Why am I learning at DOT?")
response

AIMessage(content="You're learning at DOT (Digital Opportunity Trust) because it's a wonderful organization that offers various programs and resources to help people like you develop new skills and knowledge. Here are some possible reasons why you might be learning at DOT:\n\n1. **Digital skills development**: DOT provides training and resources to help you build skills in areas like digital literacy, coding, and entrepreneurship. This can help you stay competitive in the job market, improve your career prospects, or start your own business.\n2. **Personal growth and empowerment**: DOT's programs are designed to empower you with the skills, confidence, and knowledge to achieve your goals and make a positive impact in your community.\n3. **Community engagement**: DOT often works with local communities to provide access to technology, education, and economic opportunities. By learning at DOT, you're part of a community that values knowledge sharing, collaboration, and social responsibili

Data Loading Class

In [5]:
class AnimeDataLoader:
    def __init__(self, original_csv: str, processed_csv: str):
        self.original_csv = original_csv
        self.processed_csv = processed_csv

    def load_and_process(self):
        df = pd.read_csv(
            self.original_csv,
            encoding="utf-8",
            on_bad_lines="skip"
        ).dropna()

        required_cols = {"Name", "Genres", "sypnopsis"}
        if not required_cols.issubset(df.columns):
            raise ValueError("Missing required columns in CSV")

        df["combined_info"] = (
            "Title: " + df["Name"]
            + " Overview: " + df["sypnopsis"]
            + " Genres: " + df["Genres"]
        )

        df[["combined_info"]].to_csv(
            self.processed_csv,
            index=False,
            encoding="utf-8"
        )

        return self.processed_csv


In [43]:
og_csv = "../data/anime_with_synopsis.csv"
process_csv = "../data/anime_updated.csv"

data_loader = AnimeDataLoader(og_csv, process_csv)

data_loader.load_and_process()

'../data/anime_updated.csv'

Data Splitting

In [7]:
class VectorStoreBuilder:
    def __init__(self, csv_path: str, persist_dir: str = "chroma_db"):
        self.csv_path = csv_path
        self.persist_dir = persist_dir
        self.embedding = HuggingFaceEmbeddings(
            model_name="all-MiniLM-L6-v2"
        )

    def build_and_save_vectorstore(self):
        loader = CSVLoader(
            file_path=self.csv_path,
            encoding="utf-8",
            metadata_columns=[]
        )

        documents = loader.load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20
        )

        chunks = splitter.split_documents(documents)

        db = Chroma.from_documents(
            chunks,
            self.embedding,
            persist_directory=self.persist_dir
        )
        db.persist()

    def load_vector_store(self):
        return Chroma(
            persist_directory=self.persist_dir,
            embedding_function=self.embedding
        )
        




In [8]:
print("WE ARE BUILDING VECTOR STORE")

og_csv = "../data/anime_with_synopsis.csv"
process_csv = "../data/anime_updated.csv"
persist_dir = "chroma_db"

data_loader = AnimeDataLoader(og_csv, process_csv)

processed_csv_path = data_loader.load_and_process()

WE ARE BUILDING VECTOR STORE


In [9]:
print("STILL BUILDING VECTOR STORE")

vector_builder = VectorStoreBuilder(
    csv_path=processed_csv_path,
    persist_dir= persist_dir
)

vector_builder.build_and_save_vectorstore()

print("Vector Database Ready!")

STILL BUILDING VECTOR STORE
Vector Database Ready!


  db.persist()


RETRIVER TOOL

In [31]:
vector_builder = VectorStoreBuilder(
    csv_path="",
    persist_dir="chroma_db"

)

retriver = vector_builder.load_vector_store().as_retriever()

In [32]:

query = 'Tell me about Naruto?'
retriver.invoke(query)

[Document(metadata={'source': '../data/anime_updated.csv', 'row': 10}, page_content="combined_info: Title: Naruto Overview: oments prior to Naruto Uzumaki's birth, a huge demon known as the Kyuubi, the Nine-Tailed Fox, attacked Konohagakure, the Hidden Leaf Village, and wreaked havoc. In order to put an end to the Kyuubi's rampage, the leader of the village, the Fourth Hokage, sacrificed his life and sealed the monstrous beast inside the newborn Naruto. Now, Naruto is a hyperactive and knuckle-headed ninja still living in Konohagakure. Shunned because of the Kyuubi inside him, Naruto struggles to find his place in the village, while his burning desire to become the Hokage of Konohagakure leads him not only to some great new friends, but also some deadly foes. Genres: Action, Adventure, Comedy, Super Power, Martial Arts, Shounen"),
 Document(metadata={'source': '../data/anime_updated.csv', 'row': 43}, page_content="combined_info: Title: D.C.: Da Capo Overview: Every year the flowers blo

In [33]:
@tool
def anime_retriever_tool(query: str) -> str:
    """
    Use this tool to search the anime knowledge base.

    Always call this tool for anime-related questions such as:
    recommendations, similarity search, genres, or plot summaries.

    Input:
    - query: User's anime preference or question.

    Output:
    - Relevant anime information retrieved from the vector database.
    """
    docs = retriver.invoke(query)
    return "\n\n".join(doc.page_content for doc in docs)

In [34]:
model_with_tools = model.bind_tools([anime_retriever_tool])

In [35]:
messages = [
    {
        "role":"user",
        "content": "Can you sugguest something similar to Naruto and also tell me joke."
    }
]

In [36]:
ai_msg = model_with_tools.invoke(messages)
messages.append(ai_msg)


In [37]:
messages

[{'role': 'user',
  'content': 'Can you sugguest something similar to Naruto and also tell me joke.'},
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '8s6jeq1gp', 'function': {'arguments': '{"query":"similar to Naruto"}', 'name': 'anime_retriever_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 342, 'total_tokens': 383, 'completion_time': 0.10747042, 'completion_tokens_details': None, 'prompt_time': 0.017400533, 'prompt_tokens_details': None, 'queue_time': 0.057456836, 'total_time': 0.124870953}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_dae98b5ecb', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c137d-489d-7121-83ea-88b32e49e8dc-0', tool_calls=[{'name': 'anime_retriever_tool', 'args': {'query': 'similar to Naruto'}, 'id': '8s6jeq1gp', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'input_tokens': 34

In [38]:
ai_msg.tool_calls

[{'name': 'anime_retriever_tool',
  'args': {'query': 'similar to Naruto'},
  'id': '8s6jeq1gp',
  'type': 'tool_call'}]

In [39]:
messages[0]

{'role': 'user',
 'content': 'Can you sugguest something similar to Naruto and also tell me joke.'}

In [40]:
for tool_call in ai_msg.tool_calls:
    tool_result = anime_retriever_tool.invoke(tool_call)
    messages.append(tool_result)

In [42]:
# Step 3: Final response
final_response = model_with_tools.invoke(messages)
print(final_response.text)


