In [None]:
%pip install langchain langchain-community langchain_groq chromadb gradio

In [None]:
import pandas as pd
from langchain.chains import RetrievalQA
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_groq import ChatGroq

In [None]:
# Read the preprocessed anime table and preview its first rows.
anime_csv_path = "/anime_processed.csv"
anime = pd.read_csv(anime_csv_path)
anime.head()

In [None]:
def _row_to_text(row):
    """Flatten one anime record into a single descriptive string."""
    return (
        f"Title: {row['Name']}. "
        f"English Name : {row['English name']}. "
        f"Japanese Name : {row['Japanese name']}. "
        f"Overview : {row['Synopsis']}. "
        f"Episodes : {row['Episodes']}. "
        f"Score : {row['Score']}. "
        f"Type : {row['Type']}. "
        f"Rating : {row['Rating']}"
    )


# One combined text per row, stored alongside the original columns.
anime['combined_df'] = anime.apply(_row_to_text, axis=1)

In [None]:
# Show the combined text for the row labelled 0 to check the format.
anime['combined_df'][0]

In [None]:
# Write only the combined-text column to disk, leaving out the index.
anime.to_csv('anime_updated.csv', columns=['combined_df'], index=False)

In [None]:
# Read the exported file back and display it.
pd.read_csv("anime_updated.csv")

**Data loader and vector store using LangChain**

In [None]:
import os
from getpass import getpass

# Load the combined-text CSV through the same relative path it was saved under
# ('anime_updated.csv'). The earlier "/content/..." path only pointed at that
# file when the working directory happened to be /content.
loader = CSVLoader(file_path="anime_updated.csv")
data = loader.load()

# Split the loaded documents (chunk_size=1000, no overlap) and set up the embedder.
text_splitter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap = 0)
texts = text_splitter.split_documents(data)
embeddings = HuggingFaceEmbeddings()

# Keep the Groq key out of the notebook: read it from GROQ_API_KEY, or prompt
# for it once when the variable is not set.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
groq_api_key = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
llm = ChatGroq(api_key = groq_api_key, model = "gemma2-9b-it", temperature= 0 )

# Embed the chunks and index them in a Chroma vector store.
docssearch = Chroma.from_documents(texts, embeddings)

**Querying Vector DB Store for movie recommendation**

In [None]:
# Query the vector store directly and keep only the single closest match.
query = "I am looking for an animated sci-fi movie. What could you suggest to me"
docs = docssearch.similarity_search(query, k=1)

In [None]:
# Display the result of the similarity search.
docs

**Using QA Retrieval for movie recommendation**

In [None]:
# Retrieval QA chain over the vector store; each answer also carries the
# documents it was built from (return_source_documents=True).
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docssearch.as_retriever(),
    return_source_documents=True,
)

In [None]:
query = "Can you recommend a romance anime with a unique storyline?"
# Use `invoke`: calling the chain object directly (`qa({...})`) is deprecated
# in LangChain. The returned dict has the same keys.
result = qa.invoke({"query": query})

In [None]:
# Display the 'result' entry of the chain output.
result['result']

In [None]:
# Display the first entry of the 'source_documents' list in the chain output.
result['source_documents'][0]

**Prompt Engineering**

In [None]:
from langchain.prompts import PromptTemplate

# Prompt (written in Indonesian): act as a recommender, answer from the supplied
# context, suggest three anime with a short plot description and a reason the
# user may like each, and say "don't know" instead of inventing an answer.
template = """Anda adalah sistem pemberi rekomendasi film yang membantu pengguna menemukan anime yang sesuai dengan preferensi mereka.
Gunakan potongan konteks berikut untuk menjawab pertanyaan di akhir. Untuk setiap pertanyaan, sarankan tiga anime,
dengan deskripsi singkat tentang plot dan alasan mengapa pengguna mungkin menyukainya.
Jika Anda tidak tahu jawabannya, katakan saja Anda tidak tahu, jangan mencoba mengarang jawaban.

{context}

Question: {question}
Your response:"""

PROMPT = PromptTemplate(
    template=template, input_variables=["context", "question"]
)
chain_type_kwargs = {"prompt": PROMPT}

# Rebuild the QA chain so it uses the custom prompt.
qa = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=docssearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

query = "Give me a list of comedy animes that are lighthearted and fun."
# Use `invoke`: calling the chain object directly (`qa({...})`) is deprecated
# in LangChain. The returned dict has the same keys.
result = qa.invoke({"query": query})
print(result['result'])


**Second Template**

In [None]:
from langchain.prompts import PromptTemplate

# Instruction block (Indonesian) ending with the slot for the retrieved context.
template_prefix = """Anda adalah sistem pemberi rekomendasi film yang membantu pengguna menemukan anime yang sesuai dengan preferensi mereka.
Gunakan potongan konteks berikut untuk menjawab pertanyaan di akhir. Untuk setiap pertanyaan, sarankan tiga anime,
dengan deskripsi singkat tentang plot dan alasan mengapa pengguna mungkin menyukainya.
Jika Anda tidak tahu jawabannya, katakan saja Anda tidak tahu, jangan mencoba mengarang jawaban.

{context}"""

# User profile lines, filled in immediately with the known values.
user_info = "\n".join([
    "This is what we know about the user, and you can use this information to better tune your research:",
    "Age: {age}",
    "Gender: {gender}",
]).format(age=18, gender='male')

# Question slot and answer cue; `{question}` is left as a placeholder.
template_suffix = "Question: {question}\nYour response:"

# Join the three parts with a single newline between each.
COMBINED_PROMPT = "\n".join((template_prefix, user_info, template_suffix))
print(COMBINED_PROMPT)

In [None]:
# Wrap the combined text as a prompt with the two remaining placeholders.
PROMPT = PromptTemplate(
    template=COMBINED_PROMPT, input_variables=["context", "question"]
)
chain_type_kwargs = {"prompt": PROMPT}

# Rebuild the QA chain so it uses the prompt that includes the user profile.
qa = RetrievalQA.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=docssearch.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs,
)

query = "Which anime would you suggest for someone new to anime?"
# Use `invoke`: calling the chain object directly (`qa({...})`) is deprecated
# in LangChain. The returned dict has the same keys.
result = qa.invoke({"query": query})
print(result['result'])

In [None]:
import gradio as gr
import pandas as pd
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.prompts import PromptTemplate

# Load anime dataset and drop every row that has a missing value
anime = pd.read_csv("/anime_processed.csv")
anime = anime.dropna()
anime['combined_info'] =anime.apply(lambda row: f"Title: {row['Name']}. English Name : {row['English name']}. Japanese Name : {row['Japanese name']}. Overview : {row['Synopsis']}. Episodes : {row['Episodes']}. Score : {row['Score']}. Type : {row['Type']}. Rating : {row['Rating']}", axis = 1)

# Save updated dataset and load it back through the SAME path. Writing to a
# relative name but reading "/content/anime_updated.csv" only worked when the
# working directory was /content.
updated_csv_path = 'anime_updated.csv'
anime[['combined_info']].to_csv(updated_csv_path, index=False)
loader = CSVLoader(file_path=updated_csv_path)
data = loader.load()

# Split the loaded documents into chunks (chunk_size=1000, no overlap)
from langchain.text_splitter import CharacterTextSplitter
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)

# Embed the chunks, index them in Chroma, and expose the store as a retriever
embeddings = HuggingFaceEmbeddings()
docssearch = Chroma.from_documents(documents=texts, embedding=embeddings)
retriever = docssearch.as_retriever()

# Define LLM
# The Groq key is read from the GROQ_API_KEY environment variable, with an
# interactive prompt as fallback, so the secret is not stored in the notebook.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
import os
from getpass import getpass

groq_api_key = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
llm = ChatGroq(api_key=groq_api_key, model="gemma2-9b-it", temperature=0)

# Prompt (written in Indonesian): recommend as many anime as the user asks for,
# each with a short plot description and a reason, using the supplied context,
# and admit not knowing instead of inventing an answer.
template = """Anda adalah sistem pemberi rekomendasi film yang membantu pengguna menemukan anime yang sesuai dengan preferensi mereka.
Gunakan potongan konteks berikut untuk menjawab pertanyaan di akhir. Untuk setiap pertanyaan, sarankan anime dengan jumlah yang diinginkan pengguna,
dengan deskripsi singkat tentang plot dan alasan mengapa pengguna mungkin menyukainya.
Jika Anda tidak tahu jawabannya, katakan saja Anda tidak tahu, jangan mencoba mengarang jawaban.

{context}

Question: {question}
Your response:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)
chain_type_kwargs = dict(prompt=PROMPT)

# Retrieval QA chain wired to the retriever and the custom prompt.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

def recommend_anime(query):
    """Return the recommendation text produced by ``qa_chain`` for ``query``.

    Args:
        query: Free-text preference entered by the user.

    Returns:
        The ``'result'`` entry of the chain output, or a short hint asking for
        input when ``query`` is empty or contains only whitespace.
    """
    # Nothing to search for: skip the retrieval and LLM call entirely.
    if not query or not query.strip():
        return "Please describe the kind of anime you are looking for."
    # `invoke` replaces the deprecated direct call `qa_chain({...})`.
    result = qa_chain.invoke({'query': query})
    return result['result']

# Gradio interface: one textbox in, one textbox out, backed by recommend_anime
theme = "Respair/Shiki@1.2.1"

preference_box = gr.Textbox(label="Enter your anime preference")
recommendation_box = gr.Textbox(label="Recommended Anime")

iface = gr.Interface(
    title="Anime Movie Recommender",
    fn=recommend_anime,
    inputs=preference_box,
    outputs=recommendation_box,
    theme=theme,
)

# Launch only when this code runs as the main module.
if __name__ == "__main__":
    iface.launch()
