# Configuration

In [69]:
# %%capture
# Uncomment the lines of this cell to install the notebook's dependencies.
# (%%capture has to stay on the first line of the cell to hide pip's output.)
# !pip install cohere tiktoken openai
# !pip install langchain
# !pip install python-dotenv
# !pip install pandas
# !pip install langchain
# !pip install chromadb
# !pip install langchain-openai



In [6]:
import openai
import os

from dotenv import load_dotenv, find_dotenv

In [7]:
# Read the nearest .env file into the process environment, then hand the
# OpenAI key to the openai module (None when the variable is not set).
_ = load_dotenv(dotenv_path=find_dotenv())
openai.api_key = os.getenv("OPENAI_API_KEY")

# Data import

In [10]:
import pandas as pd

In [12]:
# Relative path of the books CSV loaded by pd.read_csv.
file_name = 'data/books.csv'

In [13]:
# Read the CSV and keep only its first 100 rows, then show the resulting shape.
df = pd.read_csv(file_name).head(100)
df.shape

(100, 25)

In [14]:
# Display the column names of the loaded frame.
df.columns

Index(['bookId', 'title', 'series', 'author', 'rating', 'description',
       'language', 'isbn', 'genres', 'characters', 'bookFormat', 'edition',
       'pages', 'publisher', 'publishDate', 'firstPublishDate', 'awards',
       'numRatings', 'ratingsByStars', 'likedPercent', 'setting', 'coverImg',
       'bbeScore', 'bbeVotes', 'price'],
      dtype='object')

In [15]:
# Keep the description plus the columns that identify each book
# (title, author, genres, characters, publish date); drop everything else.
df = df.loc[:, ["title", "description", "author", "genres", "characters", "publishDate"]]

In [16]:
# Optional export of the trimmed frame to CSV (left disabled).
# df.to_csv("tabular.csv", index=False)

# LangChain

In [18]:
from langchain_community.document_loaders import DataFrameLoader
 
# Wrap the DataFrame in a DataFrameLoader; the "description" column supplies
# the text of each document.
loader = DataFrameLoader(
    df,
    page_content_column="description",
)


In [19]:
# Load the documents from the loader and print the text of the first one.
docs = loader.load()
sample_doc = docs[0]
print(sample_doc.page_content)

WINNING MEANS FAME AND FORTUNE.LOSING MEANS CERTAIN DEATH.THE HUNGER GAMES HAVE BEGUN. . . .In the ruins of a place once known as North America lies the nation of Panem, a shining Capitol surrounded by twelve outlying districts. The Capitol is harsh and cruel and keeps the districts in line by forcing them all to send one boy and once girl between the ages of twelve and eighteen to participate in the annual Hunger Games, a fight to the death on live TV.Sixteen-year-old Katniss Everdeen regards it as a death sentence when she steps forward to take her sister's place in the Games. But Katniss has been close to dead before—and survival, for her, is second nature. Without really meaning to, she becomes a contender. But if she is to win, she will have to start making choices that weight survival against humanity and life against love.


In [20]:
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter

In [21]:
# Chunk size and overlap handed to both text splitters.
chunk_size, chunk_overlap = 128, 64

In [22]:
# Build both splitters with identical settings so their output can be compared.
c_text_splitter = CharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
r_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)

In [23]:
# Chunk the same documents once with each splitter.
pagesC, pagesR = (
    c_text_splitter.split_documents(docs),
    r_text_splitter.split_documents(docs),
)

In [24]:
# Show the first unsplit document, then the first two chunks from each
# splitter, with a blank line between the two splitters' output.
print(docs[0])
print(pagesC[0], pagesC[1], sep="\n")
print()
print(pagesR[0], pagesR[1], sep="\n")


page_content="WINNING MEANS FAME AND FORTUNE.LOSING MEANS CERTAIN DEATH.THE HUNGER GAMES HAVE BEGUN. . . .In the ruins of a place once known as North America lies the nation of Panem, a shining Capitol surrounded by twelve outlying districts. The Capitol is harsh and cruel and keeps the districts in line by forcing them all to send one boy and once girl between the ages of twelve and eighteen to participate in the annual Hunger Games, a fight to the death on live TV.Sixteen-year-old Katniss Everdeen regards it as a death sentence when she steps forward to take her sister's place in the Games. But Katniss has been close to dead before—and survival, for her, is second nature. Without really meaning to, she becomes a contender. But if she is to win, she will have to start making choices that weight survival against humanity and life against love." metadata={'title': 'The Hunger Games', 'author': 'Suzanne Collins', 'genres': "['Young Adult', 'Fiction', 'Dystopia', 'Fantasy', 'Science Ficti

In [25]:
# Document count before splitting, then the chunk count of each splitter,
# one number per line.
print(len(docs), len(pagesC), len(pagesR), sep="\n")

100
103
1234


In [26]:
# Continue with the chunks produced by the recursive splitter.
pages = pagesR

# ChromaDB and OpenAI API
Used to manage the embeddings.

In [43]:
# Import from the provider packages the rest of this notebook already uses
# (langchain_community for the loader, langchain_openai for the chat model);
# the `langchain.vectorstores` / `langchain.embeddings` paths are deprecated
# re-exports of these classes.
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

In [54]:
embeddings = OpenAIEmbeddings()
# name of the directory to save the embeddings
persist_directory = 'persis_chroma2'

# Chroma.from_documents adds to whatever collection already exists in
# persist_directory, so re-running this cell would embed and store every chunk
# a second time. Open the persisted store first and only build it while it is
# still empty. Delete the directory to force a rebuild after changing the
# documents or the chunking.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
if vectordb._collection.count() == 0:
    vectordb = Chroma.from_documents(
        documents=pages,
        embedding=embeddings,
        persist_directory=persist_directory,
    )

In [55]:
# Number of entries held by the underlying Chroma collection
# (read through the private `_collection` attribute).
vectordb._collection.count()

1234

In [56]:
# Ask the vector store to write itself to persist_directory.
# NOTE(review): recent Chroma releases persist automatically and deprecate
# this call — confirm against the installed version before removing it.
vectordb.persist()

# Similarity search and MMR search
Similarity search finds the most similar responses (for example, all apples),
and MMR search finds diverse ones (for example, different kinds of apples).

<img src="img.jpg" width="300">

In [62]:
# Query used for the two retrieval demos (similarity search and MMR).
question = 'which book talks about a wizard?'

In [63]:
# Plain similarity search: ask the store for three chunks matching the question.
response = vectordb.similarity_search(
    question,
    k=3,
)
response

[Document(page_content="receives a letter that tells him the truth about himself: he's a wizard. A mysterious visitor rescues him from his relatives", metadata={'author': 'J.K. Rowling, Mary GrandPré (Illustrator)', 'characters': "['Draco Malfoy', 'Ron Weasley', 'Petunia Dursley', 'Vernon Dursley', 'Dudley Dursley', 'Severus Snape', 'Quirinus Quirrell', 'Rubeus Hagrid', 'Lord Voldemort', 'Minerva McGonagall', 'Neville Longbottom', 'Fred Weasley', 'George Weasley', 'Percy Weasley', 'Filius Flitwick', 'Pomona Sprout', 'Molly Weasley', 'Poppy Pomfrey', 'Oliver Wood', 'Parvati Patil', 'Dean Thomas', 'James Potter', 'Lily Potter', 'Seamus Finnigan', 'Garrick Ollivander', 'Rolanda Hooch', 'Katie Bell', 'Albus Dumbledore', 'Dedalus Diggle', 'Harry Potter', 'Hermione Granger', 'Lavender Brown']", 'genres': "['Fantasy', 'Fiction', 'Young Adult', 'Magic', 'Childrens', 'Middle Grade', 'Adventure', 'Classics', 'Audiobook', 'Science Fiction Fantasy']", 'publishDate': '11/01/03', 'title': "Harry Pot

In [64]:
# Maximal-marginal-relevance search for the same question, again requesting
# three chunks; the result replaces the previous `response`.
response = vectordb.max_marginal_relevance_search(
    question,
    k=3,
)
response

[Document(page_content="receives a letter that tells him the truth about himself: he's a wizard. A mysterious visitor rescues him from his relatives", metadata={'author': 'J.K. Rowling, Mary GrandPré (Illustrator)', 'characters': "['Draco Malfoy', 'Ron Weasley', 'Petunia Dursley', 'Vernon Dursley', 'Dudley Dursley', 'Severus Snape', 'Quirinus Quirrell', 'Rubeus Hagrid', 'Lord Voldemort', 'Minerva McGonagall', 'Neville Longbottom', 'Fred Weasley', 'George Weasley', 'Percy Weasley', 'Filius Flitwick', 'Pomona Sprout', 'Molly Weasley', 'Poppy Pomfrey', 'Oliver Wood', 'Parvati Patil', 'Dean Thomas', 'James Potter', 'Lily Potter', 'Seamus Finnigan', 'Garrick Ollivander', 'Rolanda Hooch', 'Katie Bell', 'Albus Dumbledore', 'Dedalus Diggle', 'Harry Potter', 'Hermione Granger', 'Lavender Brown']", 'genres': "['Fantasy', 'Fiction', 'Young Adult', 'Magic', 'Childrens', 'Middle Grade', 'Adventure', 'Classics', 'Audiobook', 'Science Fiction Fantasy']", 'publishDate': '11/01/03', 'title': "Harry Pot

# RAG

In [71]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
import langchain
# Switch on verbose chain logging globally; this produces the
# "Entering new ... chain" / formatted-prompt trace printed when the chain runs.
# NOTE(review): newer LangChain versions provide langchain.globals.set_verbose(True)
# for this — confirm against the installed version before switching.
langchain.verbose = True

In [95]:
# Chat model that the QA chain uses to write the answer.
llm_name = 'gpt-3.5-turbo'
llm = ChatOpenAI(
    model_name=llm_name,
    temperature=1,
)

In [96]:
# Retrieval QA chain: the retriever fetches 3 chunks from the vector store,
# the "stuff" chain type places them into one prompt for the LLM, and the
# retrieved documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
)

In [97]:
# Question passed to the RAG chain.
question = 'which book talks about a vampire?'

In [98]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain. The returned dict holds the
# query, the answer under "result", and the retrieved "source_documents".
result = qa_chain.invoke({"query": question})



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
theoretical interpretations of Dracula by Phyllis A. Roth, Carol A. Senf, Franco Moretti, Christopher Craft, Bram Dijkstra,

Deeply seductive and extraordinarily suspenseful, Twilight is a love story with bite.

journey through mortal and immortal life. Louis recounts how he became a vampire at the hands of the radiant and sinister
Human: which book talks about a vampire?[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [100]:
# Display the chain output: the query, the generated answer and the source documents.
result

{'query': 'which book talks about a vampire?',
 'result': 'The book that talks about a vampire is "Interview with the Vampire" by Anne Rice. It is part of "The Vampire Chronicles" series.',
 'source_documents': [Document(page_content='theoretical interpretations of Dracula by Phyllis A. Roth, Carol A. Senf, Franco Moretti, Christopher Craft, Bram Dijkstra,', metadata={'author': 'Bram Stoker, Nina Auerbach (Editor), David J. Skal (Editor)', 'characters': "['Jonathan Harker', 'Lucy Westenra', 'Abraham Van Helsing', 'John Seward', 'Quincey Morris', 'Arthur Holmwood (later Lord Godalming)', 'R.M. Renfield', 'Mina Harker', 'Quincey Harker', 'Peter Hawkins', 'Samuel F. Billington', 'Herr Leutner', 'Mr. Swales', 'Mr. Westenra', 'Mrs. Westenra', 'Lord Godalming (elder)', 'Patrick Hennessey', 'Thomas Bilder', 'Sister Agatha (Dracula)', 'Dr. Vincent', 'Thomas Snelling', 'Joseph Smollett', 'Sam Bloxam', 'Billington Junior', 'Attendant Hardy', 'Captain of the Demeter', 'Abramoff (Demeter Crew)', '