In [16]:
import os
import re
import PyPDF2 
from dotenv import load_dotenv
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

In [3]:
# Read variables from a local .env file (if present) into the process
# environment, then look up the OpenAI key.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Assigning None into os.environ raises an unhelpful TypeError, so stop here
# with an explicit message when the key is missing or empty.
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

os.environ["OPENAI_API_KEY"] = api_key

In [35]:
def read_pdf(file_path):
    """Extract the text of every page of a PDF into one string.

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        The text of all pages concatenated in page order, with a single
        space placed before each page's text. A page that yields no text
        contributes only that space.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        # Build the result with one join instead of repeated `+=`, and fall
        # back to '' so a page whose extraction returns None cannot break
        # the concatenation.
        return ''.join(
            ' ' + (page.extract_text() or '') for page in reader.pages
        )

In [36]:
# Load the whole script from the PDF and report how many characters came out.
pdf_path = 'shrek-script-pdf.pdf'
text = read_pdf(pdf_path)
print(len(text))

71791


In [38]:
# Flatten the text onto one line: every newline becomes a single space,
# so the character count printed below does not change.
text = text.replace('\n', ' ')
print(len(text))

71791


In [39]:
# Chunking settings: pieces of up to 1000 units with a 50-unit overlap
# between neighbours, where size is measured with len (i.e. characters).
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 50,
    "length_function": len,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Cut the flattened script text into chunks and report how many were produced.
splits = text_splitter.split_text(text)
print(len(splits))

76


In [40]:
# Cap on the number of chunks wrapped as Documents; slicing past the end of
# the list is harmless, so shorter inputs are taken in full.
MAX_CHUNKS = 90
selected_chunks = splits[:MAX_CHUNKS]
rags = [Document(page_content=chunk) for chunk in selected_chunks]

In [41]:
# On-disk location of the persisted Chroma index.
CHROMA_DIR = "data/chroma_db"
embeddings = OpenAIEmbeddings()

# Chroma.from_documents adds to whatever collection is already stored in the
# persist directory, so re-running this cell would embed the chunks again and
# store duplicates. Reuse the persisted index when one exists; delete
# CHROMA_DIR to force a rebuild after changing the PDF or the chunking.
if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    vectorstore = Chroma(
        persist_directory = CHROMA_DIR,
        embedding_function = embeddings
    )
else:
    vectorstore = Chroma.from_documents(
        documents = rags,
        embedding = embeddings,
        persist_directory = CHROMA_DIR
    )

retriever = vectorstore.as_retriever()

In [42]:
# Chat model used to answer questions; temperature 0.0 is the lowest
# sampling randomness setting.
llm = ChatOpenAI(model_name = "gpt-3.5-turbo", temperature=0.0)

# Buffer memory keeps the conversation verbatim and does not use an LLM
# (ConversationBufferMemory has no `llm` field), so none is passed.
# The history is exposed under "chat_history" as message objects.
memory = ConversationBufferMemory(
    memory_key = "chat_history",
    return_messages=True
)

# Retrieval chain wiring together the model, the vector-store retriever and
# the conversation memory.
chat = ConversationalRetrievalChain.from_llm(
    llm,
    retriever = retriever,
    memory = memory
)

In [43]:
# Calling the chain object directly is deprecated in current LangChain;
# invoke() is the supported entry point and returns the same result dict.
res = chat.invoke({"question": "What is donkeys personality?"})

In [44]:
# Show only the model's reply from the chain's result.
answer = res["answer"]
print(answer)

Donkey's personality in the context provided is portrayed as talkative, curious, humorous, loyal, and sometimes a bit naive. He is shown to be inquisitive, not afraid to ask questions, and often provides comic relief in the interactions with Shrek. Donkey also shows a caring side, as he expresses concern for Shrek and tries to understand his motivations.
