In [44]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# MODEL = "gpt-3.5-turbo"
# MODEL = "mixtral:8x7b"
MODEL = "llama3"



In [45]:
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI

if MODEL.startswith("gpt"):
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
    embeddings = OpenAIEmbeddings()
else:
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)

# model.invoke("Tell me a joke")

In [46]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

chain = model | parser 
chain.invoke("Tell me a joke")

template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)
prompt.format(context="Here is some context", question="Here is a question")

chain = prompt | model | parser

chain.invoke({"context": "My parents named me Santiago", "question": "What's your name'?"})

'Santiago!'

In [47]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import DocArrayInMemorySearch

loader = PyPDFLoader("Testing API PDF Processing.pdf")
pages = loader.load_and_split()

vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)

retriever = vectorstore.as_retriever()
retriever.invoke("mew")



[Document(page_content='Vocabulary\nfor\nUnit\n12\nof\nComputer\nArchitecture\n●\nDogwater\n-\nyou\nsuck\nat\nthe\ngame\n●\nMew\n-\nmaking\nyourself\nhandsomer\n●\nWoolwars\n-\na\nminecraft\ngamemode\nin\nHypixel\n●\nRizz\n-\nshort\nfor\ncharisma,\nmeaning\ncharm\n●\nGyatt\n-\nbutt\n●\nEgirl\n-\nsome\ngirl\nthat\nonline\ndates\n●\nEmo\n-\nsomeone\nthat\ndresses\nin\nall\nblack\n(prime\nexample:\nant)\n●\nDiscord\n-\nonline\ncommunication\nplatform', metadata={'source': 'Testing API PDF Processing.pdf', 'page': 0})]

In [48]:
from operator import itemgetter

chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

questions = [
    "What is the definition of mewing?",
    "Give me an example of an emo?",
    "How many terms are in this pdf?",
    "What class does this document cover?",
    "What unit are we on?",
    "What is woolwars?",
]

for question in questions:
    print(f"Question: {question}")
    print(f"Answer: {chain.invoke({'question': question})}")
    print()

Question: What is the definition of mewing?
Answer: Based on the context, I can answer that "Mew" has a definition of "making yourself handsomer".

Question: Give me an example of an emo?
Answer: Based on the context, an example of an emo would be "Anthony" (according to the text: "(prime example: ant)").

Question: How many terms are in this pdf?
Answer: I can answer the question!

According to the context, there are 11 terms listed:

1. Dogwater
2. Mew
3. Woolwars
4. Rizz
5. Gyatt
6. Egirl
7. Emo
8. Discord

So, the answer is: 8

Question: What class does this document cover?
Answer: Based on the context, I would say that this document covers the "Vocabulary" for Unit 12 of Computer Architecture.

Question: What unit are we on?
Answer: Based on the context, I can answer that question!

We're on Unit 12 of Computer Architecture!

Question: What is woolwars?
Answer: According to the context, Woolwars refers to a Minecraft game mode in Hypixel.

