In [11]:
import os
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment.
load_dotenv()

# The key is None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Name of the model to run. Names starting with "gpt" are routed to OpenAI
# further down; anything else is served through Ollama.
# Alternatives that were tried: "gpt-3.5-turbo", "mixtral:8x7b".
MODEL = "llama2"


In [13]:
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings

# Pick the chat model and the matching embedding backend from the model name:
# "gpt*" names go to OpenAI, everything else is served by Ollama.
if MODEL.startswith("gpt"):
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
    # Pass the key explicitly, exactly as for the chat model, so both OpenAI
    # clients are configured the same way instead of one relying on the
    # process environment.
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
else:
    model = Ollama(model=MODEL)
    # NOTE(review): the same model is used for generation and for embeddings;
    # a dedicated embedding model may retrieve better -- confirm before relying
    # on retrieval quality.
    embeddings = OllamaEmbeddings(model=MODEL)

# Smoke test: make sure the selected model answers at all.
model.invoke("Tell me a joke")

"Sure, here's one:\n\nWhy don't scientists trust atoms?\nBecause they make up everything!\n\nI hope that brought a smile to your face!"

In [14]:
from langchain_core.output_parsers import StrOutputParser

# The parser turns whatever the model returns into a plain string.
parser = StrOutputParser()

# Smallest possible chain: model output piped straight into the parser.
chain = model | parser

joke_request = "Tell me a joke"
chain.invoke(joke_request)

'\nWhy was the math book sad? Because it had too many problems! 😂'

In [17]:
from langchain.prompts import PromptTemplate

# Prompt text with two placeholders, {context} and {question}; the model is
# told to reply "I don't know" when the context does not contain the answer.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

# Render the template once with dummy values to check the final layout.
rendered_example = prompt.format(
    context="Here is some context",
    question="Here is a question",
)
print(rendered_example)


Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: Here is some context

Question: Here is a question



In [18]:
# Prompt -> model -> parser: the dict passed to invoke() fills the template's
# {context} and {question} placeholders.
chain = prompt | model | parser

# Sanity check with a hand-written context (the stray apostrophe that used to
# follow "name" in the question has been removed).
chain.invoke({"context": "My parents named me Santiago", "question": "What's your name?"})

' Great! Based on the context you provided, my answer to the question "What\'s your name?" would be "Santiago."'

In [15]:
from langchain_community.document_loaders import PyPDFLoader

# PDF to index, relative to the notebook's working directory.
PDF_PATH = "2.pdf"

# load_and_split() yields a list of Document objects, each carrying the
# extracted text plus `source` / `page` metadata.
loader = PyPDFLoader(PDF_PATH)
pages = loader.load_and_split()

# Show only a short preview: echoing the whole list floods the notebook
# output with every document in the file.
print(f"Loaded {len(pages)} documents from {PDF_PATH}")
pages[:2]

[Document(page_content='Software Architecture \nDocumentation \n \nCo-op Evaluation System \nSenior Project 2014-2015 \n \n \n \n \nTeam Members: \nTyler Geery \nMaddison Hickson \nCasey Klimkowsky \nEmma Nelson \n \nFaculty Coach: \nSamuel Malachowsky \n \nProject Sponsors: \nJim Bondi (OCSCE) \nKim Sowers (ITS)\xa0\n\xa0 \xa0\n1', metadata={'source': '2.pdf', 'page': 0}),
 Document(page_content='Table of Contents \nTable\xa0of\xa0Contents\xa0\nRevision\xa0History\xa0\n1Introduction\xa0\n2Background\xa0\n3Functional\xa0Requirements\xa0\n4Quality\xa0Attributes\xa0\n4.1Usability\xa0\n4.2Availability\xa0\n4.3Maintainability\xa0\n4.4Testability\xa0\n5Architecture\xa0Overview\xa0\n5.1Big\xa0Picture\xa0\n5.1.1System\xa0Context\xa0\n5.1.2User\xa0Interactions\xa0\n5.1.3Data\xa0Flow\xa0\n5.2View\xa0Introduction\xa0\n5.3Patterns\xa0and\xa0Tactics\xa0\n5.3.1Architectural\xa0Drivers\xa0and\xa0Tactics\xa0\nUsability\xa0\nAvailability\xa0\nMaintainability\xa0\nTestability\xa0\n5.3.2Patterns\xa0\nSe

In [19]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Embed every loaded document and keep the vectors in an in-memory store.
vectorstore = DocArrayInMemorySearch.from_documents(
    documents=pages,
    embedding=embeddings,
)



In [20]:
# Wrap the vector store in a retriever so it can be used as a chain step.
retriever = vectorstore.as_retriever()

# Try one query by hand to see which documents come back.
sample_query = "machine learning"
retriever.invoke(sample_query)

[Document(page_content='3 Functional Requirements \nMany\xa0of\xa0the\xa0features\xa0involve\xa0saving,\xa0updating,\xa0or\xa0viewing\xa0evaluation\xa0forms,\xa0and\xa0thus\xa0will\xa0need\xa0\nto\xa0be\xa0accounted\xa0for\xa0in\xa0the\xa0architecture\xa0due\xa0to\xa0amount\xa0of\xa0interfacing\xa0with\xa0the\xa0database\xa0\nrequired.\xa0The\xa0system\xa0must\xa0support\xa0concurrent\xa0reads\xa0from,\xa0and\xa0writes\xa0to\xa0the\xa0database.\xa0\xa0\nAdditionally,\xa0the\xa0system\xa0may\xa0have\xa0the\xa0need\xa0to\xa0interface\xa0with\xa0several\xa0external\xa0APIs.\xa0The\xa0\nsystem\xa0must\xa0interact\xa0with\xa0ITS’s\xa0email\xa0server\xa0in\xa0order\xa0to\xa0send\xa0emails\xa0to\xa0students,\xa0employers,\xa0\nand\xa0other\xa0users.\xa0Furthermore,\xa0although\xa0the\xa0particular\xa0services\xa0are\xa0unknown\xa0at\xa0this\xa0time,\xa0it\xa0is\xa0\nlikely\xa0that\xa0the\xa0system\xa0will\xa0have\xa0to\xa0interface\xa0with\xa0an\xa0external\xa0report\xa0and\xa0form\xa0generat

In [21]:
from operator import itemgetter

def _format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    The retriever returns a list of Document objects. Formatting that list
    directly into the prompt would insert its Python repr (metadata, escaped
    characters and all), so only the `page_content` of each document is kept,
    separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Retrieval-augmented chain. The input is a dict with a "question" key:
#   - "context": the question is sent to the retriever and the hits are
#     flattened to plain text by _format_docs;
#   - "question": the question itself is passed through unchanged.
# Both values fill the prompt, which then goes to the model and the parser.
chain = (
    {
        "context": itemgetter("question") | retriever | _format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

In [22]:
# Questions to put to the retrieval chain, one invocation per question.
questions = ["How many servers are getting used in this document"]

for question in questions:
    print(f"Question: {question}")
    # The chain expects a dict with a "question" key.
    answer = chain.invoke({"question": question})
    print(f"Answer: {answer}")
    print()

Question: How many servers are getting used in this document
Answer: Based on the content of the document, it appears that there are 3 servers involved in the system architecture:

1. Data Layer: The document mentions "Data Layer to DAL to Data Source Layer" which suggests that there are 3 layers involved: Data Layer, DAL (Data Access Layer), and Data Source Layer.
2. Domain Layer: The document also mentions "Domain Layer to DAL to Data Source Layer" which implies that there are 3 layers involved: Domain Layer, DAL, and Data Source Layer.
3. Environment: The document talks about "Environment at runtime" which suggests that the system is deployed in a runtime environment with multiple servers.

So, there are 3 servers getting used in this document:

1. Data Layer
2. Domain Layer
3. Runtime Environment

