# A simple RAG application using open-source models

In [4]:
import os
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment
# before any of them are looked up below.
load_dotenv()

# Only consumed by the OpenAI code path, which is currently commented out in
# the next cell. Resolves to None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Name of the model used for both generation and embeddings.
# Other values that have been tried here:
#   "gpt-3.5-turbo"
#   "mixtral:8x7b"
MODEL = "llama2"


In [9]:
from langchain_community.llms import Ollama
# from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
# from langchain_openai.embeddings import OpenAIEmbeddings

# The OpenAI/Ollama switch below is disabled (the matching langchain_openai
# imports above are commented out too), so this cell always builds the Ollama
# objects regardless of what MODEL is set to. To restore the switch, uncomment
# those imports and this block, and drop the unconditional assignments below.
#
# if MODEL.startswith("gpt"):
#     model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
#     embeddings = OpenAIEmbeddings()
# else:
#     model = Ollama(model=MODEL)
#     embeddings = OllamaEmbeddings(model=MODEL)

# The same model name is used for text generation and for embeddings.
# `model` feeds every chain below; `embeddings` is used to build the vector store.
model = Ollama(model=MODEL)
embeddings = OllamaEmbeddings(model=MODEL)

# Smoke test: a single prompt string in, the model's reply out.
# NOTE(review): presumably requires a running Ollama server with MODEL pulled — confirm.
model.invoke("Tell me a joke")

"Sure, here's one:\n\nWhy don't scientists trust atoms?\nBecause they make up everything!"

In [10]:
from langchain_core.output_parsers import StrOutputParser

# Output parser placed at the end of every chain in this notebook so the final
# result is a plain string.
parser = StrOutputParser()

# First chain: the `|` operator pipes the model's output into the parser.
# `chain` is rebuilt with more stages in later cells.
chain = model | parser 
# Same smoke-test prompt as before, now routed through the chain.
chain.invoke("Tell me a joke")

'Why was the math book sad? Because it had too many problems! 😂'

In [11]:
from langchain.prompts import PromptTemplate

# Prompt used for question answering. It has two placeholders:
#   {context}  - the text the answer must be based on
#   {question} - the user's question
# The model is told to reply "I don't know" when the context is not enough.
# The string is passed to the model as-is, so whitespace inside it matters.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

# Build a reusable prompt object from the template string.
prompt = PromptTemplate.from_template(template)
# Sanity check: render the prompt with dummy values to inspect the final text.
prompt.format(context="Here is some context", question="Here is a question")

'\nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: Here is some context\n\nQuestion: Here is a question\n'

In [12]:
# Extend the chain with the prompt: a dict holding the template variables is
# rendered into the prompt text, sent to the model, and parsed to a string.
chain = prompt | model | parser

# Sanity check with hand-written context before any retrieval is involved.
# The input dict must provide both "context" and "question", matching the
# placeholders in the template. (The question previously carried a stray
# apostrophe before the question mark, which was sent to the model verbatim.)
chain.invoke({"context": "My parents named me Santiago", "question": "What's your name?"})

' Sure! Based on the context provided, the answer to the question "What\'s your name?" is "Santiago."'

In [14]:
from langchain_community.document_loaders import PyPDFLoader

# Load the source document. The path is relative, so "mlschool.pdf" has to be
# in the directory the notebook is run from.
loader = PyPDFLoader("mlschool.pdf")
# `pages` is a list of Document objects; in the recorded output each one
# carries its text in `page_content` and `source`/`page` in `metadata`.
# NOTE(review): exact splitting rules of load_and_split() are not visible here — confirm.
pages = loader.load_and_split()
# Display the loaded documents for inspection (this prints every document).
pages

[Document(metadata={'source': 'mlschool.pdf', 'page': 0}, page_content='Building Machine Learning Systems That Don\'t\nSuck\nA live, interactive program that\'ll help you build production-readymachine learning\nsystems from the ground up.\nNext cohort:\xa0November4 - 21, 2024\nCheck the schedulefor more details about upcoming cohorts.\nI want to join!Sign in\nLearn how to design, build, deploy, and scale machine learning\nsystems to solve real-world problems.\nI\'ll lose my mind if I see another book or course teaching people the same basic ideas\nfor the hundredth time. Most people are stuck in beginner mode, and finding help to\nsolve real-world problems is hard.\nI want to change that.\nI started writing software 30 years ago. I\'ve written pipelines and trained models for\nsome of the largest companies in the world. I want to show you how to do the same."This is the best machine learning course I\'ve done.\nWorth every cent."\n— Jose Reyes, AI/ML at Cevo Australia9/25/24, 3:06 PM B

In [15]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Build an in-memory vector store from the loaded PDF documents, using the
# Ollama embeddings object created earlier. Nothing is persisted here, so the
# store has to be rebuilt after a kernel restart.
# NOTE(review): presumably every document in `pages` is embedded at this point,
# which can be slow with a large model — confirm.
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)



In [16]:
# Expose the vector store as a retriever so it can be used as a chain step.
retriever = vectorstore.as_retriever()
# Sanity check: query with a plain string; the recorded output is a list of
# Document objects taken from the PDF.
retriever.invoke("machine learning")

[Document(metadata={'source': 'mlschool.pdf', 'page': 3}, page_content="Here is a summary of what makes this program unique:\nYou'll design and write the code to build an end-to-end machine learning\nsystem starting from scratch.\nYou'll learn best practices to tackle the most significant challenges machine\nlearning engineers face to build, evaluate, run, monitor, and maintain machine\nlearning systems in real-world scenarios.\nYou'll learn how to use techniques like active learning, distributed training,\nadversarial validation, human-in-the-loop deployments, model compression,\ntest-time augmentation, testing in production, among many others.\nYou'll learn how to create training, deploying, monitoring, and inference\npipelines using Amazon SageMaker and open-source tools.\nForget about theoretical concepts. This program will show you some of the things I've\nlearned from real-life examples I've built during more than 30 years in the industry.\nCheck the program syllabus\xa0→\nReal-l

In [17]:
from operator import itemgetter

# Full RAG chain. It is invoked with a dict that has a "question" key.
# The question text is used twice:
#   - piped into the retriever, whose result fills the prompt's {context}
#   - passed through unchanged to fill the prompt's {question}
pick_question = itemgetter("question")

retrieval_inputs = {
    "context": pick_question | retriever,
    "question": pick_question,
}

# Mapping -> rendered prompt -> model -> plain string.
chain = retrieval_inputs | prompt | model | parser

In [18]:
# Questions about the course document, used to exercise the RAG chain.
# The list is reused by the batch cell further down.
questions = [
    "What is the purpose of the course?",
    "How many hours of live sessions?",
    "How many coding assignments are there in the program?",
    "Is there a program certificate upon completion?",
    "What programming language will be used in the program?",
    "How much does the program cost?",
]

# Ask one question at a time. The question is printed before the chain runs,
# so it shows up immediately while the answer is still being generated.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({"question": question})
    # A blank line after each answer separates the question/answer pairs.
    print(f"Answer: {answer}", end="\n\n")

Question: What is the purpose of the course?
Answer: Based on the context provided, the purpose of the course appears to be teaching students practical skills and insights to build machine learning systems that work in the real world. The course aims to provide participants with the knowledge and tools they need to succeed in building and scaling enterprise software and machine learning systems, as well as to help them unlearn what they think machine learning is and learn about trade-offs and how to create products that work. Additionally, the course promises to provide direct feedback from the instructor and access to a private community for collaboration with thousands of people from different backgrounds.

Question: How many hours of live sessions?
Answer: Based on the document provided, there are 18 hours of live sessions in the program.

Question: How many coding assignments are there in the program?
Answer: The answer to the question is:

There are 100 coding assignments and prac

In [None]:
# Send all questions to the chain in a single batch call instead of looping.
# Each input has the same {"question": ...} shape that chain.invoke receives.
# NOTE(review): this cell has no recorded output; presumably the result is one
# answer per input — confirm.
batch_inputs = [{"question": text} for text in questions]
chain.batch(batch_inputs)

In [30]:
# Stream the answer instead of waiting for the complete response.
stream_input = {"question": "What is the purpose of the course?"}

for chunk in chain.stream(stream_input):
    # No newline between pieces, and flush right away, so the text appears as
    # one continuous answer while it is being generated.
    print(chunk, end="", flush=True)

The purpose of the course is to teach practical skills and insights that will help individuals build machine learning systems that work in the real world.

In [109]:
# Disabled alternative data source: load the course web page instead of the
# PDF and split it into overlapping chunks (1000 characters, 200 overlap).
# Nothing in this cell executes as written, and no other cell shown here uses
# `documents`.
# NOTE(review): the output stored under this cell appears to come from an
# earlier run made while this code was active — re-run or clear it.

# from langchain_community.document_loaders import WebBaseLoader
# from langchain_text_splitters import RecursiveCharacterTextSplitter

# loader = WebBaseLoader("https://www.ml.school")
# docs = loader.load()
# documents = RecursiveCharacterTextSplitter(
#     chunk_size=1000, chunk_overlap=200
# ).split_documents(docs)

# documents

[Document(page_content='Building Machine Learning Systems That Don\'t Suck"This is the best machine learning course I\'ve done ever. Worth every cent."Jose Reyes, AI/ML at Cevo AustraliaBuilding Machine Learning Systems That Don\'t SuckA live, interactive program that\'ll help you build production-ready machine learning systems from the ground up.Next cohort:\xa0April 8 - 25, 2024Check the schedule for more details about upcoming cohorts.Register nowLearn how to design, build, deploy, and scale machine learning systems to solve real-world problems.I\'ll lose my mind if I see another book or course teaching people the same basic ideas for the hundredth time. Most people are stuck in beginner mode, and finding help to solve real-world problems is hard.I want to change that.I started writing software 30 years ago. I\'ve written pipelines and trained models for some of the largest companies in the world. I want to show you how to do the same.This is the class I wish I had taken when I star