# Using `ollama` models with RAG

* CSV loader
  - https://python.langchain.com/docs/integrations/document_loaders/csv/

### Setup

In [1]:
import openai
import requests
import json
from openai import OpenAI
from dotenv import load_dotenv

from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import Ollama

import pandas as pd

__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')


In [2]:
_ = load_dotenv()

* Connect to Ollama server

In [3]:
client = OpenAI(
    base_url = 'http://10.30.16.100:11434/v1',
    api_key='ollama', # required, but unused
)

### Setup a query on a base model with no recent DP knowledge

In [4]:
query =  '''
        It is 1:21 PM. My train is delayed at Penn Station. What is going on and what should I do?
        '''

In [5]:
messages = [
    {"role": "system", "content": "You are a helpful assistant. Answer questions ONLY if you know the answer"},
    {"role": "user", "content": query},
]

In [6]:
response = client.chat.completions.create(
  model="llama3",
  messages=messages,
  max_tokens=100
)
print(response.choices[0].message.content)

Sorry to hear that your train is delayed!

Penn Station (also known as New York City's Pennsylvania Station) is a busy transportation hub, and delays can happen for various reasons.

**What might be going on:**
There could be several factors contributing to the delay. Common causes include:

1. **Mechanical issues:** A problem with the train or its equipment, such as an electrical issue or mechanical failure.
2. **Signaling issues:** An issue with the signaling system that controls train movements


### Manually include relevant document as context

In [7]:
dp_doc = open('situations.csv').read()

In [8]:
messages = [
    {"role": "system", "content": "You are a public relations chatbot that specializes in helping riders find the specificities of their alternative route and solve their problem as well as calm their nerves. "},
    {"role": "user", "content": dp_doc},
    {"role": "user", "content": query},
   
  ]

In [9]:
print(messages)

[{'role': 'system', 'content': 'You are a public relations chatbot that specializes in helping riders find the specificities of their alternative route and solve their problem as well as calm their nerves. '}, {'role': 'user', 'content': 'Situation,Urgent(yes/no),Current time,Alternative routes,Support,Safety Precautions,Compensation\n"Someone Died at the Trenton Station, causing it to close, on the amtrak from Philadelphia to New York Penn Station",yes,1:14 PM,NJ transit real NEC 3896 towards New York Penn Station,Call 222-333-4444 or use chat bot,"Avoid this line, avoid pushing or shoving near this line",All tickets will be refunded\nThere is an armed person at Penn Station. All upcoming transit has been put to a halt.,yes,1:22 PM,Southbound Megabus,Call 222-333-4444 or use chat bot,Avoid this station completely,All tickets will be refunded\nThe armed person has left and been arrested. All is clear.,yes,1:30 PM,n/a,Call 222-333-4444 or use chat bot,"Avoid pushing or shoving, follow a

In [10]:
response = client.chat.completions.create(
  model="llama3",
  messages=messages,
  max_tokens=200
)
print(response.choices[0].message.content)

I'm so sorry to hear that your train is delayed! There was a situation at Penn Station earlier where an armed person was present, which caused all upcoming transit to be halted temporarily. Thankfully, the individual has been arrested and everything is now clear.

However, as a precautionary measure, I would advise you to avoid pushing or shoving near the area, and follow all posted signs for instructions. You can also consider reaching out to NJ Transit's customer service at 222-333-44 or using our chatbot for further assistance.

Please note that all tickets will be refunded in this situation.


## `langchain` RAG example

### 1. Load documents

In [11]:
loader = CSVLoader(file_path="situations.csv")

docs = loader.load()

In [12]:
docs[:5]

[Document(page_content='Situation: Someone Died at the Trenton Station, causing it to close, on the amtrak from Philadelphia to New York Penn Station\nUrgent(yes/no): yes\nCurrent time: 1:14 PM\nAlternative routes: NJ transit real NEC 3896 towards New York Penn Station\nSupport: Call 222-333-4444 or use chat bot\nSafety Precautions: Avoid this line, avoid pushing or shoving near this line\nCompensation: All tickets will be refunded', metadata={'source': 'situations.csv', 'row': 0}),
 Document(page_content='Situation: There is an armed person at Penn Station. All upcoming transit has been put to a halt.\nUrgent(yes/no): yes\nCurrent time: 1:22 PM\nAlternative routes: Southbound Megabus\nSupport: Call 222-333-4444 or use chat bot\nSafety Precautions: Avoid this station completely\nCompensation: All tickets will be refunded', metadata={'source': 'situations.csv', 'row': 1}),
 Document(page_content='Situation: The armed person has left and been arrested. All is clear.\nUrgent(yes/no): yes\

In [13]:
print(f'Loaded {len(docs)} documents')

Loaded 3 documents


### 2. Load chunks into vector store

1. Using OpenAI Embeddings

In [14]:
#vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings())

2. Using Ollama and open weight embeddings

In [15]:
embeddings = OllamaEmbeddings(
    base_url="http://10.30.16.100:11434",
    model="nomic-embed-text")

vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings)

In [16]:
retriever = vectorstore.as_retriever(search_type="similarity", 
                                     search_kwargs={"k": 1})



#### Example query on vector database

In [17]:
print(query)


        It is 1:21 PM. My train is delayed at Penn Station. What is going on and what should I do?
        


In [18]:
retrieved_docs = retriever.invoke(query)
len(retrieved_docs)

1

In [19]:
relevance_docs_and_scores = vectorstore.similarity_search_with_relevance_scores(query,
                                                                                k=5)

Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


In [20]:
chunks = []
for chunk, score in relevance_docs_and_scores:
    chunks.append({'content': chunk.page_content,
                   'similarity_score': score,
                   'document': chunk.metadata['source']})

In [21]:
chunks

[{'content': 'Situation: There is an armed person at Penn Station. All upcoming transit has been put to a halt.\nUrgent(yes/no): yes\nCurrent time: 1:22 PM\nAlternative routes: Southbound Megabus\nSupport: Call 222-333-4444 or use chat bot\nSafety Precautions: Avoid this station completely\nCompensation: All tickets will be refunded',
  'similarity_score': -245.76287380983638,
  'document': 'situations.csv'},
 {'content': 'Situation: Someone Died at the Trenton Station, causing it to close, on the amtrak from Philadelphia to New York Penn Station\nUrgent(yes/no): yes\nCurrent time: 1:14 PM\nAlternative routes: NJ transit real NEC 3896 towards New York Penn Station\nSupport: Call 222-333-4444 or use chat bot\nSafety Precautions: Avoid this line, avoid pushing or shoving near this line\nCompensation: All tickets will be refunded',
  'similarity_score': -248.2115504116541,
  'document': 'situations.csv'},
 {'content': 'Situation: The armed person has left and been arrested. All is clear.\

In [22]:
pd.DataFrame(chunks)

Unnamed: 0,content,similarity_score,document
0,Situation: There is an armed person at Penn St...,-245.762874,situations.csv
1,Situation: Someone Died at the Trenton Station...,-248.21155,situations.csv
2,Situation: The armed person has left and been ...,-313.317121,situations.csv


## `langchain` RAG chain

In [23]:
prompt = hub.pull("rlm/rag-prompt")

In [24]:
prompt.messages[0].prompt.template

"You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"

* Setup LLM component

In [25]:
ollama = Ollama(
        base_url="http://10.30.16.100:11434",
        model="llama3")

### Setup chains

In [26]:


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_result = (retriever | format_docs)

norag_chain = (
    prompt
    | ollama
    | StrOutputParser()
)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | ollama
    | StrOutputParser()
)

#### with RAG

In [28]:
rag_chain.invoke(query)

KeyboardInterrupt: 

#### No RAG

In [29]:
norag_chain.invoke({"question": query, 'context': ''})

KeyboardInterrupt: 

#### Looking at the context

In [None]:
print(rag_result.invoke(query))

In [77]:
print(query)


        How much is a Classic Dash Burger plus an apple pie?
        
