In [1]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
import json
from langchain.prompts import PromptTemplate
from langchain.document_loaders import WebBaseLoader
from langchain_text_splitters import TokenTextSplitter
import ollama
from bs4 import BeautifulSoup
import requests
from langchain.docstore.document import Document

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
Embedding_Model = "nomic-embed-text"
Model = Ollama(model="llama3-chatqa")

In [3]:
# docLoader = WebBaseLoader("https://en.wikipedia.org/wiki/Angling")
# document = docLoader.load()
# text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
# docs = text_splitter.split_documents(document)
# db = FAISS.from_documents(docs, OllamaEmbeddings(model = Embedding_Model))
# retriever = db.as_retriever() 

### Scraping a URL

In [4]:
# For Preprocessing 

def remove_whitespace(text):
    return  " ".join(text.split())

In [5]:
url = 'https://en.wikipedia.org/wiki/1896_Summer_Olympics'

page = requests.get(url)

soup = BeautifulSoup(page.text, 'html.parser')

data = soup.findAll("div", class_ = 'mw-body-content')[0].text

data = remove_whitespace(data)

print(data)

Multi-sport event in Athens, Greece Games of the I OlympiadCover of the official report for the 1896 Summer OlympicsLocationAthens, GreeceNations14[note1]Athletes241 (all men)[note2]Events43 in 9 sports (10 disciplines)Opening6 April 1896Closing15 April 1896Opened byKing George I[1]StadiumPanathenaic StadiumParis 1900 → The 1896 Summer Olympics (Greek: Θερινοί Ολυμπιακοί Αγώνες 1896, romanized: Therinoí Olympiakoí Agónes 1896), officially known as the Games of the I Olympiad (Greek: Αγώνες της 1ης Ολυμπιάδας, romanized: Agónes tis 1is Olympiádas) and commonly known as Athens 1896 (Greek: Αθήνα 1896), was the first international Olympic Games held in modern history. Organised by the International Olympic Committee (IOC), which had been created by French aristocrat Pierre de Coubertin, it was held in Athens, Greece, from 6 to 15 April 1896.[2] Fourteen nations (according to the IOC, though the number is subject to interpretation) and 241 athletes (all males; this number is also disputed)

### Converting the scraped text into documents

In [6]:
text_splitter = TokenTextSplitter(chunk_size=500,chunk_overlap=100)
documents = text_splitter.split_text(data)
docs = [Document(page_content=document, metadata={"source": "local"}) for document in documents]

### Creating the vector store

In [7]:
db = FAISS.from_documents(docs, OllamaEmbeddings(model = Embedding_Model))
retriever = db.as_retriever() 

In [8]:
def combineDocs(docs):
    context = "\n\n".join(f'Document Content : \n{doc.page_content} ]' for doc in docs)
    return context

In [26]:
history = [' ', ' ', ' ', ' ', ' ', ' '] 
def chat(question) : 
    
    global history

    contextString = combineDocs(retriever.invoke(question))
    print(contextString)
    systemMessage = """ 
    You are a helpful assistant. You are given conversation history and a question with some context from the user. 
    Answer the question based on the information given in that context or based on the conversation history if needed."
    
    """
    message=[
                {
                    'role': 'system',
                    'content': systemMessage,
                },
                {
                    'role': 'user',
                    'content': history[-6],
                },
                {
                    'role': 'assistant',
                    'content': history[-5],
                },
                {
                    'role': 'user',
                    'content': history[-4],
                },
                {
                    'role': 'assistant',
                    'content': history[-3],
                },
                {
                    'role': 'user',
                    'content': history[-2],
                },
                {
                    'role': 'assistant',
                    'content': history[-1],
                },
                {
                    'role': 'user',
                    'content': "Here is the context : " + contextString,
                },{
                    'role': 'user',
                    'content': question,
                },
        ]
    
    response = ollama.chat(model = "llama3-chatqa", messages = message)
    answer = response['message']['content']
    history.append(question)
    history.append(answer)
    
    print(answer)
    

In [27]:
# chatHistory = "\n"

# latestPrompt = ""

# template = """
# You are a helpful assistant. You are given some text, conversation history and a question. 
# Answer the question based on the information given in the text or based on the conversation if needed
# If the answer is not available in the text say I dont know

# ## Text ##
# {context}

# ## Conversation ##
# {conversation}
# \n
# Question : {question}
# Answer: 

# """

# prompt = PromptTemplate.from_template(template)

# def chat(question): 
    
#     global chatHistory
#     global latestPrompt
#     # Retrieve docs from vector DB
#     #docs = vectors.similarity_search(question, k = 4)
#     #contextString = combineDocs2(docs)
#     contextString = combineDocs(retriever.invoke(question))
    
#     query = prompt.format(conversation = chatHistory, context = contextString, question = question)
#     latestPrompt = query
#     response = Model.invoke(query)
    
#     chatHistory = chatHistory + "\nQuestion : " + question + "\nAnswer : " + response

#     return response


In [28]:
chat("Tell me about the opening ceremony")

Document Content : 
Thu 10thFri 11thSat 12thSun 13thMon 14thTue 15thWed Total events ‡ The iconic Olympic rings symbol was not designed by Baron Pierre de Coubertin until 1912. Note: Silver medals were awarded to the winners, with copper medals given to the runners-up, and no prizes were given to those who came in 3rd place in any events. Opening ceremony The opening ceremony in the Panathenaic Stadium On Easter Monday 6 April (25 March according to the Julian calendar then in use in Greece), the games of the First Olympiad were officially opened.[29] The Panathenaic Stadium was filled with an estimated 80,000 spectators, including King George I of Greece, his wife Olga, and their sons. Most of the competing athletes were aligned on the infield, grouped by nation. After a speech by the president of the organising committee, Crown Prince Constantine, his father officially opened the Games with the words (in Greek):[30] "I declare the opening of the first international Olympic Games in A

In [23]:
chat("how many world records were set?")

' 14 nations and 241 athletes (all males) took part in the games.'

In [24]:
chatHistory

'\n\nQuestion : Tell me about the opening ceremony\nAnswer : \nQuestion : how many world records were set?\nAnswer :  14 nations and 241 athletes (all males) took part in the games.'

In [25]:
print(latestPrompt)


You are a helpful assistant. You are given some text, conversation history and a question. 
Answer the question based on the information given in the text or based on the conversation if needed
If the answer is not available in the text say I dont know

## Text ##
Document Content : 
 The number of competitors given according to the International Olympic Committee. Mallon & Widlund give a total of 245 athletes, while De Wael gives 246. The identities of 179 athletes at the Games are known. References ^ "Factsheet – Opening Ceremony of the Games of the Olympiad" (PDF) (Press release). International Olympic Committee. 13 September 2013. Archived (PDF) from the original on 14 August 2016. Retrieved 22 December 2018. ^ a b "First modern Olympic Games". HISTORY. Retrieved 6 April 2021. ^ a b "Athens 1896 Summer Olympics". International Olympic Committee. Retrieved 6 April 2021. ^ https://olympics.com/en/olympic-games/athens-1896 ^ a b c d e f g h i j k l m n o p Young, David C. (1996). The