### Installing required packages

In [None]:
# !pip install langchain
# !pip install chainlit
# !pip install openai
# !pip install chromadb
# !pip install tiktoken
# !pip install bs4

### Importing required libraries

In [11]:
from langchain.document_loaders import WebBaseLoader
from langchain.document_loaders import PyPDFLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma                               # to store the data in a database
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI                            # for the language model (LLM)
from langchain.prompts import PromptTemplate                            # for defining prompt templates
from langchain.chains import RetrievalQA                                # for creating retrieval-based question answering systems
import chainlit as cl                                                   # for creating the user interface

### Defining functions

In [3]:
# Defining helper functions for setting up and managing components of a language model (LLM) based question answering system.
def get_openai_api_key(file_path='openai_api_key.txt'):
    """Read the OpenAI API key from a file."""
    with open(file_path, 'r') as f:
        return f.read()

def remove_blank_lines(text):
    """Remove blank lines from text content."""
    lines = text.split('\n')
    non_empty_lines = [line.strip() for line in lines if line.strip()]
    return '\n'.join(non_empty_lines)

def load_documents_from_web(url):
    """Load documents from a web URL."""
    loader = WebBaseLoader(url)
    return loader.load()
def load_document_from_pdf(file_path):
    """Load a document from a local file in PDF format."""
    loader = PyPDFLoader(file_path)
    return loader.load()
def load_document_from_csv(file_path):
    """Load a document from a local CSV file."""
    loader = CSVLoader(file_path=file_path)
    return loader.load()
def load_pdf_documents_from_directory(directory_path):
    """Load PDF documents from a directory."""
    glob_pattern = '**/*.pdf'  # Load PDF files from all subdirectories
    loader = DirectoryLoader(directory_path, glob=glob_pattern)
    return loader.load()


def split_documents_into_chunks(docs, chunk_size=500, chunk_overlap=75):
    """Split documents into chunks."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )
    return text_splitter.split_documents(docs)

def create_vector_database(chunks, embeddings, persist_directory):
    """Create vector database from document chunks."""
    vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=persist_directory
    )
    vectordb.persist()
    return vectordb

def initialize_chat_model(model_name, openai_api_key):
    """Initialize the chat model."""
    return ChatOpenAI(model_name=model_name, temperature=0.5, openai_api_key=openai_api_key, streaming=True)


In [4]:
OPENAI_API_KEY = '#####'

### Document Loading

In [7]:
# Load documents
docs = load_documents_from_web("https://www.jioinstitute.edu.in/faq")
# Remove blank lines from each document's text content
for doc in docs:
  doc.page_content = remove_blank_lines(doc.page_content)

In [8]:
len(docs)

1

In [9]:
docs[0].page_content

"FREQUENTLY ASKED QUESTIONS - Jio Institute\nSkip to main content\nsearch\nHeader Menu\nAbout\nAbout Us\nOur Story\nOur Vision\nFounders\nSmt. Nita Ambani\nShri. Mukesh Ambani\nLeadership\nChancellor\nVice Chancellor\nProvost\nGoverning Council\nGlobal Advisory Council\nAcademic Advisors\nThe Jio Institute Advantage\nGrowth Plan\nAcademics\nPG Programmes\nPG Programmes\nArtificial Intelligence & Data Science\nDigital Media & Marketing Communications\nSports Management\nHolistic Learning\nStudy Abroad\nCapstone & Industry Immersion\nLaboratories\nLibrary\nAdmissions\nDomestic\nInternational\nEntrance Test\nPlacements\nOffice of Career Services\nCareer Support\nCorporate Relations and Collaborations\nFaculty & Research\nFaculty\nResearch\nCentre of AI for All\nOverview\nCVMI Research\nInnovation and Entrepreneurship\nJio Digital Library\nCampus Life\nSecondary menu\nHeader Top Menu\nELP\nExecutive Education\nCareers\nDigital Library\nEvents\nNews\nBlog\nGallery\nSearch\nSocial Media Link

### Document Splitting (Chunks)

In [12]:
# Split documents into chunks
chunks = split_documents_into_chunks(docs)

In [13]:
len(chunks)

93

In [15]:
chunks[5:15]

[Document(page_content='Where is Jio Institute\xa0located?\xa0The Jio Institute campus is located in Ulwe, Navi Mumbai, and is well-connected with all major locations across Mumbai through public transport.\xa0Nearby landmarksRailway Station: Bamandongri (Nerul-Uran Line) 1.1 km , 10 minute-walkBus Stop: Sector 5, Ulwe / Aai Taru Mata Chowk (NMMT) 0.2 km, 3 minute-walk.Airport: Chhatrapati Shivaji Maharaj International Airport, 44 km, approximately 1.5-hour drive.\xa0\xa0What is the leadership structure of Jio Institute?\xa0Jio', metadata={'source': 'https://www.jioinstitute.edu.in/faq', 'title': 'FREQUENTLY ASKED QUESTIONS - Jio Institute', 'description': 'Learn more about Jio Institute and the various programmes offered.', 'language': 'en'}),
 Document(page_content='1.5-hour drive.\xa0\xa0What is the leadership structure of Jio Institute?\xa0Jio Institute is led by a Governing Council, a Global Advisory Council comprising academicians from around the world as well as different Academ

### Embeddings & Vectorstores

In [16]:
# Create OpenAI embeddings
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

  warn_deprecated(


In [17]:
# Create vector database

persist_directory = 'docs/chroma_db/'
vectordb = create_vector_database(chunks, embeddings, persist_directory)

2024-04-18 12:40:00 - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2024-04-18 12:40:01 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [18]:
# to check/print embeddings

# sentence1 = "i like dogs"
# embedding1 = embeddings.embed_query(sentence1)
# embedding1
# np.dot(embedding1, embedding2)

embedding0 = embeddings.embed_query(str(chunks[0]))
embedding0[:10]

2024-04-18 12:40:14 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


[0.0034888065669464808,
 0.002677692872168161,
 -0.01174926874064707,
 -0.019412425121234877,
 -0.027326721932328372,
 0.01513626149998903,
 -0.02518185264318611,
 -0.0085862646350884,
 -0.04455355289330494,
 -0.015923618321457767]

In [19]:
vectordb._collection.count()

2103

### Similarity Search for demo

question = "is there an email i can ask for help"  
docs = vectordb.similarity_search(question,k=3)  
len(docs)  
docs

In [20]:
question = "What are the courses offered by Jio Institute"
res = vectordb.similarity_search(question,k=3)

2024-04-18 12:41:00 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [21]:
len(res)

3

In [22]:

res

[Document(page_content='training in these programmes?The programmes are comprehensive and rigorous with a good blend of theory and practice. In addition to classroom-based lectures, the programmes include experiential and hands-on learning components such as case studies, lab assignments, projects, industry visits and capstone projects, etc.How is the curriculum structured?For details about curriculum, please visit website \xa0or write to us at you may reach out to us at admissions@jioinstitute.edu.inWhat all courses do you provide? Is it a degree programme?Presently, Jio Institute offers Postgraduate Programmes in three disciplines: Artificial Intelligence & Data Science (AI & DS), Digital Media & Marketing', metadata={'description': 'Learn more about Jio Institute and the various programmes offered.', 'language': 'en', 'source': 'https://www.jioinstitute.edu.in/faq', 'title': 'FREQUENTLY ASKED QUESTIONS - Jio Institute'}),
 Document(page_content='training in these programmes?The prog

### Retrieval

In [23]:
# Define Retriever
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})

### Initialize chat model

In [24]:
llm_name = 'gpt-3.5-turbo'
llm = initialize_chat_model(llm_name, OPENAI_API_KEY)

  warn_deprecated(


In [25]:
# Retrieval QA chain checking

# go to source code for RetrievalQA
# code line - 242 : connect to vector database

qa_chain_check = RetrievalQA.from_chain_type(
    llm,
    retriever= retriever
)
res = qa_chain_check({"query": question})
res['result']

  warn_deprecated(


2024-04-18 12:41:07 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2024-04-18 12:41:08 - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


'Jio Institute offers Postgraduate Programmes in three disciplines: Artificial Intelligence & Data Science (AI & DS), Digital Media & Marketing.'

#### Building prompt

In [24]:
# defining template
template = """Please utilize the provided context to respond to the question at the end. If you're unsure of the answer, it's perfectly fine to acknowledge that you don't know rather than attempting to fabricate a response. Keep your answer concise, limiting it to three sentences at most. Additionally, remember to conclude your response with "thanks for asking!
    {context}
    Question: {question}
    Helpful Answer:"""

# creating prompt chain
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template)

#### creating qa retrieval chain

In [25]:
# go to source code for RetrievalQA
# code line - 242 : connect to vector database

qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=retriever,
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})

In [26]:
Ans = qa_chain({"query": question})
res['result']

'Jio Institute is offering postgraduate programs in the following areas: Artificial Intelligence & Data Science, Digital Media & Marketing Communications, and Sports Management. They also plan to offer programs in Computing & Engineering, Architecture & Urban Planning, Media, Communication & Journalism, Arts, Humanities & Science, Management & Entrepreneurship, Law Governance & Policy, and Medicine at UG, PG, Doctoral, and Post-Doctoral levels in the future.'

In [27]:
Ans

{'query': 'What are the courses offered by Jio Institute',
 'result': 'Jio Institute offers postgraduate programmes in Artificial Intelligence & Data Science, Digital Media & Marketing Communications, and Sports Management. For more information, you can visit their website or contact them directly. Thanks for asking!',
 'source_documents': [Document(page_content='Where can I learn more about Jio Institute’s academic programmes?Jio Institute is offering postgraduate \xa0programmes in the following areas:For information about the Artificial Intelligence & Data Science (AI & DS) curriculum, click here.For information about the Digital Media & Marketing Communications (DM & MC) curriculum, click here.For information about the Sports Management curriculum, click here.When will Jio Institute begin its academic year?For this Academic Year, classes will commence', metadata={'description': 'Learn more about Jio Institute and the various programmes offered.', 'language': 'en', 'source': 'https:/

In [10]:
import pandas as pd
df = pd.read_csv('chat_log.csv')

In [13]:
df.head()

Unnamed: 0,User's Query,Retrieval Time,Closest Similar Match 1,Closest Similar Match 2,Closest Similar Match 3,Summarization Time,Summarized Response
0,What are different type of games available ins...,0.447,Mumbai and South Mumbai.What facilities are av...,Mumbai and South Mumbai.What facilities are av...,Mumbai and South Mumbai.What facilities are av...,1.779,The Jio Institute campus offers a variety of s...
1,what courses are offered,0.744,training in these programmes?The programmes ar...,training in these programmes?The programmes ar...,training in these programmes?The programmes ar...,1.61,Jio Institute offers Postgraduate Programmes i...
2,hi,0.65,weekends?Jio Institute has set up a state-of-t...,you to better understand the application of kn...,you to better understand the application of kn...,1.043,Hello! How can I assist you today? Thanks for ...
3,who are faculties for AI DS programme,0.405,in three disciplines: Artificial Intelligence ...,in three disciplines: Artificial Intelligence ...,in three disciplines: Artificial Intelligence ...,1.822,The faculties for the AI & DS programme are ex...
4,who are faculties for DMMC programme,0.469,in DM & MC comprises electives such as Content...,in DM & MC comprises electives such as Content...,in DM & MC comprises electives such as Content...,1.958,The faculties for the DMMC programme consist o...
