In [1]:
# #langchain library
# !pip install langchain==0.1.6
# !pip install langchain-community==0.0.19
# #LLM
# !pip install accelerate==0.25.0
# !pip install transformers==4.36.2
# !pip install bitsandbytes==0.41.2
# #Text Embedding
# !pip install sentence-transformers==2.2.2
# !pip install InstructorEmbedding==1.0.1
# #vectorstore
# !pip install pymupdf==1.23.8
# !pip install faiss-gpu==1.7.2
# !pip install faiss-cpu==1.7.4

In [2]:
import os
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## 1. Prompt

A set of instructions or input provided by a user to guide the model's response, helping it understand the context and generate relevant and coherent language-based output, such as answering questions, completing sentences, or engaging in a conversation.

In [3]:
from langchain import PromptTemplate

prompt_template = """
    Welcome to the AIT Information Chatbot! I'm here to assist you with any questions you have about the Asian Institute of Technology (AIT).
    Whether you want to know about our academic programs, admissions process, campus facilities, or any other aspect of AIT, feel free to ask!
    {context}
    Question: {question}
    Answer:
    """.strip()

AIT_PROMPT = PromptTemplate.from_template(
    template=prompt_template
)

AIT_PROMPT

PromptTemplate(input_variables=['context', 'question'], template="Welcome to the AIT Information Chatbot! I'm here to assist you with any questions you have about the Asian Institute of Technology (AIT).\n    Whether you want to know about our academic programs, admissions process, campus facilities, or any other aspect of AIT, feel free to ask!\n    {context}\n    Question: {question}\n    Answer:")

In [4]:
AIT_PROMPT.format(
    context = "The Asian Institute of Technology (AIT) is an international English-speaking postgraduate institution, focusing on engineering, environment, and management studies. AIT’s rigorous academic, research, and experiential outreach programs prepare graduates for professional success and leadership roles in Asia and beyond.",
    question = "What is AIT"
)

"Welcome to the AIT Information Chatbot! I'm here to assist you with any questions you have about the Asian Institute of Technology (AIT).\n    Whether you want to know about our academic programs, admissions process, campus facilities, or any other aspect of AIT, feel free to ask!\n    The Asian Institute of Technology (AIT) is an international English-speaking postgraduate institution, focusing on engineering, environment, and management studies. AIT’s rigorous academic, research, and experiential outreach programs prepare graduates for professional success and leadership roles in Asia and beyond.\n    Question: What is AIT\n    Answer:"

Note : [How to improve prompting (Zero-shot, Few-shot, Chain-of-Thought, etc.](https://github.com/chaklam-silpasuwanchai/Natural-Language-Processing/blob/main/Code/05%20-%20RAG/advance/cot-tot-prompting.ipynb)

## 2. Retrieval

1. `Document loaders` : Load documents from many different sources (HTML, PDF, code). 
2. `Document transformers` : One of the essential steps in document retrieval is breaking down a large document into smaller, relevant chunks to enhance the retrieval process.
3. `Text embedding models` : Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of text that are similar.
4. `Vector stores`: there has emerged a need for databases to support efficient storage and searching of these embeddings.
5. `Retrievers` : Once the data is in the database, you still need to retrieve it.

### 2.1 Document Loaders 
Use document loaders to load data from a source as Document's. A Document is a piece of text and associated metadata. For example, there are document loaders for loading a simple .txt file, for loading the text contents of any web page, or even for loading a transcript of a YouTube video.

[PDF Loader](https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf)

[Download Document](https://web.stanford.edu/~jurafsky/slp3/)

In [5]:
from langchain.document_loaders import PyMuPDFLoader

nlp_docs = './docs/ait_paragraphs.pdf'

loader = PyMuPDFLoader(nlp_docs)
documents = loader.load()

In [6]:
# import os
# from langchain.document_loaders import PyMuPDFLoader

# # Directory containing the PDF files
# pdf_folder = '/home/koala/github/NLP/NLP-A7/about_ait'

# # Loop through all PDF files in the directory
# for filename in os.listdir(pdf_folder):
#     if filename.endswith('.pdf'):
#         # Construct the full path to the PDF file
#         pdf_path = os.path.join(pdf_folder, filename)
#         # Load the PDF file using PyMuPDFLoader
#         loader = PyMuPDFLoader(pdf_path)
#         # Load the document
#         documents = loader.load()
        
#         # Now you can work with 'document' as needed
#         # For example, you can process the document here
#         # or perform any other operations you need

In [7]:
documents

[Document(page_content='Schools Asian Institute of Technology BOI STP Intranet Students Contact\nGiving Alumni About About AIT Facts and figures Rankings Leadership\nPeople Meet our faculty Meet our staff Location Academics Academic\ncalendar Academic Programs Study options Student opportunities Schools Apply Admissions\nEligibility Financial aid Tuition and fees Student housing Apply online\nFrequently Asked Questions Research Research Overview Research Themes Research Publications\nResearch Projects Know Our Researcher Centers SEARCH MENU Schools Campus\nlife Housing Dining Health and Wellness Cultural diversity Athletics Career\ndevelopment Facilities AIT Conference Center Virtual tour Key thematic areas\nClimate change Smart Communities Food Energy Water Infrastructure Technology Policy\nSociety Sustainability Partners News Events Work at AIT Emergency contacts\nCovid management About About AIT Facts and figures Rankings LeadershipPeople\nMeet our facultyMeet our staff LocationAcad

In [8]:
len(documents) # sample the documents by page

33

In [9]:
documents[1]

Document(page_content='cookies are absolutely essential for the website to function properly\nThese cookies ensure basic functionalities and security features of the\nwebsite anonymously CookieDurationDescriptioncookielawinfo checkbox monthsThis cookie is set by\nGDPR\nCookie Consent plugin The cookie is used to store the\nuser consent for the cookies in the category Analytics cookielawinfo\ncheckbox monthsThe cookie is set by GDPR cookie consent to\nrecord the user consent for the cookies in the category\nFunctional cookielawinfo checkbox monthsThis cookie is set by GDPR Cookie\nConsent plugin The cookies is used to store the user\nconsent for the cookies in the category Necessary cookielawinfo checkbox\nmonthsThis cookie is set by GDPR Cookie Consent plugin The\ncookie is used to store the user consent for the\ncookies in the category Other cookielawinfo checkbox monthsThis cookie is\nset by GDPR Cookie Consent plugin The cookie is used\nto store the user consent for the cookies in t

### 2.2 Document Transformers

This text splitter is the recommended one for generic text. It is parameterized by a list of characters. It tries to split on them in order until the chunks are small enough

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap = 100
)

doc = text_splitter.split_documents(documents)

In [11]:
doc[1]

Document(page_content='development Facilities AIT Conference Center Virtual tour Key thematic areas\nClimate change Smart Communities Food Energy Water Infrastructure Technology Policy\nSociety Sustainability Partners News Events Work at AIT Emergency contacts\nCovid management About About AIT Facts and figures Rankings LeadershipPeople\nMeet our facultyMeet our staff LocationAcademics Academic calendar Academic Programs\nStudy options Student opportunities SchoolsApply Admissions Eligibility Financial aid Tuition\nand fees Student housing Apply online Frequently Asked QuestionsResearch Research\nOverview Research Themes Research Publications Research Projects Know Our Researcher', metadata={'source': './docs/ait_paragraphs.pdf', 'file_path': './docs/ait_paragraphs.pdf', 'page': 0, 'total_pages': 33, 'format': 'PDF 1.4', 'title': '(anonymous)', 'author': '(anonymous)', 'subject': '(unspecified)', 'keywords': '', 'creator': '(unspecified)', 'producer': 'ReportLab PDF Library - www.repor

In [12]:
len(doc)

201

### 2.3 Text Embedding Models
Embeddings create a vector representation of a piece of text. This is useful because it means we can think about text in the vector space, and do things like semantic search where we look for pieces of text that are most similar in the vector space.

*Note* Instructor Model : [Huggingface](gingface.co/hkunlp/instructor-base) | [Paper](https://arxiv.org/abs/2212.09741)

In [13]:
# !pip install --upgrade langchain langchain_community

In [14]:
import torch
from langchain.embeddings import HuggingFaceInstructEmbeddings

model_name = 'hkunlp/instructor-base'

embedding_model = HuggingFaceInstructEmbeddings(
    model_name = model_name,
    model_kwargs = {"device" : device}
)

  from tqdm.autonotebook import trange


load INSTRUCTOR_Transformer
max_seq_length  512


### 2.4 Vector Stores

One of the most common ways to store and search over unstructured data is to embed it and store the resulting embedding vectors, and then at query time to embed the unstructured query and retrieve the embedding vectors that are 'most similar' to the embedded query. A vector store takes care of storing embedded data and performing vector search for you.

In [15]:
#locate vectorstore
vector_path = './vector-store'
if not os.path.exists(vector_path):
    os.makedirs(vector_path)
    print('create path done')

In [16]:
#save vector locally
from langchain.vectorstores import FAISS

vectordb = FAISS.from_documents(
    documents = doc,
    embedding = embedding_model
)

db_file_name = 'nlp_stanford'

vectordb.save_local(
    folder_path = os.path.join(vector_path, db_file_name),
    index_name = 'nlp' #default index
)

### 2.5 retrievers
A retriever is an interface that returns documents given an unstructured query. It is more general than a vector store. A retriever does not need to be able to store documents, only to return (or retrieve) them. Vector stores can be used as the backbone of a retriever, but there are other types of retrievers as well.

In [17]:
#calling vector from local
vector_path = './vector-store'
db_file_name = 'nlp_stanford'

from langchain.vectorstores import FAISS

vectordb = FAISS.load_local(
    folder_path = os.path.join(vector_path, db_file_name),
    embeddings = embedding_model,
    index_name = 'nlp', #default index
)   

In [18]:
#ready to use
retriever = vectordb.as_retriever()

In [19]:
retriever.get_relevant_documents("What is AIT")

[Document(page_content='and WellnessCultural diversityAthleticsCareer developmentFacilitiesAIT Conference CenterVirtual\ntourKey thematic areas Climate\nchangeSmart CommunitiesFood Energy WaterInfrastructureTechnology Policy Society Sustainability\nPartners News Events\nWork at AIT BOI STP Intranet Students Contact Giving Alumni\nEmergency contacts Covid management Home AboutAbout AITAIT is an international\nEnglish speaking postgraduate institution focusing on engineering environment and management\nstudies In this section About AIT Facts and figures Rankings\nLeadershipPeopleMeet our facultyMeet our staff LocationWelcome to AITThe Asian Institute', metadata={'source': './docs/ait_paragraphs.pdf', 'file_path': './docs/ait_paragraphs.pdf', 'page': 4, 'total_pages': 33, 'format': 'PDF 1.4', 'title': '(anonymous)', 'author': '(anonymous)', 'subject': '(unspecified)', 'keywords': '', 'creator': '(unspecified)', 'producer': 'ReportLab PDF Library - www.reportlab.com', 'creationDate': "D:20

In [20]:
retriever.get_relevant_documents("Where AIT located")

[Document(page_content='LocationCentersAcademicsAcademic calendarAcademic ProgramsStudy optionsStudent\nopportunitiesSchoolsApply to AITAdmissionsFinancial aidTuition and feesStudent\nhousingApply onlineResearchResearch CentersResearch ThemesResearch ProjectsResearch\nPublicationsCampus lifeHousingDiningHealth WellnessCultural diversityAthleticsCareer\ndevelopmentFacilitiesVirtual\ntour Partners News Events Key thematic areas Work at AIT\nBranding Contact A Z Units Asian Institute of Technology All\nRights Reserved Designed by OutsourcifyPrivacy PolicyTerms and conditionsWe use cookies\non our website to give you the most relevant experience\nby remembering your preferences and repeat visits By clicking Accept', metadata={'source': './docs/ait_paragraphs.pdf', 'file_path': './docs/ait_paragraphs.pdf', 'page': 21, 'total_pages': 33, 'format': 'PDF 1.4', 'title': '(anonymous)', 'author': '(anonymous)', 'subject': '(unspecified)', 'keywords': '', 'creator': '(unspecified)', 'producer': 'R

## 3. Memory

One of the core utility classes underpinning most (if not all) memory modules is the ChatMessageHistory class. This is a super lightweight wrapper that provides convenience methods for saving HumanMessages, AIMessages, and then fetching them all.

You may want to use this class directly if you are managing memory outside of a chain.


In [21]:
from langchain.memory import ChatMessageHistory

history = ChatMessageHistory()
history

ChatMessageHistory(messages=[])

In [22]:
history.add_user_message('hi')
history.add_ai_message('Whats up?')
history.add_user_message('How are you')
history.add_ai_message('I\'m quite good. How about you?')

In [23]:
history

ChatMessageHistory(messages=[HumanMessage(content='hi'), AIMessage(content='Whats up?'), HumanMessage(content='How are you'), AIMessage(content="I'm quite good. How about you?")])

### 3.1 Memory types

There are many different types of memory. Each has their own parameters, their own return types, and is useful in different scenarios. 
- Converstaion Buffer
- Converstaion Buffer Window

What variables get returned from memory

Before going into the chain, various variables are read from memory. These have specific names which need to align with the variables the chain expects. You can see what these variables are by calling memory.load_memory_variables({}). Note that the empty dictionary that we pass in is just a placeholder for real variables. If the memory type you are using is dependent upon the input variables, you may need to pass some in.

In this case, you can see that load_memory_variables returns a single key, history. This means that your chain (and likely your prompt) should expect an input named history. You can usually control this variable through parameters on the memory class. For example, if you want the memory variables to be returned in the key chat_history you can do:

#### Converstaion Buffer
This memory allows for storing messages and then extracts the messages in a variable.

In [24]:
# !python3 --version

In [25]:
# !pip show langchain_community

In [26]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory()
memory.save_context({'input':'hi'}, {'output':'What\'s up?'})
memory.save_context({"input":'How are you?'},{'output': 'I\'m quite good. How about you?'})
memory.load_memory_variables({})

{'history': "Human: hi\nAI: What's up?\nHuman: How are you?\nAI: I'm quite good. How about you?"}

In [27]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages = True)
memory.save_context({'input':'hi'}, {'output':'What\'s up?'})
memory.save_context({"input":'How are you?'},{'output': 'I\'m quite good. How about you?'})
memory.load_memory_variables({})

{'history': [HumanMessage(content='hi'),
  AIMessage(content="What's up?"),
  HumanMessage(content='How are you?'),
  AIMessage(content="I'm quite good. How about you?")]}

#### Conversation Buffer Window
- it keeps a list of the interactions of the conversation over time. 
- it only uses the last K interactions. 
- it can be useful for keeping a sliding window of the most recent interactions, so the buffer does not get too large.

In [28]:
from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=1)
memory.save_context({'input':'hi'}, {'output':'What\'s up?'})
memory.save_context({"input":'How are you?'},{'output': 'I\'m quite good. How about you?'})
memory.load_memory_variables({})

{'history': "Human: How are you?\nAI: I'm quite good. How about you?"}

## 4. Chain

Using an LLM in isolation is fine for simple applications, but more complex applications require chaining LLMs - either with each other or with other components.

An `LLMChain` is a simple chain that adds some functionality around language models.
- it consists of a `PromptTemplate` and a `LM` (either an LLM or chat model).
- it formats the prompt template using the input key values provided (and also memory key values, if available), 
- it passes the formatted string to LLM and returns the LLM output.

Note : [Download Fastchat Model Here](https://huggingface.co/lmsys/fastchat-t5-3b-v1.0)

In [29]:
# %cd ./models
# !git clone https://huggingface.co/lmsys/fastchat-t5-3b-v1.0

/home/koala/github/NLP/NLP-A7/models
fatal: destination path 'fastchat-t5-3b-v1.0' already exists and is not an empty directory.


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [30]:
# !pip install bitsandbytes==0.41.2

In [31]:
# !pip install bitsandbytes

In [32]:
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
from transformers import BitsAndBytesConfig
from langchain import HuggingFacePipeline
import torch

model_id = './fastchat-t5-3b-v1.0/'

tokenizer = AutoTokenizer.from_pretrained(
    model_id)

tokenizer.pad_token_id = tokenizer.eos_token_id

bitsandbyte_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = torch.float16,
    bnb_4bit_use_double_quant = True
)

model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    quantization_config = bitsandbyte_config, #caution Nvidia
    device_map="cuda:0",
    load_in_8bit = True
)

#   load_in_8bit_fp32_cpu_offload=True,  # Adding this line
#     device_map={"cuda:0": "cpu"}  # Adding this line

pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens = 256,
    model_kwargs = {
        "temperature" : 0,
        "repetition_penalty": 1.5
    }
)

llm = HuggingFacePipeline(pipeline = pipe)

2024-03-20 17:38:18.325502: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-20 17:38:18.700471: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was

### [Class ConversationalRetrievalChain](https://api.python.langchain.com/en/latest/_modules/langchain/chains/conversational_retrieval/base.html#ConversationalRetrievalChain)

- `retriever` : Retriever to use to fetch documents.

- `combine_docs_chain` : The chain used to combine any retrieved documents.

- `question_generator`: The chain used to generate a new question for the sake of retrieval. This chain will take in the current question (with variable question) and any chat history (with variable chat_history) and will produce a new standalone question to be used later on.

- `return_source_documents` : Return the retrieved source documents as part of the final result.

- `get_chat_history` : An optional function to get a string of the chat history. If None is provided, will use a default.

- `return_generated_question` : Return the generated question as part of the final result.

- `response_if_no_docs_found` : If specified, the chain will return a fixed response if no docs are found for the question.


`question_generator`

In [33]:
from langchain.chains import LLMChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain

In [34]:
CONDENSE_QUESTION_PROMPT

PromptTemplate(input_variables=['chat_history', 'question'], template='Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\n\nChat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:')

In [35]:
question_generator = LLMChain(
    llm = llm,
    prompt = CONDENSE_QUESTION_PROMPT,
    verbose = True
)

In [36]:
query = 'Summarize the answer'
chat_history = "Human:What is AIT\nAI:\nHuman:Which courses AIT offered\nAI:"

question_generator({'chat_history' : chat_history, "question" : query})

  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
Human:What is AIT
AI:
Human:Which courses AIT offered
AI:
Follow Up Input: Summarize the answer
Standalone question:[0m

[1m> Finished chain.[0m


{'chat_history': 'Human:What is AIT\nAI:\nHuman:Which courses AIT offered\nAI:',
 'question': 'Summarize the answer',
 'text': '<pad> What  are  the  courses  offered  by  AIT?\n'}

`combine_docs_chain`

In [37]:
doc_chain = load_qa_chain(
    llm = llm,
    chain_type = 'stuff',
    prompt = AIT_PROMPT,
    verbose = True
)
doc_chain

StuffDocumentsChain(verbose=True, llm_chain=LLMChain(verbose=True, prompt=PromptTemplate(input_variables=['context', 'question'], template="Welcome to the AIT Information Chatbot! I'm here to assist you with any questions you have about the Asian Institute of Technology (AIT).\n    Whether you want to know about our academic programs, admissions process, campus facilities, or any other aspect of AIT, feel free to ask!\n    {context}\n    Question: {question}\n    Answer:"), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text2text_generation.Text2TextGenerationPipeline object at 0x7950ca3c43d0>)), document_variable_name='context')

In [38]:
query = "who is dr chaklam"
input_document = retriever.get_relevant_documents(query)

doc_chain({'input_documents':input_document, 'question':query})



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWelcome to the AIT Information Chatbot! I'm here to assist you with any questions you have about the Asian Institute of Technology (AIT).
    Whether you want to know about our academic programs, admissions process, campus facilities, or any other aspect of AIT, feel free to ask!
    All Rights Reserved Designed by OutsourcifyPrivacy PolicyTerms and conditionsWe use
cookies on our website to give you the most relevant
experience by remembering your preferences and repeat visits By clicking
Accept All you consent to the use of ALL the
cookies However you may visit Cookie Settings to provide a
controlled consent Cookie SettingsAccept AllManage consent Close Privacy OverviewThis website
uses cookies to improve your experience while you navigate through
the website Out of these the cookies that are categorized
as necessary are stored on your browser as they

{'input_documents': [Document(page_content='All Rights Reserved Designed by OutsourcifyPrivacy PolicyTerms and conditionsWe use\ncookies on our website to give you the most relevant\nexperience by remembering your preferences and repeat visits By clicking\nAccept All you consent to the use of ALL the\ncookies However you may visit Cookie Settings to provide a\ncontrolled consent Cookie SettingsAccept AllManage consent Close Privacy OverviewThis website\nuses cookies to improve your experience while you navigate through\nthe website Out of these the cookies that are categorized\nas necessary are stored on your browser as they are\nessential for the working of basic functionalities of the website', metadata={'source': './docs/ait_paragraphs.pdf', 'file_path': './docs/ait_paragraphs.pdf', 'page': 19, 'total_pages': 33, 'format': 'PDF 1.4', 'title': '(anonymous)', 'author': '(anonymous)', 'subject': '(unspecified)', 'keywords': '', 'creator': '(unspecified)', 'producer': 'ReportLab PDF Lib

In [39]:
memory = ConversationBufferWindowMemory(
    k=3, 
    memory_key = "chat_history",
    return_messages = True,
    output_key = 'answer'
)

chain = ConversationalRetrievalChain(
    retriever=retriever,
    question_generator=question_generator,
    combine_docs_chain=doc_chain,
    return_source_documents=True,
    memory=memory,
    verbose=True,
    get_chat_history=lambda h : h
)
chain

ConversationalRetrievalChain(memory=ConversationBufferWindowMemory(output_key='answer', return_messages=True, memory_key='chat_history', k=3), verbose=True, combine_docs_chain=StuffDocumentsChain(verbose=True, llm_chain=LLMChain(verbose=True, prompt=PromptTemplate(input_variables=['context', 'question'], template="Welcome to the AIT Information Chatbot! I'm here to assist you with any questions you have about the Asian Institute of Technology (AIT).\n    Whether you want to know about our academic programs, admissions process, campus facilities, or any other aspect of AIT, feel free to ask!\n    {context}\n    Question: {question}\n    Answer:"), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text2text_generation.Text2TextGenerationPipeline object at 0x7950ca3c43d0>)), document_variable_name='context'), question_generator=LLMChain(verbose=True, prompt=PromptTemplate(input_variables=['chat_history', 'question'], template='Given the following conversation and a follow up questi

## 5. Chatbot

In [40]:
prompt_question = "Who are you by the way?"
answer = chain({"question":prompt_question})
answer



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWelcome to the AIT Information Chatbot! I'm here to assist you with any questions you have about the Asian Institute of Technology (AIT).
    Whether you want to know about our academic programs, admissions process, campus facilities, or any other aspect of AIT, feel free to ask!
    All Rights Reserved Designed by OutsourcifyPrivacy PolicyTerms and conditionsWe use
cookies on our website to give you the most relevant
experience by remembering your preferences and repeat visits By clicking
Accept All you consent to the use of ALL the
cookies However you may visit Cookie Settings to provide a
controlled consent Cookie SettingsAccept AllManage consent Close Privacy OverviewThis website
uses cookies to improve your experience while you navigate through
the website Out of these the cookies that 

{'question': 'Who are you by the way?',
 'chat_history': [],
 'answer': "<pad>  I'm  the  AIT  Information  Chatbot!  I'm  here  to  assist  you  with  any  questions  you  have  about  the  Asian  Institute  of  Technology  (AIT).  Whether  you  want  to  know  about  our  academic  programs,  admissions  process,  campus  facilities,  or  any  other  aspect  of  AIT,  feel  free  to  ask!\n",
 'source_documents': [Document(page_content='All Rights Reserved Designed by OutsourcifyPrivacy PolicyTerms and conditionsWe use\ncookies on our website to give you the most relevant\nexperience by remembering your preferences and repeat visits By clicking\nAccept All you consent to the use of ALL the\ncookies However you may visit Cookie Settings to provide a\ncontrolled consent Cookie SettingsAccept AllManage consent Close Privacy OverviewThis website\nuses cookies to improve your experience while you navigate through\nthe website Out of these the cookies that are categorized\nas necessary are

In [41]:
prompt_question = "What is AIT"
answer = chain({"question":prompt_question})
answer



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
[HumanMessage(content='Who are you by the way?'), AIMessage(content="<pad>  I'm  the  AIT  Information  Chatbot!  I'm  here  to  assist  you  with  any  questions  you  have  about  the  Asian  Institute  of  Technology  (AIT).  Whether  you  want  to  know  about  our  academic  programs,  admissions  process,  campus  facilities,  or  any  other  aspect  of  AIT,  feel  free  to  ask!\n")]
Follow Up Input: What is AIT
Standalone question:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWelcome to the AIT Information Chatbot! I'm here to assist you with any questions yo

{'question': 'What is AIT',
 'chat_history': [HumanMessage(content='Who are you by the way?'),
  AIMessage(content="<pad>  I'm  the  AIT  Information  Chatbot!  I'm  here  to  assist  you  with  any  questions  you  have  about  the  Asian  Institute  of  Technology  (AIT).  Whether  you  want  to  know  about  our  academic  programs,  admissions  process,  campus  facilities,  or  any  other  aspect  of  AIT,  feel  free  to  ask!\n")],
 'answer': '<pad> < pad>Asian  Institute  of  Technology  (AIT)  is  a  private  research  university  located  in  Singapore.  It  offers  undergraduate  and  graduate  programs  in  various  fields  of  technology,  including  engineering,  computer  science,  and  business.  AIT  is  a  member  of  the  Association  of  Indian  Universities  (AIU)  and  is  a  member  of  the  Association  of  Indian  Universities  (AIU).\n',
 'source_documents': [Document(page_content='Asian Institute of Technology All Rights Reserved Designed by OutsourcifyPrivac

In [42]:
prompt_question = "Which courses AIT offered"
answer = chain({"question":prompt_question})
answer



[1m> Entering new ConversationalRetrievalChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
[HumanMessage(content='Who are you by the way?'), AIMessage(content="<pad>  I'm  the  AIT  Information  Chatbot!  I'm  here  to  assist  you  with  any  questions  you  have  about  the  Asian  Institute  of  Technology  (AIT).  Whether  you  want  to  know  about  our  academic  programs,  admissions  process,  campus  facilities,  or  any  other  aspect  of  AIT,  feel  free  to  ask!\n"), HumanMessage(content='What is AIT'), AIMessage(content='<pad> < pad>Asian  Institute  of  Technology  (AIT)  is  a  private  research  university  located  in  Singapore.  It  offers  undergraduate  and  graduate  programs  in  various  fields  of  technology,  including  engineering,  computer  sci

{'question': 'Which courses AIT offered',
 'chat_history': [HumanMessage(content='Who are you by the way?'),
  AIMessage(content="<pad>  I'm  the  AIT  Information  Chatbot!  I'm  here  to  assist  you  with  any  questions  you  have  about  the  Asian  Institute  of  Technology  (AIT).  Whether  you  want  to  know  about  our  academic  programs,  admissions  process,  campus  facilities,  or  any  other  aspect  of  AIT,  feel  free  to  ask!\n"),
  HumanMessage(content='What is AIT'),
  AIMessage(content='<pad> < pad>Asian  Institute  of  Technology  (AIT)  is  a  private  research  university  located  in  Singapore.  It  offers  undergraduate  and  graduate  programs  in  various  fields  of  technology,  including  engineering,  computer  science,  and  business.  AIT  is  a  member  of  the  Association  of  Indian  Universities  (AIU)  and  is  a  member  of  the  Association  of  Indian  Universities  (AIU).\n')],
 'answer': '<pad> < pad>\n Undergraduate  programs  offered  

In [44]:
import cloudpickle

# Dump the chain object
with open('conversational_chain_t5.pkl', 'wb') as f:
    cloudpickle.dump(chain, f)