# CHATBOT USING LANGCHAIN AND PINECONE

In [2]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file
openai.api_key = os.environ['OPENAI_API_KEY']
pinecone_api_key = os.environ['PINECONE_API_KEY']

# Document loader

In [3]:
#json file path
file_path='dataset_final_with_questions.json'


In [4]:
from pprint import pprint
import json
# load json file and view the contents
with open(file_path) as file:
    data_set = json.load(file)
pprint(data_set)

[{'source': 'https://www.canada.ca/en/immigration-refugees-citizenship/services/immigrate-canada/express-entry/apply-permanent-residence/check-your-status.html',
  'subheadings': [{'content': '1. Fill out the online form\n'
                              '2. Scan and upload the documents\n'
                              '3. Pay your fees\n'
                              '4. Submit your complete application\n'
                              '5. After you apply\n'
                              'Check your application status',
                   'questions': ['1. What are the steps involved in the '
                                 'Express Entry application process?',
                                 '2. How can I check the status of my Express '
                                 'Entry application?',
                                 '3. What is the first step in the Express '
                                 'Entry application process?',
                                 '4. What documents 

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [5]:
"""Loader that loads data from JSON."""

from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class JSONLoader(BaseLoader):
    def __init__(
        self,
        file_path: Union[str, Path],
        content_key: Optional[str] = None,
        ):
        self.file_path = Path(file_path).resolve()
        self._content_key = content_key
        
    def load(self) -> List[Document]:
        """Load and return documents from the JSON file."""

        docs=[]
        # Load JSON file
        with open(self.file_path) as file:
            data = json.load(file)

        # Iterate through 'pages'
        for c in data:
            parenturl = c['source']
            pagetitle = c['title']
            snippets = c['subheadings']
            metadata={"title":pagetitle, "source": parenturl}
            
            print(pagetitle)
            # # Process snippets for each page
            for snippet in snippets:
                content = snippet['content']
                subheading = snippet['subheading']
                docs.append(Document(page_content=content, metadata=metadata))
        return docs


In [6]:
# load the data using JSONLoader and printing the page tiles
loader = JSONLoader(file_path=file_path)
data = loader.load()


Express Entry: Check your application status
Immigrate to Canada
Documents for Express Entry
Express Entry: Check your application status
Welcome to Canada: Finding a place to live
Housing and accommodations in Canada
How the Provincial Nominee Program (PNP) works
Immigrate as a provincial nominee
Immigrate as a provincial nominee (non-Express Entry process)
Immigrate as a provincial nominee through Express Entry
IRCC Help Centre
Study permit: Get the right documents
Study permit: How to apply
Study permit: Prepare for arrival
The Proof of Relationship to a Relative in Canada
Open work permit: Applying as a sponsored spouse or common-law partner in Canada
Canada visa interview questions


In [7]:
pprint(data[:5])

[Document(page_content='1. Fill out the online form\n2. Scan and upload the documents\n3. Pay your fees\n4. Submit your complete application\n5. After you apply\nCheck your application status', metadata={'title': '\nExpress Entry: Check your application status', 'source': 'https://www.canada.ca/en/immigration-refugees-citizenship/services/immigrate-canada/express-entry/apply-permanent-residence/check-your-status.html'}),
 Document(page_content='Starting February 8, 2023, use the application status tracker(opens in a new tab)\xa0 to get the most up-to-date and detailed information about the status of your application.\nYou have 60 days after you’ve been invited to apply to submit your application for permanent residence.\nOnce you have submitted your application, you can check its status in your account.\nGo to “View the applications you submitted”\nClick “Application status and messages”\nOn the “Application status and messages” page, you’re able to see\nthe overall status of your appl

In [8]:
# split based on characters
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap = 150
)

In [9]:
# splittng the document to chunks

splits = text_splitter.split_documents(data)

In [10]:
# Sanity check
len(data), len(splits)

(3674, 5896)

# Vector Database - Pinecone

Pinecone is a vector database to store the embeddings. It has many advantages like storing querrying embeddings quickly and at scale. 
https://docs.pinecone.io/docs/overview

In [56]:
from tqdm.autonotebook import tqdm
import pinecone

In [57]:
# api key from the pinecone account - free tier allows 1 index
# Before creating embeddings, index should be created in Pinecone 
# Since "all-MiniLM-L6-v2" model is used for embedding, index should be created with dimension 384
pinecone.init(api_key=pinecone_api_key, 
              environment="gcp-starter")

## Embeddings

In [13]:
# Embeddings extracted from pretrained model in hugging face

from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings

embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [14]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False)

In [58]:
index_name = "chatbot"

In [59]:
from langchain.vectorstores import Pinecone
# run below line only if you want to add embedding to the vector database(in this case pinecone)

# vector_store = Pinecone.from_texts([split.page_content for split in splits], embeddings, index_name=index_name)

In [17]:
# load embeddings from existing index in vector database
vector_store = Pinecone.from_existing_index(index_name=index_name, embedding = embeddings)

# Retrieval Augmented Generation (RAG) flow

In [130]:
# initializing openai chat model
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
llm.predict("Hello world!")

'Hello! How can I assist you today?'

In [131]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """You are a virtual assistant to help users about questions on Canadian immigration process. \
      Use the following pieces of context to answer the question at the end.\
      If you don't know the answer, just say that you don't know, don't try to make up an answer.\
      Keep the answer as concise as possible. \
      Add bullet points in the reply while explaining a process or steps \
         
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template,)


In [25]:
query_1 = "What is express entry"
query_2 = "I'm a student. Can I apply under that category?"

In [26]:
# Run chain
from langchain.chains import RetrievalQA
# question_1 = "I got PR yesterday. Can I apply sponsorship for my wife and kid?"
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vector_store.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})

In [27]:
result_1 = qa_chain({"query": query_1})
print(result_1["result"])

Express Entry is an online system used by the Canadian government to manage applications for permanent residence from skilled workers. It is a competitive immigration program that allows individuals to submit their profiles and be considered for various economic immigration programs, such as the Federal Skilled Worker Program, the Federal Skilled Trades Program, and the Canadian Experience Class. The Express Entry system uses a points-based system called the Comprehensive Ranking System (CRS) to rank candidates based on factors such as age, education, work experience, language proficiency, and adaptability. Candidates with the highest CRS scores are invited to apply for permanent residence through regular draws conducted by the government.


In [29]:
result_2 = qa_chain({"query": query_2})
print(result_2["result"])

Yes, as a student, you may be eligible to apply for Canadian immigration under certain categories.


# Simple chatbot

In [133]:
# while True:
#     prompt = input(prompt="User: ")
#     # print()
#     result = qa({"question": prompt})
#     print("Bot: " + result["answer"])
#     # print()

# Adding Memory

In [126]:
from langchain.memory import ConversationBufferWindowMemory,ConversationBufferMemory 
from langchain.chains import ConversationChain
# memory = ConversationBufferWindowMemory(memory_key="chat_history", verbose = True)

In [127]:
from langchain.prompts.prompt import PromptTemplate

custom_template = """
you are a virtual assistant to help users about questions on Canadian immigration process. \
Use the following pieces of context to answer the question at the end.\
If you don't know the answer, just say that you don't know, don't try to make up an answer.\
Keep the answer as concise as possible. \
Given the following conversation and a follow up question, \
rephrase the follow up question to be a standalone question, in its original language\
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(template = custom_template,)

In [128]:
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

In [129]:
from langchain.chains import ConversationalRetrievalChain
# memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
memory = ConversationBufferWindowMemory(k = 2, memory_key="chat_history",return_messages=True, verbose = True)
qa = ConversationalRetrievalChain.from_llm(
    llm,
    vector_store.as_retriever(),
    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
    memory=memory
)

In [125]:
result = qa({"question": "What are the documents needed to apply student visa", })

In [109]:
print(result['answer'])

To apply for a student visa, you will typically need the following documents:

1. Valid passport: You will need a valid passport that does not expire during your intended stay in the country.

2. Letter of acceptance: You will need a copy of the letter of acceptance from the educational institution where you have been accepted to study.

3. Proof of financial support: You may need to provide evidence that you have enough funds to cover your tuition fees, living expenses, and other costs during your studies.

4. Proof of ties to your home country: You may need to demonstrate that you have strong ties to your home country, such as family, property, or employment, to show that you have reasons to return after completing your studies.

5. Medical examination: Some countries may require you to undergo a medical examination to ensure that you are in good health.

6. Biometrics: Depending on your citizenship, you may need to provide biometric information, such as fingerprints and a photo.

7.

In [94]:
# query = "What is express entry"
result = qa({"question": query_1})

In [65]:
print(result['answer'])

Express Entry is an online system used by the Government of Canada to manage applications for permanent residence from skilled workers. It is a competitive immigration program that allows individuals to submit their profiles and be considered for immigration to Canada based on factors such as age, education, work experience, language proficiency, and other criteria. The Express Entry system is used to select candidates for three main economic immigration programs: the Federal Skilled Worker Program, the Federal Skilled Trades Program, and the Canadian Experience Class.


In [66]:
# query = "I'm a student. Can I apply under that category?"
result = qa({"question": query_2})

In [67]:
print(result['answer'])

Yes, students can apply under the Express Entry category if they meet the requirements of one of the three federal programs covered by Express Entry. However, it is important to note that having a nomination from a province or territory can also be beneficial for students applying through Express Entry.


In [68]:
result

{'question': "I'm a student. Can I apply under that category?",
 'chat_history': [HumanMessage(content='What is express entry', additional_kwargs={}, example=False),
  AIMessage(content='Express Entry is an online system used by the Government of Canada to manage applications for permanent residence from skilled workers. It is a competitive immigration program that allows individuals to submit their profiles and be considered for immigration to Canada based on factors such as age, education, work experience, language proficiency, and other criteria. The Express Entry system is used to select candidates for three main economic immigration programs: the Federal Skilled Worker Program, the Federal Skilled Trades Program, and the Canadian Experience Class.', additional_kwargs={}, example=False)],
 'answer': 'Yes, students can apply under the Express Entry category if they meet the requirements of one of the three federal programs covered by Express Entry. However, it is important to note t

In [70]:
query_3 = "Thanks for the answer"
result = qa({"question": query_3})

In [72]:
print(result['answer'])

The minimum requirements for students to apply under the Express Entry category include:

1. Meeting the minimum requirements for one or more Express Entry programs.
2. Having work experience that meets the requirements of the Federal Skilled Worker Program, which includes being paid by wages or commissions, having continuous employment with no gaps, and meeting all other program requirements.
3. Taking a language test to demonstrate proficiency in listening, speaking, reading, and writing. The test results must be included in the Express Entry profile.
4. Meeting the eligibility criteria of the specific programs, such as the Federal Skilled Worker Program, Canadian Experience Class, or Federal Skilled Trades Program.


In [73]:
result

{'question': 'Thanks for the answer',
 'chat_history': [HumanMessage(content="I'm a student. Can I apply under that category?", additional_kwargs={}, example=False),
  AIMessage(content='Yes, students can apply under the Express Entry category if they meet the requirements of one of the three federal programs covered by Express Entry. However, it is important to note that having a nomination from a province or territory can also be beneficial for students applying through Express Entry.', additional_kwargs={}, example=False),
  HumanMessage(content='Thanks for the answer', additional_kwargs={}, example=False),
  AIMessage(content='To apply under the Express Entry category as a student, you must meet the minimum requirements for one or more Express Entry programs. Specifically, for the Federal Skilled Worker Program, student work experience gained while studying in Canada or abroad can count towards the minimum requirements if the work was paid by wages or commissions, was continuous wi

In [75]:
query_4 = "Can you provide the link for the application process"
result = qa({"question": query_3})

In [78]:
print(result['answer'])

The minimum requirements for students to apply under the Express Entry category include:

1. Meeting the minimum requirements for one or more Express Entry programs.
2. Having work experience that meets the requirements of the Federal Skilled Worker Program, which includes being paid by wages or commissions, having continuous employment with no gaps, and meeting all other program requirements.
3. Taking a language test to demonstrate proficiency in listening, speaking, reading, and writing. The test results must be included in the Express Entry profile.
4. Meeting the specific eligibility criteria of the program(s) you are applying for, such as the Federal Skilled Worker Program, Canadian Experience Class, or Federal Skilled Trades Program.


In [153]:
result = qa({"question": question_1})

In [154]:
print(result['answer'])

Yes, if your spouse and dependent child are eligible to be sponsored and qualify for the sponsorship program, you can apply to sponsor them. There is no special process for sponsoring family members, so you would just apply using the normal process to sponsor your spouse and child.


In [155]:
result = qa({"question": question_2})
print(result['answer'])

The process for sponsoring your spouse and child is the same as the normal sponsorship process. You can apply to sponsor them using the regular sponsorship application.


In [156]:
question = " Can you explain the sponsorship process?"
qa({"question": question})

{'question': ' Can you explain the sponsorship process?',
 'chat_history': [HumanMessage(content='I got PR yesterday. Can I apply sponsorship for my wife and kid?', additional_kwargs={}, example=False),
  AIMessage(content='Yes, if your spouse and dependent child are eligible to be sponsored and qualify for the sponsorship program, you can apply to sponsor them. There is no special process for sponsoring family members, so you would just apply using the normal process to sponsor your spouse and child.', additional_kwargs={}, example=False),
  HumanMessage(content='How should I apply for that?', additional_kwargs={}, example=False),
  AIMessage(content='The process for sponsoring your spouse and child is the same as the normal sponsorship process. You can apply to sponsor them using the regular sponsorship application.', additional_kwargs={}, example=False)],
 'answer': 'The process for sponsoring your spouse and child is the same as the normal process for sponsorship. You can apply to 

In [157]:
result = qa({"question": question})

"To sponsor your spouse and child, you will need to go through the sponsorship process. Here is a detailed explanation of the steps involved:\n\n1. Determine your eligibility: Before starting the process, make sure you meet the eligibility requirements to sponsor your spouse and child. This includes being a Canadian citizen or permanent resident, being at least 18 years old, and meeting certain financial requirements.\n\n2. Gather the required documents: You will need to gather various documents to support your sponsorship application. This may include proof of your relationship with your spouse (such as marriage certificate or proof of common-law relationship), proof of your Canadian citizenship or permanent residency, proof of financial support, and other supporting documents.\n\n3. Complete the application forms: You will need to fill out the appropriate application forms for sponsoring your spouse and child. This includes the sponsorship application form, the permanent residence ap

In [158]:
print(result['answer'])

To sponsor your spouse and child, you will need to go through the sponsorship process. Here is a detailed explanation of the steps involved:

1. Determine your eligibility: Before starting the process, make sure you meet the eligibility requirements to sponsor your spouse and child. This includes being a Canadian citizen or permanent resident, being at least 18 years old, and meeting certain financial requirements.

2. Gather the required documents: You will need to gather various documents to support your sponsorship application. This may include proof of your relationship with your spouse (such as marriage certificate or proof of common-law relationship), proof of your Canadian citizenship or permanent residency, proof of financial support, and other supporting documents.

3. Complete the application forms: You will need to fill out the appropriate application forms for sponsoring your spouse and child. This includes the sponsorship application form, the permanent residence applicati

In [159]:
result

{'question': ' Can you explain the sponsorship process?',
 'chat_history': [HumanMessage(content='How should I apply for that?', additional_kwargs={}, example=False),
  AIMessage(content='The process for sponsoring your spouse and child is the same as the normal sponsorship process. You can apply to sponsor them using the regular sponsorship application.', additional_kwargs={}, example=False),
  HumanMessage(content=' Can you explain the sponsorship process?', additional_kwargs={}, example=False),
  AIMessage(content='The process for sponsoring your spouse and child is the same as the normal process for sponsorship. You can apply to sponsor them using the regular sponsorship application.', additional_kwargs={}, example=False)],
 'answer': "To sponsor your spouse and child, you will need to go through the sponsorship process. Here is a detailed explanation of the steps involved:\n\n1. Determine your eligibility: Before starting the process, make sure you meet the eligibility requirement

In [162]:
# qa({"question": query, "chat_history": chat_history})

# Working on memory part