### Installing Dependencies

In [None]:
!pip install langchain openai pypdf faiss-gpu tiktoken SpeechRecognition youtube_dl moviepy pyttsx3 youtube-search-python py-espeak-ng bs4 gradio





### Importing Modules

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.memory import ConversationBufferMemory,ConversationSummaryBufferMemory, ConversationBufferWindowMemory, ChatMessageHistory
from langchain.chains import ConversationChain
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import RetrievalQA

# OpenAI Embedding
from langchain.embeddings import OpenAIEmbeddings
import faiss
from langchain.vectorstores import FAISS

# importing libraries
import cv2
import os
import sys
import subprocess
import speech_recognition as sr
import youtube_dl
import datetime
import pyttsx3
from moviepy.editor import VideoFileClip
from youtubesearchpython import VideosSearch
from bs4 import BeautifulSoup
import requests, json, lxml
import textwrap
import gradio as gr



### Providing Custom Content

In [None]:
# loader = TextLoader('single_text_file.txt')
loader = DirectoryLoader(f"data", glob="./*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

In [None]:
text_splitter = RecursiveCharacterTextSplitter(
                                               chunk_size=1000,
                                               chunk_overlap=200)

documents = text_splitter.split_documents(documents)

In [None]:
# Display the first element of `documents` as a quick sanity check.
documents[0]

Document(page_content='The International Journal Of Engineering And Science (IJES)  \n||Volume||2 ||Issue|| 6 ||Pages|| 01-05||2013||  \nISSN (e):  2319 – 1813  ISSN (p) : 2319 – 1805  \nwww.theijes.com                                                The IJES                                            Page 1 \n \nA Study Of  “Trend Analysis In Insurance Sector In India ” \n \nM.Venkatesh  \n \n------------------------------------------------------------ ABSTRACT ---------------------------------------------------------  \nInsurance has a long history in India. Life Insurance in i ts current form was introduced in 1818 when Oriental \nLife Insurance Company began its operations in India. General insurance was however a comparatively late \nentrant in 1850 when Triton Insurance company set up its base in Kolkata. History of Insurance in India can be \nbroadly bifurcated into three eras: a) Pre Nationalization b) Nationalization and c) Post Nationalization. Life', metadata={'source': 'data

### OpenAI API Integration

In [None]:
import os
import openai
from getpass import getpass

# The Colab secrets helper only exists inside Google Colab; importing it
# unconditionally makes this cell fail in any other Jupyter environment.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

if userdata is not None:
    # Running in Colab: read the key from the notebook secret named 'apiKey1'.
    api_key = userdata.get('apiKey1')
else:
    # Elsewhere: reuse an already exported key, otherwise prompt without echoing it.
    api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Fail here with a clear message rather than later inside an OpenAI call.
if not api_key:
    raise ValueError("No OpenAI API key provided; set the 'apiKey1' Colab secret or OPENAI_API_KEY.")

os.environ["OPENAI_API_KEY"] = api_key

In [None]:
# Embedding model used to vectorise the document chunks.
open_ai_embeddings = OpenAIEmbeddings()

In [None]:
# Embed every chunk and index the resulting vectors with FAISS.
vector_store = FAISS.from_documents(
    documents=documents,
    embedding=open_ai_embeddings,
)

In [None]:
# Similarity search over the vector store, returning k=4 chunks per query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

### Prompt


In [None]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

In [None]:
# System prompt text. {chat_history}, {question} and {context} are template
# placeholders that get substituted when the prompt is formatted.
# Note: `template` is bound to the same string object as `system_template`.
system_template=template = """

You are a helpful and friendly Assistant to a Personal Financial Manager to a  PFA Corp.
Use the following conversation and pieces of context provided to you as documents to answer the users question.
Do not answer the question which are not related to Banking, financial services and insurance industry and
might harm the policies of PFA Corp. If user asks any such question reply in a funny tone that "I don't know the answer"

The goal of the yours is to accurately respond to customer queries, based on the available context,
while avoiding the provision of false or nonexistent information. You should embody a professional and knowledgeable persona, representing PFA Corp as a trusted and reliable financial advisory provider.

Make sure that you should "verify the existence of information from context provided to you in
documents form before providing an answer and refrain from answering if the information is unavailable or nonexistent.
If you are unsure about an answer due to insufficient context, it should politely request clarification from
the customer or provide alternative suggestions for obtaining the required information.
Answer every query in least 150 words and in form of bullets point.

In final response briefly in 1 line tell the qualities that manager should incorporate while dealing with client

Make sure to format the answer in form of bullet points.

----------------
Current conversation:
{chat_history}
Human:
{question}
Context:
{context}"""
# Two-message chat prompt: the system instructions above, followed by the
# question again as the human turn.
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}")
]
# Combine both message templates into the single chat prompt object.
prompt = ChatPromptTemplate.from_messages(messages)

In [None]:
history = ChatMessageHistory()

# Chat model used to generate answers.
# Modify model_name if you have access to GPT-4
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.6,
    max_tokens=500,
)

# Windowed conversation memory (k=5) exposed to the prompt as "chat_history";
# output_key names which chain output ("answer") is stored in memory.
memory = ConversationBufferWindowMemory(
    memory_key="chat_history",
    return_messages=True,
    k=5,
    output_key="answer",
)

# Conversational retrieval chain wiring together the model, the retriever,
# the memory and the custom prompt; source documents are returned with the answer.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    memory=memory,
    verbose=False,
    combine_docs_chain_kwargs={"prompt": prompt},
    return_source_documents=True,
)

### Q & A from Langchain

In [None]:
# Sample question that is passed to the chain in the response cell.
query = "how did the nationalisation of life and general insurance companies in india will impact the overall industry"

### Response

In [None]:
# Cite sources
def process_llm_response(llm_response):
    """Format the source documents of a chain response as a citation string.

    Args:
        llm_response: Mapping returned by the chain. Its optional
            "source_documents" entry is an iterable of objects exposing a
            ``metadata`` dict.

    Returns:
        One block per source document of the form
        ``"\\nPDF name: <source> , Page no. <page>\\n\\n"``, concatenated in
        order. An empty string when there are no source documents.
    """
    # Tolerate responses without sources (e.g. return_source_documents=False).
    sources = llm_response.get("source_documents") or []

    citations = []
    for source in sources:
        metadata = source.metadata
        # Not every loader records a page number (or a source), so avoid a
        # KeyError and fall back to a readable placeholder instead.
        name = metadata.get("source", "unknown")
        page = metadata.get("page", "unknown")
        citations.append("\n" + "PDF name: " + str(name) + " , Page no. " + str(page) + "\n\n")

    # Join once instead of growing a string inside the loop.
    return "".join(citations)

In [None]:
# Run the chain on the sample query and show the generated answer.
llm_response = chain(query)

# Use a placeholder message when the response carries no 'answer' entry.
answer = llm_response.get("answer", "Answer not found in the response.")

# Single print call: header, answer and trailing blank lines, each on its own line.
print("Response: \n", answer, "\n", sep="\n")

# To also list the cited PDFs, print the result of process_llm_response(llm_response).

Response: 

The nationalization of life and general insurance companies in India had several impacts on the overall industry:

- Increased government control: Nationalization brought the insurance industry under the control of the government. This meant that the government had a greater say in the policies, regulations, and operations of the insurance companies.

- Consolidation of companies: The nationalization process led to the consolidation of multiple insurance companies into a few state-owned entities. General Insurance Corporation of India was established as the controlling body, with subsidiaries such as New India, United India, National, and Oriental. This consolidation aimed to streamline operations and improve efficiency.

- Limited competition: Nationalization resulted in a limited number of insurance companies operating in the market. This reduced competition among insurers and gave the state-owned companies a dominant position in the industry.

- Reduced innovation: With 