# **Setting Up**

In [None]:
!pip install langchain
!pip install openai
!pip install PyPDF2
!pip install faiss-cpu
!pip install tiktoken
!pip install gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting langchain
  Downloading langchain-0.0.206-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)
  Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 80.3 MB/s eta 0:00:00
Collecting async-timeout<5.0.0,>=4.0.0 (from langchain)
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.5.8-py3-none-any.whl (26 kB)
Collecting langchainplus-sdk>=0.0.13 (from langchain)
  Downloading langchainplus_sdk-0.0.16-py3-none-any.whl (24 kB)
Collecting openapi-schema-pydantic<2.0,>=1.2 (from langchain)
  Downloading o

In [None]:
# Mount Google Drive in the Colab runtime so files stored there can be read.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Switch the working directory to the mounted MyDrive folder.
%cd "/content/gdrive/MyDrive"

In [None]:
# Important Stuff
import os
from getpass import getpass

import gradio as gr
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
# Never store the key in the notebook. Reuse a key that is already present in
# the environment (assigning "" unconditionally would wipe it); otherwise ask
# for it interactively without echoing the input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# **Reading the PDF**

In [None]:
# Open the source PDF for reading.
# NOTE(review): the leading "/" makes this path absolute, so it is not resolved
# against the working directory chosen by the earlier %cd — confirm this is the
# intended location of the file.
reader = PdfReader("/test/AS.pdf")

In [None]:
# Start from an empty string; the text extracted from the PDF is collected in raw_text.
raw_text = ''

# **CharacterTextSplitter**

In [None]:
# Concatenate the text of every page of the PDF.
# raw_text is rebuilt from scratch here instead of being appended to, so this
# cell is idempotent: running it a second time no longer duplicates the
# document. Pages for which extract_text() yields nothing are skipped.
raw_text = "".join(page.extract_text() or "" for page in reader.pages)

# Configure how the document text is cut into chunks: split on newlines, with
# chunk_size and chunk_overlap measured by len (i.e. in characters).
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

In [None]:
# Apply the splitter to the full document text; texts holds the resulting chunks.
texts = text_splitter.split_text(raw_text)

# **Embeddings**

In [None]:
# Embedding client used to vectorise the text chunks.
# NOTE(review): no key is passed here, so it presumably relies on the
# OPENAI_API_KEY environment variable — confirm.
embeddings = OpenAIEmbeddings()

# **FAISS - Facebook A.I. Similarity Search**

In [None]:
# Build the FAISS similarity-search index from the chunks, using `embeddings` to embed them.
docsearch = FAISS.from_texts(texts, embeddings)

## **LangChain**

In [None]:
# Chat model that writes the answers. No model name is passed, so the library
# default applies (the original note says gpt-3.5-turbo — TODO confirm for the
# installed langchain version).
llm = ChatOpenAI(temperature=0.0)

**Chain**

In [None]:
# Question-answering chain driven by `llm`.
# NOTE(review): chain_type="stuff" presumably passes all supplied documents to
# the model in a single prompt — confirm against the langchain docs.
chain = load_qa_chain(llm = llm, chain_type="stuff")

# **Creating Function for our Questions**

In [None]:
def ask_GPT(question):
    """Answer a question from the indexed document.

    The question is used twice: to retrieve matching chunks from the
    module-level ``docsearch`` index, and as the question handed to the
    module-level ``chain`` together with those chunks.

    Args:
        question: The user's question as text.

    Returns:
        Whatever ``chain.run`` returns for the question (the answer text), or
        a short prompt asking for input when ``question`` is empty or only
        whitespace.
    """
    # An empty submission (e.g. from the web form) has nothing to search for;
    # answer locally instead of sending a blank query to the index and model.
    if not question or not question.strip():
        return "Please enter a question."

    docs = docsearch.similarity_search(question)
    return chain.run(input_documents=docs, question=question)

In [None]:
# Ask a question here: put the question text between the quotes.
ask_GPT("")

'First ionisation energy is the energy required to remove one mole of electrons from one mole of gaseous atoms in their ground state to form one mole of gaseous ions with a single positive charge (cations). It is a measure of the strength of attraction between the positively charged nucleus and the valence electron that is to be removed. First ionisation energies generally increase across a period as the number of protons and electrostatic attraction between the valence electrons and the nucleus increases.'

# **Web Interface**

In [None]:
# Wrap ask_GPT in a small web form: a two-line text box in, plain text out.
demo = gr.Interface(
    ask_GPT,
    gr.Textbox(placeholder="Enter your prompt: ", lines=2),
    "text",
)

# share=True asks Gradio for a temporary public URL in addition to the local one.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://b5f878b89eadf4723b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


