Installing Libraries


In [1]:
!pip install langchain
!pip install openai
!pip install PyPDF2
!pip install faiss-cpu
!pip install tiktoken

Collecting langchain
  Downloading langchain-0.1.11-py3-none-any.whl (807 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m807.5/807.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.25 (from langchain)
  Downloading langchain_community-0.0.27-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1.29 (from langchain)
  Downloading langchain_core-0.1.30-py3-none-any.whl (256 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m256.9/256.9 kB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)
  Downloa

Importing Libraries

In [2]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

Constants


In [3]:
import os
from getpass import getpass

# Never store a real key in the notebook. Reuse OPENAI_API_KEY when the
# environment already provides one; otherwise prompt for it without echoing
# the input, so the key does not end up in the saved cell source or output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [14]:
# PDF to index; the path is relative to the notebook's working directory.
pdf_path = 'ml.pdf'
pdfreader = PdfReader(pdf_path)

In [15]:
from typing_extensions import Concatenate  # NOTE(review): unused in this cell; looks like an accidental auto-import

# Concatenate the extracted text of every page, skipping pages for which
# extract_text() returns nothing. Pages are joined with no separator, exactly
# as before, so the resulting string is unchanged.
page_texts = (page.extract_text() for page in pdfreader.pages)
raw_text = ''.join(text for text in page_texts if text)

In [16]:
# Splitter settings: break the text on newlines, and measure chunk_size and
# chunk_overlap with len(), i.e. in characters.
splitter_settings = {
    "separator": "\n",
    "chunk_size": 800,
    "chunk_overlap": 200,
    "length_function": len,
}
text_splitter = CharacterTextSplitter(**splitter_settings)

# List of text chunks produced from the full document text.
texts = text_splitter.split_text(raw_text)

In [17]:
# Number of chunks produced by the splitter.
len(texts)

122

Create embeddings with OpenAI and build the FAISS index

In [18]:
# Embedding client created with default settings.
# NOTE(review): presumably authenticates via the OPENAI_API_KEY environment
# variable set earlier in the notebook — confirm.
embeddings = OpenAIEmbeddings()

In [19]:
# Build a FAISS vector store from the text chunks using the embedding client.
# NOTE(review): presumably sends every chunk to the OpenAI embeddings API
# (network access and usage cost) — confirm before re-running on large PDFs.
document_search = FAISS.from_texts(texts, embeddings)

In [20]:
# Display the vector store object to confirm it was created.
document_search

<langchain_community.vectorstores.faiss.FAISS at 0x7edce4475f60>

In [21]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

In [22]:
# Question-answering chain of type 'stuff', backed by an OpenAI LLM created
# with default settings.
llm = OpenAI()
chain = load_qa_chain(llm, chain_type='stuff')

In [24]:
query = "What are the goals of this paper"

# Look up the chunks most similar to the question in the vector store, then
# pass them to the chain together with the question.
docs = document_search.similarity_search(query)
answer = chain.run(input_documents=docs, question=query)

# Last expression of the cell, so the answer string is displayed.
answer

' The goals of this paper are to review fundamental concepts of machine learning, discuss its relationship with standard psychological methods, and give concrete guidelines to implement machine learning projects in R.'

In [25]:
# Mount Google Drive at /content/drive (requires the google.colab package,
# i.e. a Colab runtime).
# NOTE(review): no visible cell above reads from /content/drive; if 'ml.pdf'
# is meant to come from Drive, this cell has to run before the PdfReader cell.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive
