# **ChatGPT + Pinecone + LangChain**

## **Step 1: Install All the Required Packages**

In [None]:
!pip install langchain
!pip install pypdf
!pip install unstructured
!pip install sentence_transformers
!pip install pinecone-client
!pip install chatgpt openai
!pip install llama-cpp-python
!pip install huggingface_hub
!pip install PyMuPDF



## **Step 2: Import All the Required Libraries**

In [None]:
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from sentence_transformers import SentenceTransformer
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from huggingface_hub import hf_hub_download
from langchain.chains.question_answering import load_qa_chain
import os
import sys

import openai
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
import pinecone
import fitz

## **Step 3: Load the Data**

In [None]:
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_file: Path of the PDF to read; passed unchanged to ``fitz.open``.

    Returns:
        str: ``page.get_text()`` of each page, concatenated in page order with
        no separator. An empty string when the document has no pages.
    """
    # The context manager closes the document even if text extraction raises;
    # previously the opened document was never closed.
    with fitz.open(pdf_file) as doc:
        return "".join(page.get_text() for page in doc)

# Directory containing the PDF documents
pdf_directory = "path to resumes"

# One extracted-text string per PDF found directly inside pdf_directory.
text_list = []

# os.listdir returns entries in arbitrary order, so sort them to keep the order of
# text_list (and therefore of the indexed resumes) stable from run to run.
for filename in sorted(os.listdir(pdf_directory)):
    # Compare the extension case-insensitively so "resume.PDF" is not skipped.
    if not filename.lower().endswith(".pdf"):
        continue
    pdf_path = os.path.join(pdf_directory, filename)
    text_list.append(extract_text_from_pdf(pdf_path))



## **Step 5: Set Up the Environment**

In [None]:
# Credentials are read from the process environment. The literal strings below are
# placeholders that only take effect when the variable is not already set, so keys
# exported outside the notebook are never overwritten. Do not commit real keys here.
os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", "HUGGINGFACEHUB_API_TOKEN")
os.environ.setdefault("OPENAI_API_KEY", "OPENAI_API_KEY")

# Pinecone settings follow the same rule: environment value first, placeholder/default second.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', 'PINECONE_API_KEY')
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')

## **Step 6: Download the Embeddings**

In [None]:
# Embedding model; the same object is handed to the Pinecone vector store below.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

## **Step 7: Initialize Pinecone**

In [None]:
# Configure the Pinecone client.
# The API key is listed at app.pinecone.io; the environment name is shown next to
# the key in the console.
pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_API_ENV)

# Put the name of your Pinecone index here.
index_name = "langchianpinecone"

## **Step 8: Create Embeddings for All Resumes and Store Them in Pinecone**

In [None]:
# Embed every extracted resume text and store the vectors in the named Pinecone index.
docsearch = Pinecone.from_texts(
    texts=text_list,
    embedding=embeddings,
    index_name=index_name,
)

## **Step 9: Extract Text from the Given PDF**

In [None]:
# Specify the path to your PDF file
pdf_file_path = "path to resume"

# Reuse the helper defined in Step 3 instead of repeating the page loop here, so the
# query resume is extracted exactly the same way as the indexed resumes.
extracted_text = extract_text_from_pdf(pdf_file_path)

## **Step 10: Find the Resume Most Similar to the Given Resume**

In [None]:
# Query the vector store with the full resume text and keep only the single best match.
docs = docsearch.similarity_search(extracted_text, k=1)

## **Step 11: Load the ChatGPT Model**

In [None]:
# Chat model handed to the question-answering chain in the next cell.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
)

In [None]:
# Build the question-answering chain around the chat model, using the "stuff" chain type.
chain = load_qa_chain(
    llm,
    chain_type="stuff",
)

In [None]:
# Question put to the chain about the retrieved resume.
query = " Summarize the resume in json format"

In [None]:
# Ask the question against the retrieved document(s); as the cell's last expression,
# the returned answer is displayed as the cell output.
chain.run(
    question=query,
    input_documents=docs,
)

'{\n  "Career Focus": "To work in a professional atmosphere and gain experience in the aerospace industry",\n  "Professional Experience": [\n    {\n      "Job Title": "Engineering Lab Technician",\n      "Start Date": "Oct 2016",\n      "End Date": "Current",\n      "Company Name": "Company Name",\n      "City": "City",\n      "State": "State",\n      "Responsibilities": [\n        "Testing various seat structures to meet certification requirements",\n        "Maintaining and calibrating test instruments",\n        "Capturing and recording data for certification test reports",\n        "Dynamic test set-up and static suite testing"\n      ]\n    },\n    {\n      "Job Title": "Engineering Lab Technician, Sr. Specialist",\n      "Start Date": "Apr 2012",\n      "End Date": "Oct 2016",\n      "Company Name": "Company Name",\n      "City": "City",\n      "State": "State",\n      "Responsibilities": [\n        "Constructing and maintaining LabView VI programs",\n        "Fabricating and mai