In [42]:
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain_astradb import AstraDBVectorStore
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv
import os

In [2]:
load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
llm = ChatGoogleGenerativeAI(model = "gemini-1.5-pro",temperature = 0)

print(llm.invoke("What is the capital of India").content)

The capital of India is **New Delhi**. 



In [27]:
def load_pdf(data):
    loader = DirectoryLoader(data,glob = "*.pdf",loader_cls = PyPDFLoader)
    docs = loader.load()
    return docs

In [28]:
documents = load_pdf("data/")
len(documents)

2

In [29]:
def convert_to_chunks(documents):
    splitter = RecursiveCharacterTextSplitter(chunk_size = 50,chunk_overlap = 10)
    chunks = splitter.split_documents(documents)
    return chunks

In [30]:
chunks = convert_to_chunks(documents)
len(chunks)

107

In [31]:
load_dotenv()

ASTRA_API = os.getenv("ASTRA_API")
DB_ENDPOINT = os.getenv("ASTRA_DB_ENDPOINT")

embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

In [32]:
def ingest(status):
    vector_store = AstraDBVectorStore(embedding = embeddings,
                                      token = ASTRA_API,
                                      api_endpoint = DB_ENDPOINT,
                                      namespace = "test",
                                      collection_name = "Resume_Parser")
    
    if status == None:
        inserted_ids = vector_store.add_documents(chunks)
    else:
        return vector_store
    return vector_store

In [33]:
vector_store = ingest(None)

In [34]:
vector_store.similarity_search("skills")

[Document(metadata={'source': 'data\\My_Resume.pdf', 'page': 0}, page_content='SKILLS'),
 Document(metadata={'source': 'data\\My_Resume.pdf', 'page': 0}, page_content='SKILLS'),
 Document(metadata={'source': 'data\\RahulCV.pdf', 'page': 0}, page_content='Skills\n•Problem Solving\n•Multitasking Abilities'),
 Document(metadata={'source': 'data\\RahulCV.pdf', 'page': 0}, page_content='team building, critical thinking skills. Seeking')]

In [35]:
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

template = """You are a Resume Reading expert who can answer any query (related multiple resume) to the Context: {context}
Question: {question} 
"""

prompt=ChatPromptTemplate.from_template(template)

In [36]:
prompt=ChatPromptTemplate.from_template(template)


In [41]:
chain = ({"context":retriever,"question":RunnablePassthrough()}|prompt|llm|StrOutputParser())
response = chain.invoke("What is the educational background of the applicants?")
print(response)

Based on the provided context, here's what we know about the applicant's education:

* **Applicant 1 (presumably Rahul from "RahulCV.pdf"):** We don't have enough information. The context mentions "challenging internship environment" and "Education" but doesn't specify the degree, field of study, or institution. 
* **Applicant 2 (from "My_Resume.pdf"):**  This applicant attended **GITAM University** and was pursuing a **Bachelor of Technology**.  We need more context to know the specific field of study within the B.Tech program. 

**To get a complete picture of the applicants' education, we need more information from their resumes, such as:**

* **Applicant 1:**
    * Name of the university/college attended
    * Degree earned (e.g., Bachelor of Science, Master of Arts)
    * Major/field of study
    * Graduation year or expected graduation year
    * GPA (optional)
* **Applicant 2:**
    *  Major/field of study within the Bachelor of Technology program
    * Graduation year or expecte

TRYING FROM RETRIEVAL CHAIN

In [43]:
qa_chain = RetrievalQA.from_chain_type(llm=llm, 
                                  chain_type="stuff", 
                                  retriever=retriever, 
                                  return_source_documents=False)

In [44]:
warning = "If you don't know the answer, just say that you don't know, don't try to make up an answer"


job_description = "MS or PhD in computer science or a related technical field,5+ years of industry work experience. Good sense of product with a focus on shipping user-facing data-driven features, Expertise in Python and Python based ML/DL and Data Science frameworks. \
Excellent coding, analysis, and problem-solving skills. Proven knowledge of data structure and algorithms. \
Familiarity in relevant machine learning frameworks and packages such as Tensorflow, PyTorch and HuggingFace\
Experience working with Product Management and decomposing feature requirements into technical work items to ship products\
Experience with generative AI, knowledge of ML Ops and ML services is a plus. This includes Pinecone, LangChain, Weights and Biases etc. \
Familiarity with deployment technologies such as Docker, Kubernetes and Triton are a plus\
Strong communication and collaboration skills"

question = warning+job_description + " Based on the given job description"
query = question + "short list resumes which is good fit based on skills,education and work experience mwntioned in it? also provide the candidate name which will be mentioned in first line of pdf without subheading"
# query = "short list resumes which is good fit for Data analysis roles based on skills,education and work experience mwntioned in it?"

llm_response = qa_chain(query)

  warn_deprecated(


In [45]:
\

{'query': "If you don't know the answer, just say that you don't know, don't try to make up an answerMS or PhD in computer science or a related technical field,5+ years of industry work experience. Good sense of product with a focus on shipping user-facing data-driven features, Expertise in Python and Python based ML/DL and Data Science frameworks. Excellent coding, analysis, and problem-solving skills. Proven knowledge of data structure and algorithms. Familiarity in relevant machine learning frameworks and packages such as Tensorflow, PyTorch and HuggingFaceExperience working with Product Management and decomposing feature requirements into technical work items to ship productsExperience with generative AI, knowledge of ML Ops and ML services is a plus. This includes Pinecone, LangChain, Weights and Biases etc. Familiarity with deployment technologies such as Docker, Kubernetes and Triton are a plusStrong communication and collaboration skills Based on the given job descriptionshort 