In [None]:
#importing libraries
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

In [None]:
#environment variables
load_dotenv()
PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY')
os.environ["PINECONE_API_KEY"]=PINECONE_API_KEY
GROQ_API_KEY=os.environ.get('GROQ_API_KEY')
os.environ["GROQ_API_KEY"]=GROQ_API_KEY

In [None]:
#extracting pdf data
loader = PyPDFLoader("Amina Akhtar Resume.pdf")
document=loader.load()
len(document)

1

In [None]:
split_document = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=100)
text = split_document.split_documents(document)
print(len(text))
print(text)
texts = [t.page_content for t in text]
print(texts)

3
[Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-06-07T18:10:08+00:00', 'author': '', 'keywords': '', 'moddate': '2025-06-07T18:10:08+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'Amina Akhtar Resume.pdf', 'total_pages': 1, 'page': 0, 'page_label': '1'}, page_content='Amina Akhtar\n/ne+92(325)9857507 | aminaakhtar011@gmail.com | /♀nednLinkedIn | /gtbGitHub | Portfolio\nEducation\nFAST NUCES Lahore CGPA: 3.59/4\nBachelor of Science in Software Engineering Aug. 2022 – June 2026\n• Bronze Medal Spring 2023\n• 4X Dean’s List of Honors\nProjects\nAI-Plant ID| Python, TensorFlow, Keras\n• Developed an AI model that accurately identifies plant species from the user-uploaded images.\n• Achieved robust model performance through an ensemble of ResNet50, EfficientNetB0, and MobileNetV2\nconvolutional neural ne

In [None]:
#generate mebeddings
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
result = embeddings.embed_documents(texts)
print(result)

  embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
  from .autonotebook import tqdm as notebook_tqdm


[[-0.13233940303325653, -0.011175202205777168, -0.010113360360264778, 0.02385728992521763, 0.06673580408096313, 0.002071455819532275, -0.06333838403224945, -8.529413025826216e-05, -0.03908700495958328, -0.0531352199614048, -0.04461255297064781, -0.05987532064318657, 0.021224835887551308, 0.005893821828067303, -0.08839032053947449, 0.035437460988759995, 0.03163004294037819, 0.02316986583173275, -0.06537973880767822, -0.07903663069009781, -0.009716751985251904, 0.038021571934223175, 0.026114128530025482, -0.04897482693195343, -0.0063180807046592236, 0.00295452494174242, -0.05484567955136299, -0.0189552940428257, -0.018184417858719826, -0.06183205544948578, 0.05567941442131996, 0.05195579677820206, 0.026469670236110687, 0.050121791660785675, -0.06342662870883942, 0.04643629491329193, -0.04466728866100311, -0.041271381080150604, 0.03495766595005989, -0.07679134607315063, -0.004714201204478741, -0.04754752293229103, -0.05205792933702469, 0.02774779684841633, 0.10158871114253998, -0.00923290

In [None]:
pc = Pinecone(api_key=PINECONE_API_KEY)

index_name = "custom-chatbot"
if index_name not in [index.name for index in pc.list_indexes()]:
    pc.create_index(       
       name=index_name,
       dimension=384, 
       metric="cosine", 
       spec=ServerlessSpec(
         cloud="aws", 
         region="us-east-1"
    ) 
) 
else:
     print(f"Index '{index_name}' already exists.")

Index 'custom-chatbot' already exists.


In [None]:
#store embeddings
index_data = PineconeVectorStore.from_documents(
    documents=text,
    index_name=index_name,
    embedding=embeddings, 
)

In [None]:
#load embeddings
load_document = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings
)
print(load_document)

<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000020A1F981600>


In [None]:
retriever = load_document.as_retriever(search_type="similarity", search_kwargs={"k":3})

In [10]:
retrieved_docs = retriever.invoke("What are interests of Amina")
print(retrieved_docs)

[Document(id='987c4b2b-c5ae-4457-8922-7361dcd59b8b', metadata={'author': '', 'creationdate': '2025-06-07T18:10:08+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2025-06-07T18:10:08+00:00', 'page': 0.0, 'page_label': '1', 'producer': 'pdfTeX-1.40.26', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'source': 'Amina Akhtar Resume.pdf', 'subject': '', 'title': '', 'total_pages': 1.0, 'trapped': '/False'}, page_content='Amina Akhtar\n/ne+92(325)9857507 | aminaakhtar011@gmail.com | /♀nednLinkedIn | /gtbGitHub | Portfolio\nEducation\nFAST NUCES Lahore CGPA: 3.59/4\nBachelor of Science in Software Engineering Aug. 2022 – June 2026\n• Bronze Medal Spring 2023\n• 4X Dean’s List of Honors\nProjects\nAI-Plant ID| Python, TensorFlow, Keras\n• Developed an AI model that accurately identifies plant species from the user-uploaded images.\n• Achieved robust model performance through an ensemble of ResNet50, EfficientNe

In [None]:
#delete embeddings from index
index = pc.Index(index_name)
index.delete(delete_all=True)

{}

In [None]:
llm = ChatGroq(
    model="llama-3-70b-instruct",
    temperature=0.5,
    max_tokens=None
)

In [None]:
prompt="You are an expert assistant to answer the question based on the pdf content. Use only the following extracted information to answer the questions.If you do not know the answer say: I could not find any relevant information in the document. Keep the answers concise, use 3 sentences maximum." \
"{context}"

In [None]:
template=ChatPromptTemplate.from_messages(
    [
        ("system", prompt),
        ("human", "{input}"),
    ]
)

In [None]:
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",#combine retrieved documents to send to LLM
    retriever=retriever
)

response = qa_chain.invoke({"query": "What do you know about amina"})
print(response["result"])

In [None]:
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key='answer' ,
    k=3
)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm, 
    retriever=retriever,
    memory=memory
)

response = qa_chain({"question": "give me some skills of amina"})
print(response["answer"])

response = qa_chain({"question": "separate her web development skills"})
print(response["answer"])

Based on the provided context, here are some skills that Amina Akhtar possesses:

1. **Programming Languages**:
	* Java
	* Python
	* C/C++
	* JavaScript
	* HTML/CSS
2. **Frameworks**:
	* React
	* Node.js
	* Express.js
	* JUnit
3. **Databases**:
	* SQL
	* MySQL
	* MongoDB
4. **Developer Tools**:
	* Git
	* Postman
	* VS Code
	* Jupyter Notebook
	* IntelliJ
	* Eclipse OCL
	* Figma
	* Maze
5. **Libraries**:
	* pandas
	* NumPy
	* Matplotlib
	* TensorFlow
	* Keras
6. **Other Skills**:
	* AI and Machine Learning (developed an AI model for plant species identification)
	* Full-stack web development (developed a web application using MERN stack)
	* Client-Server Architecture
	* Multi-threading
	* User authentication and authorization
	* Data storage and management
	* Testing and validation (using JUnit)
Amina's web development skills include:

1. Languages: JavaScript, HTML/CSS
2. Frameworks: React, Node.js, Express.js
3. Databases: MongoDB, SQL, MySQL
4. Libraries: None specifically mentioned 

In [57]:
print(qa_chain.memory)

chat_memory=InMemoryChatMessageHistory(messages=[HumanMessage(content='give me some skills to amina', additional_kwargs={}, response_metadata={}), AIMessage(content='Based on the provided context, here are some skills that Amina Akhtar possesses:\n\n1. Programming languages:\n\t* Java\n\t* Python\n\t* C/C++\n\t* JavaScript\n\t* HTML/CSS\n2. Frameworks:\n\t* React\n\t* Node.js\n\t* Express.js\n\t* JUnit\n3. Databases:\n\t* SQL\n\t* MySQL\n\t* MongoDB\n4. Developer Tools:\n\t* Git\n\t* Postman\n\t* VS Code\n\t* Jupyter Notebook\n\t* IntelliJ\n\t* Eclipse OCL\n\t* Figma\n\t* Maze\n5. Libraries:\n\t* pandas\n\t* NumPy\n\t* Matplotlib\n\t* TensorFlow\n\t* Keras\n6. Other skills:\n\t* AI and machine learning (as seen in the AI-Plant ID project)\n\t* Full-stack web development (as seen in the Cardify project)\n\t* Desktop application development (as seen in the POS for a SuperStore and LESCO Billing System projects)\n\t* User authentication and authorization\n\t* Data storage and management\n