In [30]:
! pip install unstructured openpyxl



In [31]:
from langchain_community.document_loaders import UnstructuredExcelLoader
from langchain.schema import Document 

# Load Excel data
excel_loader = UnstructuredExcelLoader(
    file_path = "../data/Rutvik Rathod ATA-KPA (1).xlsx",
)
excel_docs = excel_loader.load()

In [32]:
# Load custom input text (as a document)
custom_text = ''''Name: Alex Johnson
Title: Computer Engineer
Location: San Francisco, CA
Experience: 5 years
Skills:
 
Programming Languages: Python, Java, C++, JavaScript
Web Development: HTML, CSS, React, Node.js
Databases: MySQL, PostgreSQL, MongoDB
Version Control: Git, GitHub, GitLab
Operating Systems: Linux, Windows
Cloud Services: AWS, Google Cloud Platform
DevOps Tools: Docker, Kubernetes, Jenkins
Machine Learning: TensorFlow, PyTorch
Cybersecurity: Network Security, Encryption, Fi`rewalls
Education:

B.S. in Computer Engineering, Stanford University
Graduated: 2019
GPA: 3.8/4.0'''
custom_doc = [Document(page_content=custom_text)]

In [33]:
# Combine Excel and custom input text documents
combined_docs = excel_docs + custom_doc

In [36]:
from dotenv import load_dotenv

load_dotenv("../.env")

True

In [37]:
import os

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [38]:
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama3-8b-8192")

In [39]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(combined_docs)

In [40]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
    )



In [41]:
from langchain_chroma import Chroma

vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

In [42]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})

In [43]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    """"You are an helpful assistant for question-answering tasks.\n

    Please read the context and question below and provide an answer to the question. and also provide an explanation for your answer.\n
    
    Context:\n {context}?\n
    """
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [44]:
results = rag_chain.invoke({"input": "What is the purpose of the document?"})

Number of requested results 20 is greater than number of elements in index 11, updating n_results = 11


In [45]:
print(results['answer'])

The purpose of the document appears to be a performance evaluation or a self-assessment report for a computer engineer, Alex Johnson. The document outlines various Key Performance Areas (KPAs) and provides a breakdown of the tasks and projects undertaken by Alex during a specific period.

The document also includes comments and feedback from Alex's supervisor (PTA) and Alex's own reflections on his performance, skills, and core values. The purpose of this document may be to:

1. Evaluate Alex's performance and provide feedback for improvement.
2. Identify areas of strength and areas for development.
3. Set goals and objectives for future projects and tasks.
4. Demonstrate Alex's skills, experience, and qualifications to potential clients or employers.
5. Serve as a reference or portfolio for future job applications.

Overall, the document provides a comprehensive overview of Alex's work, skills, and values, and serves as a tool for personal and professional growth.


In [46]:
results = rag_chain.invoke({"input": "In which univercity did Alex Johnson graduate?"})

Number of requested results 20 is greater than number of elements in index 11, updating n_results = 11


In [47]:
print(results['answer'])

According to the given context, Alex Johnson graduated from Stanford University with a Bachelor of Science (B.S.) in Computer Engineering in 2019 with a GPA of 3.8/4.0.
