In [1]:
# Sanity check that the kernel executes cells and shows their output.
print("Hello World!")

Hello World!


In [2]:
import os
from dotenv import load_dotenv

# Load variables defined in the local .env file into the process environment.
load_dotenv()

# os.getenv returns None when OPENAI_API_KEY is not defined anywhere.
api_key = os.getenv("OPENAI_API_KEY")

# Re-export the key only when it exists. os.environ values must be strings, so
# assigning None raised TypeError and the "API Key Not Found" message below
# could never be reached.
if api_key is not None:
    os.environ["OPENAI_API_KEY"] = api_key

print("API Key Loaded Successfully" if api_key else "API Key Not Found")


API Key Loaded Successfully


In [3]:
import fitz
import glob

def load_all_pdfs_in_directory(directory="./content/"):
  """Extract and concatenate the text of every PDF found directly in `directory`.

  Files are matched with the non-recursive pattern ``*.pdf`` and processed in
  sorted path order, so the combined text is identical on every run
  (``glob.glob`` alone returns matches in arbitrary order).

  Args:
    directory: Folder to scan; a trailing slash is optional.

  Returns:
    The text of all pages of all matched PDFs joined into one string, or an
    empty string when no PDF matches.
  """
  import os  # local import so this cell does not depend on an earlier cell's imports

  # os.path.join avoids the doubled separator produced by f"{directory}/*.pdf"
  # when `directory` already ends with a slash.
  pdf_paths = sorted(glob.glob(os.path.join(directory, "*.pdf")))

  # Collect page texts and join once instead of repeated string concatenation.
  page_texts = []
  for file_path in pdf_paths:
    with fitz.open(file_path) as pdf:
      page_texts.extend(page.get_text() for page in pdf)

  return "".join(page_texts)

# Extract the text of every PDF in the default folder, then preview the first
# 500 characters.
# NOTE(review): the preview echoes raw document text; clear the cell output
# before sharing if the PDFs contain personal data.
pdf_data = load_all_pdfs_in_directory()
preview = pdf_data[:500]
print("Pdf_data: ", preview)

Pdf_data:  Saimahith Chigurupati 
Boston, MA | 857-693-9706 | mahithchigurupati@gmail.com | LinkedIn | GitHub 
Software Engineer with 5+ Years of Expertise in Designing Scalable Systems and Delivering Innovative Solutions  
SKILLS 
Programming Languages  
Python, Java, C, JavaScript 
Web Technologies 
 
Spring Boot, FastAPI, Node.js, React.js, Next.js, Tailwind, REST API, GraphQL, Django REST 
DevOps and Cloud 
 
AWS, GCP, Linux, Git, Docker, Kubernetes, Jenkins, CI/CD, Terraform, Packer, Apache Kafka 
Dat


In [4]:
# Total number of characters of extracted text.
len(pdf_data)

5577

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the extracted text into chunks of at most 1000 characters, with 200
# characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_text(pdf_data)

chunk_count = len(documents)
print("Number of documents: ", chunk_count)


Number of documents:  7


In [6]:
# Inspect the first chunk produced by the splitter.
documents[0]

'Saimahith Chigurupati \nBoston, MA | 857-693-9706 | mahithchigurupati@gmail.com | LinkedIn | GitHub \nSoftware Engineer with 5+ Years of Expertise in Designing Scalable Systems and Delivering Innovative Solutions  \nSKILLS \nProgramming Languages  \nPython, Java, C, JavaScript \nWeb Technologies \n \nSpring Boot, FastAPI, Node.js, React.js, Next.js, Tailwind, REST API, GraphQL, Django REST \nDevOps and Cloud \n \nAWS, GCP, Linux, Git, Docker, Kubernetes, Jenkins, CI/CD, Terraform, Packer, Apache Kafka \nDatabases & Tools \n \nMySQL, PostgreSQL, NoSQL, MongoDB, Redis, Azure SQL, Hibernate, RabbitMQ, SwiftUI \nCertifications \n \n                AWS Certified Solutions Architect, Pega Certified Senior System Architect \n \nWORK EXPERIENCE \nSoftware Engineer, Walmart                                                                                                                                                          Oct 2024 – present \n•'

In [7]:
# Inspect the second chunk produced by the splitter.
documents[1]

'• \nCollaborating with the SRE team to develop automation tools, improving uptime for cloud services powering Walmart \napplications using Java, Spring Boot, Azure SQL, Python, and FastAPI. \n• \nContributing to Business Continuity and Disaster Recovery (BCDR) initiatives, enhancing the reliability of Walmart’s applications \nduring high-traffic events like thanksgiving sales, directly impacting major revenue generation. \n• \nDesigning and developing backend applications to enhance security and compliance of applications across the organization, \nensuring alignment with security best practices and regulatory standards. \n \nSoftware Engineer, Keelworks                                                                                                                                                       Jul 2024 – Oct 2024 \n• \nContributed to the development of a digital knowledge hub, building a centralized platform for community-driven education, \ncareer advancement, and knowledge s

In [8]:
from langchain_openai import OpenAIEmbeddings

# OpenAI embedding model client; the same object is later handed to the vector store.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Embed every text chunk in `documents`; the result is indexed per chunk below.
embedded_documents = embeddings.embed_documents(documents)

In [9]:
# Show the first six components of the first chunk's embedding vector.
print("Embedding for the first document")
first_embedding = embedded_documents[0]
for position, component in enumerate(first_embedding[:6], start=1):
  print(f"Component {position}: {component}")

Embedding for the first document
Component 1: -0.005875465925782919
Component 2: -0.013189268298447132
Component 3: 0.006564103066921234
Component 4: -0.017599260434508324
Component 5: -0.007239171303808689
Component 6: 0.02708413451910019


In [10]:
from langchain_chroma import Chroma
import chromadb

chroma_db_directory = "./chroma_index"

# chromadb.Client() creates an in-memory client, and a client handed to Chroma is
# used as-is, so the previous persist_directory argument had no effect and the
# index was never written to disk. A PersistentClient rooted at
# chroma_db_directory makes the "stored" message below true.
client = chromadb.PersistentClient(path=chroma_db_directory)

# Deterministic per-chunk ids: re-running this cell is expected to overwrite the
# same entries instead of appending duplicates under fresh random ids.
# NOTE(review): if the source PDFs change and produce fewer chunks, delete
# chroma_db_directory first so stale higher-numbered chunks do not linger.
chunk_ids = [f"chunk-{index}" for index in range(len(documents))]

vectorstore = Chroma.from_texts(
    texts=documents,
    embedding=embeddings,
    ids=chunk_ids,
    client=client,
)

print("Vectorstore created and stored successfully")

Vectorstore created and stored successfully


In [11]:
# Reports the length of the in-memory chunk list; it does not query the vector store.
print(f"Number of documents indexed: {len(documents)}")

Number of documents indexed: 7


In [12]:
# Similarity-search retriever over the vector store, configured with k=6 results per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [13]:
def pretty_print(docs):
  """Print each document's text under a numbered heading.

  Consecutive documents are separated by a line of 100 dashes.

  Args:
    docs: Iterable of objects exposing a `page_content` string.
  """
  divider = "\n" + "-" * 100 + "\n"
  sections = []
  for number, doc in enumerate(docs, start=1):
    sections.append(f"Document {number}:\n\n" + doc.page_content)
  print(divider.join(sections))

In [14]:
# Run both sample questions through the plain similarity retriever and print
# the retrieved chunks for each, in order.
query_1 = "who does the document discuss about?"
query_2 = "what are his achievements?"

for query in (query_1, query_2):
  docs = retriever.invoke(query)
  pretty_print(docs)

Document 1:

• 
Revamped the architecture of an Equity and Debt investment processing application for World Bank Group, improving case 
processing lifecycle and disbursement time by 30% through advanced system design. 
• 
Designed and implemented REST APIs for Java and .NET applications, integrating core business logic and configuring SLAs and 
routings to optimize workflow efficiency. 
• 
Utilized Splunk and Tracer for troubleshooting and resolving production issues, ensuring zero downtime and delivering timely 
fixes with minimal impact on end-users. 
• 
Provided L2 support for Pegasystems, diagnosing and resolving infrastructure and product-level issues to maintain system 
integrity and enhance user experience. 
• 
Collaborated with cross-functional teams using JIRA in an Agile environment, contributing to SDLC phases and applying best 
design patterns to ensure timely and high-quality feature delivery. 
 
EDUCATION
-------------------------------------------------------------------

In [15]:
from langchain.embeddings import OllamaEmbeddings
from langchain_chroma import Chroma
import chromadb
from langchain_cohere import CohereRerank
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
import os
import cohere

# Cohere client built from the COHERE_API_KEY environment variable.
cohere_api_key = os.getenv("COHERE_API_KEY")
cohere_client = cohere.ClientV2(cohere_api_key)

# Reranker that keeps the 3 best-scoring chunks out of those the base retriever returns.
cohere_reranker = CohereRerank(
    client=cohere_client,
    model="rerank-english-v3.0",
    top_n=3,
)

# Retriever pipeline: base similarity retriever first, then Cohere reranking.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=cohere_reranker,
    base_retriever=retriever,
)

# Same two sample questions used for the plain retriever.
query_1 = "who does the document discuss about?"
query_2 = "what are his achievements?"

# Retrieve and rerank for both questions before printing anything.
reranked_docs_1 = compression_retriever.invoke(query_1)
reranked_docs_2 = compression_retriever.invoke(query_2)

# Print each result set under its own heading.
for heading, reranked_docs in (
    ("Results for query 1:", reranked_docs_1),
    ("\nResults for query 2:", reranked_docs_2),
):
  print(heading)
  pretty_print(reranked_docs)

Results for query 1:
Document 1:

Saimahith Chigurupati 
Boston, MA | 857-693-9706 | mahithchigurupati@gmail.com | LinkedIn | GitHub 
Software Engineer with 5+ Years of Expertise in Designing Scalable Systems and Delivering Innovative Solutions  
SKILLS 
Programming Languages  
Python, Java, C, JavaScript 
Web Technologies 
 
Spring Boot, FastAPI, Node.js, React.js, Next.js, Tailwind, REST API, GraphQL, Django REST 
DevOps and Cloud 
 
AWS, GCP, Linux, Git, Docker, Kubernetes, Jenkins, CI/CD, Terraform, Packer, Apache Kafka 
Databases & Tools 
 
MySQL, PostgreSQL, NoSQL, MongoDB, Redis, Azure SQL, Hibernate, RabbitMQ, SwiftUI 
Certifications 
 
                AWS Certified Solutions Architect, Pega Certified Senior System Architect 
 
WORK EXPERIENCE 
Software Engineer, Walmart                                                                                                                                                          Oct 2024 – present 
•
-----------------------------------

In [16]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Chat model that writes the final answers.
llm = ChatOpenAI(
    model="gpt-4-0613",
    temperature=0.5,
    max_tokens=3000,
)

# Question-answering chain that pulls its context from the reranking retriever.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=compression_retriever)

In [17]:
# Candidate questions about the person described in the indexed PDF.
user_query_1 = "who does the document discuss about?"
user_query_2 = "what are his achievements?"
user_query_3 = "what certifications does he have?"
user_query_4 = "what are his skills?"
user_query_5 = "Does he have any experience working with AI/ML or Blockchain?"
user_query_6 = "what are his qualifications?"
user_query_7 = "what are his experiences?"

# Batch of questions for the QA loop: questions 3-7 only; 1 and 2 are not included.
user_queries = [
    user_query_3,
    user_query_4,
    user_query_5,
    user_query_6,
    user_query_7,
]

In [18]:
def build_prompt(user_query):
  """Wrap a user question in the answer-format instructions sent to the QA chain.

  Args:
    user_query: The question to ask, as plain text.

  Returns:
    The full prompt string: role instructions, the required answer layout
    (answer followed by three key points), and the quoted question.
  """
  # Fixes two typos that were being sent to the model verbatim:
  # "[Your answer herel" -> "[Your answer here]" and "refevant" -> "relevant".
  return (
      "You are an expert assistant with a strong grasp of the subject matter. "
      "Please answer the following question succinctly, highlighting the key points. "
      "Format your response as follows: \n\n"
      " [Your answer here]\n"
      "Key Points: \n"
      "- Point 1: [Key insight 1]\n"
      "- Point 2: [Key insight 2]\n"
      "- Point 3: [Key insight 3]\n\n"
      "Ensure your response is relevant and avoid unnecessary elaboration. "
      f"Answer the following question: '{user_query}'"
  )


for user_query in user_queries:
  prompt = build_prompt(user_query)
  # NOTE(review): the whole instruction-wrapped prompt is passed as the chain's
  # query, so presumably the retriever and reranker search on the formatting
  # instructions as well as the question. Consider moving the instructions into
  # the chain's prompt template and invoking with the bare question. TODO confirm.
  response = qa_chain.invoke(prompt)
  print(f"Question: {user_query}\nAnswer: {response['result']}\n")
  print("-----------------------------------------------------------------------------------------------------------------")

Question: what certifications does he have?
Answer: Saimahith Chigurupati has two certifications.

Key Points: 
- Point 1: He is an AWS Certified Solutions Architect.
- Point 2: He is a Pega Certified Senior System Architect.
- Point 3: These certifications indicate his expertise in cloud solutions and system architecture.

-----------------------------------------------------------------------------------------------------------------
Question: what are his skills?
Answer: Saimahith Chigurupati has a wide range of skills in software engineering and related fields. 

Key Points: 
- Point 1: He is proficient in several programming languages including Python, Java, C, and JavaScript.
- Point 2: He has expertise in web technologies such as Spring Boot, FastAPI, Node.js, React.js, Next.js, Tailwind, REST API, GraphQL, and Django REST.
- Point 3: He also has skills in DevOps and Cloud technologies like AWS, GCP, Linux, Git, Docker, Kubernetes, Jenkins, CI/CD, Terraform, Packer, and Apache K