In [1]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from operator import itemgetter

In [2]:
# Tracing settings (LANGCHAIN_* variables) and the Google API key, applied in one call.
# NOTE(review): the two API keys are literals in the notebook; consider loading them
# from the shell environment or a prompt instead so they are not committed with the file.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
    'LANGCHAIN_API_KEY': '<REDACTED>',
    'GOOGLE_API_KEY': '<REDACTED>',
})

In [3]:
# Build one loader per PDF, then pool everything they load into a single `pages` list.
# NOTE(review): both paths are absolute and machine-specific; a configurable data
# directory would make the notebook runnable elsewhere.
loader1, loader2 = (
    PyPDFLoader(pdf_path)
    for pdf_path in (
        r'C:\Users\jaint\CC-Task2-RAG\SI Chronicles 23-24 Sem I.pdf',
        r'C:\Users\jaint\CC-Task2-RAG\Placement Chronicles 2023-24.pdf',
    )
)
pages = [*loader1.load(), *loader2.load()]

print(len(pages))

226


In [4]:
# Split the loaded pages into overlapping chunks (sizes are passed to the
# tiktoken-based splitter constructor).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=800,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(pages)

# Embed the chunks into a Chroma store and expose it as a retriever (k=10 per search).
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Chat model shared by every chain in the notebook.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0)

In [5]:
# Template for decomposing the question into five sub-questions
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate only the sub-problems using as little formatting as needed.\n
You must make them relevant from the perspective of a college student seeking help in securing placements. \n
Generate multiple sub-questions related to: {question} \n
Output (5 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)


def _split_sub_questions(raw_text):
    """Turn the model's line-separated reply into a clean list of sub-questions.

    Each line is stripped of surrounding whitespace, and lines that are empty
    or whitespace-only are dropped. Without this, a blank-looking line such as
    " " would survive as a "question" and trigger a wasted retrieval + LLM
    round trip for every such entry further down the notebook.

    Args:
        raw_text: The full text reply produced by the decomposition prompt.

    Returns:
        A list of non-empty, stripped sub-question strings in reply order.
    """
    stripped_lines = (line.strip() for line in raw_text.splitlines())
    return [line for line in stripped_lines if line]


# prompt -> LLM -> plain string -> list of cleaned sub-questions
generate_queries_decomposition = (prompt_decomposition | llm | StrOutputParser() | _split_sub_questions)


question = "What do I need to do to secure a placement in the finance field?"
# The chain already returns stripped, non-empty sub-questions, so no extra filtering is needed.
questions = generate_queries_decomposition.invoke({"question": question})
print(questions)

['What are the typical career paths in finance?', "What skills are most in-demand for finance roles I'm interested in?", 'How do I find internships or work experience relevant to finance?', 'How do I tailor my resume and cover letter for finance positions?', 'What are some common interview questions for finance roles? ']


In [6]:
# Template to answer sub-questions. Context is retrieved using HyDE
# Placeholders: {context} receives the documents retrieved for one sub-question and
# {question} receives that sub-question (both are supplied by retrieve_and_rag).
# Note that `template` and `prompt` are reassigned further down in this cell for the
# final synthesis step, so these bindings only describe the sub-question stage.
template = """You are a helpful assistant that answers questions based on the following context: {context}\n
You must answer the questions from the perspective of a college student seeking help in securing placements. \n
Answer using as little formatting as possible.\n
Question: {question}\n"""

# Wrap the raw template so it can be piped into the LLM.
prompt = ChatPromptTemplate.from_template(template)

def retrieve_and_rag(prompt_rag, sub_questions):
    """Answer each sub-question using HyDE-style retrieval followed by a RAG call.

    For every sub-question the LLM first writes a hypothetical passage; that
    passage (rather than the sub-question itself) is handed to the retriever,
    and whatever the retriever returns is passed to ``prompt_rag`` as context
    together with the sub-question.

    Uses the module-level ``llm`` and ``retriever`` objects.

    Args:
        prompt_rag: Prompt (pipeable into ``llm``) that accepts the keys
            ``context`` and ``question``.
        sub_questions: Iterable of sub-question strings.

    Returns:
        A list with one answer per sub-question, in input order.
    """
    # Template to generate the hypothetical document
    hyde_template = """Please write a scientific paper passage to answer the question
    Question: {question}
    Passage:"""
    prompt_hyde = ChatPromptTemplate.from_template(hyde_template)

    # Neither chain depends on the individual sub-question, so build each once
    # instead of re-creating them on every loop iteration.
    retrieval_chain = prompt_hyde | llm | StrOutputParser() | retriever
    ans_chain = prompt_rag | llm | StrOutputParser()

    rag_results = []
    for sub_question in sub_questions:
        # Hypothetical passage -> retriever lookup
        retrieved_docs = retrieval_chain.invoke({"question": sub_question})
        # Retrieved documents + sub-question -> answer text
        ans = ans_chain.invoke({"context": retrieved_docs, "question": sub_question})
        rag_results.append(ans)

    return rag_results


# Answer every decomposed sub-question with the sub-question prompt; the result is a
# list of answer strings aligned with `questions`.
answers = retrieve_and_rag(prompt, questions)
# print(answers)  # uncomment to inspect the intermediate per-sub-question answers

def format_qa_pairs(questions, answers):
    """Render question/answer pairs as a numbered text block.

    Pairs are numbered from 1 ("Question 1" / "Answer 1", ...) and separated by
    a blank line. Questions and answers are paired positionally with ``zip``,
    so any surplus items in the longer sequence are ignored.

    Args:
        questions: Sequence of question strings.
        answers: Sequence of answer strings, aligned with ``questions``.

    Returns:
        The formatted pairs joined by blank lines, with leading/trailing
        whitespace stripped; an empty string when there are no pairs.
    """
    formatted_pairs = (
        f"Question {i}: {question}\nAnswer {i}: {answer}"
        # start=1 so the labels read naturally instead of beginning at 0
        for i, (question, answer) in enumerate(zip(questions, answers), start=1)
    )
    return "\n\n".join(formatted_pairs).strip()


# Fold the sub-questions and their answers into one text block for the final prompt.
context = format_qa_pairs(questions, answers)

# Synthesis prompt: the Q+A pairs above act as context for answering the main question.
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# prompt -> LLM -> plain string
final_rag_chain = prompt | llm | StrOutputParser()

final_inputs = {"context": context, "question": question}
print(final_rag_chain.invoke(final_inputs))

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 4.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..
Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 8.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


To secure a placement in the finance field, you need a multifaceted approach that combines strong academics, relevant skills, and strategic career planning:

**1. Build a Strong Foundation in Finance:**

* **Focus on Key Coursework:**  Prioritize courses like Derivatives and Risk Management, Financial Risk Analytics and Management, Securities Analysis and Portfolio Management, and Financial Management.  A strong understanding of these concepts is crucial.
* **Supplement with a Finance Minor:** If your major isn't finance, consider a minor to demonstrate your commitment to the field.

**2. Develop In-Demand Technical and Soft Skills:**

* **Master Excel and SQL:** These tools are essential for data analysis in finance.
* **Hone Your Quantitative Skills:**  Be comfortable applying quantitative concepts, especially from courses like derivatives and risk management.
* **Stay Updated on Financial News:**  Regularly follow economic and financial news to demonstrate your interest and awarenes