In [2]:
import os
import numpy as np

from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [4]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment;
# GOOGLE_API_KEY is read from the environment further down.
load_dotenv()

True

In [5]:
# Root folder of the persisted vector stores (relative to the working directory).
path_to_vectorstore = "data/vector_stores/"
# List its entries so the available store names show up as the cell output.
os.listdir(path_to_vectorstore)

['flat', 'HNSW', 'ivfFlat']

# Retrieval

In [6]:
# Embedding model handed to the vector store; the API key is taken from the
# environment rather than hard-coded.
# NOTE(review): presumably the same model that built the saved indexes — confirm.
emb_model = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ.get("GOOGLE_API_KEY"),
)

In [7]:
# Load the persisted "ivfFlat" store from disk, using `emb_model` for embeddings.
# NOTE(review): allow_dangerous_deserialization=True opts in to unpickling the
# saved store (per the LangChain docs) — only load stores you created yourself.
ivfFlat = FAISS.load_local(
    os.path.join(path_to_vectorstore, "ivfFlat"), 
    embeddings=emb_model,
    allow_dangerous_deserialization=True  # Required for recent versions
)

In [97]:
# Registry of loaded vector stores, keyed by store name.
# TODO: register the 'Flat' and 'HNSW' stores here once they are loaded.
vector_stores = {"ivfFlat": ivfFlat}


In [98]:
# Key of the store to query in `vector_stores`.
vector_store = "ivfFlat"
question = "What are LG Energy solution's flagship products?"

# Retrieve the 3 best-matching chunks for `question`.
# Each result is a (document, score) pair, which is the shape
# `documents_to_text` unpacks.
contexts = vector_stores[vector_store].similarity_search_with_score(
    question,
    k=3,
)

In [134]:
def documents_to_text(documents, with_dist=True):
    """Render retrieved documents as a single numbered text block.

    Args:
        documents: Sequence of ``(document, distance)`` pairs. Only the
            ``page_content`` attribute of each document is read.
        with_dist: Accepted but currently unused; distances never appear
            in the output.

    Returns:
        One ``Document <i>:`` section per input pair (0-based), with a
        space and a blank line between sections and nothing after the
        last one. An empty input yields an empty string.
    """
    sections = [
        f"Document {idx}: \n\n{doc.page_content}"
        for idx, (doc, _dist) in enumerate(documents)
    ]
    # The separator sits between sections only, so the final section has
    # no trailing whitespace.
    return " \n\n".join(sections)

In [135]:
# Flatten the retrieved (document, score) pairs into the text block that is
# later passed to the prompt as `annual_report_context`.
annual_report_contexts_txt = documents_to_text(contexts)
print(annual_report_contexts_txt)

Document 0: 

information about the Groups reportable segments is as follows: Segment Major products and services LG Energy solution () Automotive batteries, mobile batteries, ESS batteries and others () The Group has determined the reporting segment as a single reporting segment based on the performance 

Document 1: 

LG Energy Solution Michigan Inc. (1) 100 100 USA December Automotive battery research and manufacturing LG Energy Solution Battery (Nanjing) Co., Ltd. 100 100 China December Automotive battery manufacturing and sales LG Energy Solution Wroclaw sp. z o.o. 100 100 Poland December Automotive battery 

Document 2: 

Germany December ESS battery sales and others LG Energy Solution (Taiwan) Ltd. 100 100 Taiwan December Mobile battery sales and others Areumnoori Co., Ltd. 100 100 Korea December Facility management and cleaning LG Energy Solution Fund I LLC 100 100 USA December Investment in ventures LG Energy


### How to check/evaluate whether retrieved context is most relevant?

1. Is there a way to find out without creating an answer set that covers the entire document? If so, how can I cover an infinite number of possible questions?


In RAG evaluation, "relevance" can mean two things:
- Retrieval relevance: Did the retriever return passages that actually contain the information needed?
- Answer relevance: Did the generated answer stay faithful to the retrieved content and to the ground truth?


Methods:<br>
Human-annotated
- Have annotators label the degree of relevance of each retrieved context to the query, e.g. {relevant, partially relevant, irrelevant}.
- $\mathrm{rel}(q, d) \in \{0, 1, 2\}$; e.g. the triple (Q, contexts_i, 1) becomes a ground-truth label.

- Same philosophy as classical IR benchmarks like TREC, MS MARCO, or BEIR -> they don’t know “all possible relevant passages,” but they fix a representative set of queries + relevance annotations and then measure retrieval against that.

# Generation

In [126]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain

In [127]:
# Chat model used for answer generation. The API key comes from the
# environment; a low temperature reduces sampling randomness.
text_gen_model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.1,
    google_api_key=os.environ.get("GOOGLE_API_KEY"),
)

In [157]:
# NOTE(review): `company_analysis_prompt` is defined in a cell further down this
# notebook; on a fresh kernel that cell must run before this one, otherwise
# this cell raises NameError.
company_name = "LG energy solution"
# Pipe the prompt template into the chat model, then run it with the
# retrieved context and the question.
chain = company_analysis_prompt | text_gen_model
response = chain.invoke({
    "company_name":company_name,
    "annual_report_context":annual_report_contexts_txt,
    "question":question})

In [160]:
# Same context text that was passed to the chain, shown again next to the answer.
print(annual_report_contexts_txt)

Document 0: 

information about the Groups reportable segments is as follows: Segment Major products and services LG Energy solution () Automotive batteries, mobile batteries, ESS batteries and others () The Group has determined the reporting segment as a single reporting segment based on the performance 

Document 1: 

LG Energy Solution Michigan Inc. (1) 100 100 USA December Automotive battery research and manufacturing LG Energy Solution Battery (Nanjing) Co., Ltd. 100 100 China December Automotive battery manufacturing and sales LG Energy Solution Wroclaw sp. z o.o. 100 100 Poland December Automotive battery 

Document 2: 

Germany December ESS battery sales and others LG Energy Solution (Taiwan) Ltd. 100 100 Taiwan December Mobile battery sales and others Areumnoori Co., Ltd. 100 100 Korea December Facility management and cleaning LG Energy Solution Fund I LLC 100 100 USA December Investment in ventures LG Energy


In [158]:
# Show the text content of the model's reply.
print(response.content)

yes
LG Energy Solution's flagship products are **Automotive batteries, mobile batteries, and ESS batteries**.

**Reasoning:**
Document 0, under the section "Segment Major products and services" for "LG Energy solution", explicitly states its major products and services as "Automotive batteries, mobile batteries, ESS batteries and others". This directly identifies these three types of batteries as the core offerings of LG Energy Solution.


In [156]:
# Prompt template for the generation step.
# Placeholders: {company_name}, {annual_report_context}, {question}.
# NOTE(review): the fallback instruction ("without looking at relevant
# information, give answer") and the output requirement ("answer based on given
# context") pull in different directions — confirm the intended behaviour.
company_analysis_prompt = ChatPromptTemplate.from_template("""
You are one of the best financial analyst who is also is well aware of history.
You have been following {company_name} for a long time and well aware of how it has evolved as well 
as its prospects.

By leveraging given context, answer the following question.

If you can't find relevant information from given context, notify that relevant information did not exist.
Then without looking at relevant information, give answer.

<annual report context>
{annual_report_context}
</annual report context>

question: {question}

output requirements:
- "yes" or "no" answer to whether you found relevant information from given context.
- Please provide a detailed, accurate answer based on given context. 
- Be sure to provide strong reasoning that backs your answer.
""")

In [144]:
# Render the template with the current inputs to inspect the exact prompt text
# the model receives.
print(
    company_analysis_prompt.format(
        company_name=company_name,
        annual_report_context=annual_report_contexts_txt,
        question=question,
    )
)

Human: 
You are one of the best financial analyst who is also is well aware of history.
You have been following LG energy solution for a long time and well aware of how it has evolved as well 
as its prospects.

By leveraging given context, answer the following question.

<annual report context>
Document 0: 

information about the Groups reportable segments is as follows: Segment Major products and services LG Energy solution () Automotive batteries, mobile batteries, ESS batteries and others () The Group has determined the reporting segment as a single reporting segment based on the performance 

Document 1: 

LG Energy Solution Michigan Inc. (1) 100 100 USA December Automotive battery research and manufacturing LG Energy Solution Battery (Nanjing) Co., Ltd. 100 100 China December Automotive battery manufacturing and sales LG Energy Solution Wroclaw sp. z o.o. 100 100 Poland December Automotive battery 

Document 2: 

Germany December ESS battery sales and others LG Energy Solution (T

# Evaluation

## Retrieval

## Generation

## Integration