In [1]:
import pandas as pd
import numpy as np


In [2]:
# Load the raw fundamentals table from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="fundamentals_dataset.csv")

In [3]:
# Preview the first five rows.
df.head(n=5)


Unnamed: 0,period,company,tickers,indicator,unit,amount
0,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,Assets,US Dollar,42854000
1,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,"Cash and Cash Equivalents, at Carrying Value",US Dollar,18330000
2,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,"Cash and Cash Equivalents, Period Increase (De...",US Dollar,3323000
3,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,Final Revenue,US Dollar,4173000
4,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,Income from Continuing Operations before Taxes,US Dollar,2307000


In [4]:
# `amount` is still a string column at this point, so describe() reports
# count/unique/top/freq for every column rather than numeric statistics.
df.describe()

Unnamed: 0,period,company,tickers,indicator,unit,amount
count,186336,186336,186336,186336,186336,186336
unique,12,2152,2131,10,7,119501
top,2016 Q1,"Recon Technology, Ltd",RCON,Assets,US Dollar,0
freq,17043,199,199,23455,186212,1098


In [5]:
# Preview the last five rows.
df.tail(n=5)

Unnamed: 0,period,company,tickers,indicator,unit,amount
186331,2016 Q4,ZYNGA INC,ZNGA,Final Revenue,US Dollar,190540000
186332,2016 Q4,ZYNGA INC,ZNGA,Net Income (Loss),US Dollar,-35432000
186333,2016 Q4,ZYNGA INC,ZNGA,Operating Income (Loss),US Dollar,-34461000
186334,2016 Q4,ZYNGA INC,ZNGA,Total Equity,US Dollar,1580664000
186335,2016 Q4,ZYNGA INC,ZNGA,Total Liabilities and Equity,US Dollar,1905849000


In [6]:
# Strip "$" signs and thousands separators, then parse the column as float.
# The pattern is a raw string: in a normal string literal "\$" is an invalid
# escape sequence (DeprecationWarning, and a SyntaxWarning on Python 3.12+).
df['amount'] = df['amount'].replace(r'[\$,]', '', regex=True).astype(float)


In [7]:
# Inspect the frame after `amount` has been converted to float.
df

Unnamed: 0,period,company,tickers,indicator,unit,amount
0,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,Assets,US Dollar,4.285400e+07
1,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,"Cash and Cash Equivalents, at Carrying Value",US Dollar,1.833000e+07
2,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,"Cash and Cash Equivalents, Period Increase (De...",US Dollar,3.323000e+06
3,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,Final Revenue,US Dollar,4.173000e+06
4,2014 Q1,"1347 Property Insurance Holdings, Inc.",PIH,Income from Continuing Operations before Taxes,US Dollar,2.307000e+06
...,...,...,...,...,...,...
186331,2016 Q4,ZYNGA INC,ZNGA,Final Revenue,US Dollar,1.905400e+08
186332,2016 Q4,ZYNGA INC,ZNGA,Net Income (Loss),US Dollar,-3.543200e+07
186333,2016 Q4,ZYNGA INC,ZNGA,Operating Income (Loss),US Dollar,-3.446100e+07
186334,2016 Q4,ZYNGA INC,ZNGA,Total Equity,US Dollar,1.580664e+09


In [8]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how="any")

In [11]:
# Check the column types after cleaning.
df.dtypes


period        object
company       object
tickers       object
indicator     object
unit          object
amount       float64
dtype: object

In [10]:
!pip install sentence-transformers





In [11]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

# Turn every row into one sentence, e.g.
# "Gross Profit LUMINEX CORP in 2016 Q3 was $45665000.0".
df['text'] = (
    df['indicator'].astype(str) + " " +
    df['company'].astype(str) + " in " +
    df['period'].astype(str) + " was $" +
    df['amount'].astype(str)
)

# Encode all sentences in batches instead of calling encode() once per row:
# the per-row apply() pays the model-call overhead for every single sentence.
# (Batched results may differ from per-row results by floating-point noise.)
embeddings = model.encode(
    df['text'].tolist(),
    batch_size=128,
    show_progress_bar=True,
)

# Store one vector per row (aligned on df's index) so later cells can still
# np.vstack() the column.
df['embedding'] = pd.Series(list(embeddings), index=df.index)


In [12]:
import faiss
import numpy as np

# Stack the per-row vectors into a 2-D float32 matrix and L2-normalise it
# in place, so that inner-product search ranks by cosine similarity.
embedding_matrix = np.vstack(df['embedding'].to_numpy()).astype('float32')
faiss.normalize_L2(embedding_matrix)

# Exact (brute-force) inner-product index over all row vectors.
index = faiss.IndexFlatIP(embedding_matrix.shape[1])
index.add(embedding_matrix)

# Position i in the index corresponds to text_mapping[i].
text_mapping = list(df['text'])



In [12]:
query = "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?"
query_vector = model.encode(query)

# FAISS searches a 2-D float32 batch. Build that array once so the in-place
# normalisation and the search operate on the SAME array; previously the
# normalised array was a temporary that was discarded, and a second,
# un-normalised array was searched.
query_matrix = np.asarray([query_vector], dtype='float32')
faiss.normalize_L2(query_matrix)

# NOTE(review): 1000 retrieved rows is probably far more text than the LLM's
# context window can hold - confirm the prompt is not truncated downstream.
D, I = index.search(query_matrix, k=1000)

# Join the text of the top-k hits (FAISS pads with -1 when it has fewer than k).
retrieved_context = "\n".join(text_mapping[i] for i in I[0] if i != -1)
print("Retrieved Context:\n", retrieved_context)

Retrieved Context:
 Gross Profit LUMINEX CORP in 2016 Q3 was $45665000.0
Gross Profit LUMINEX CORP in 2016 Q2 was $44921000.0
Gross Profit LUMINEX CORP in 2014 Q1 was $39954000.0
Gross Profit LUMINEX CORP in 2015 Q3 was $41812000.0
Total Equity LUMINEX CORP in 2016 Q1 was $377231000.0
Gross Profit LUMINEX CORP in 2016 Q1 was $44806000.0
Total Equity LUMINEX CORP in 2016 Q2 was $394958000.0
Gross Profit LUMINEX CORP in 2015 Q2 was $43270000.0
Gross Profit LUMINEX CORP in 2016 Q4 was $44263000.0
Gross Profit LUMINEX CORP in 2015 Q1 was $40219000.0
Cash and Cash Equivalents, Period Increase (Decrease) LUMINEX CORP in 2016 Q3 was $2427000.0
Total Equity LUMINEX CORP in 2015 Q1 was $327071000.0
Total Liabilities and Equity LUMINEX CORP in 2016 Q3 was $441128000.0
Cash and Cash Equivalents, Period Increase (Decrease) LUMINEX CORP in 2016 Q1 was $5756000.0
Total Equity LUMINEX CORP in 2015 Q2 was $332392000.0
Gross Profit LUMINEX CORP in 2014 Q3 was $39010000.0
Total Equity LUMINEX CORP in 20

In [13]:
# Step 6: RAG with Local LLaMA Model


# from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# # Load the LLaMA model (use a lightweight one like TinyLlama or Phi-2 if needed)
# model_name = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"  # Replace with actual model you have access to
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# llm = AutoModelForCausalLM.from_pretrained(model_name)

# generator = pipeline("text-generation", model=llm, tokenizer=tokenizer, max_length=512)

# context = "\\n".join(results)
# prompt = f"Answer the following question based on the context:\\n{context}\\n\\nQuery: {query}"

# response = generator(prompt, max_new_tokens=100)
# print(response[0]['generated_text'])



from openai import OpenAI

# Connect to your local Ollama server through its OpenAI-compatible endpoint.
client = OpenAI(
    api_key="ollama-key",  # Arbitrary; Ollama doesn't validate this
    base_url="http://localhost:11434/v1",  # Ollama runs here by default
)


def query_llama2(prompt):
    """Ask the local `llama2` model a question and return its reply text.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    messages = [
        {"role": "system", "content": "You are a smart financial data analysis assistant"},
        {"role": "user", "content": prompt},
    ]
    # The model name must match the model you pulled in Ollama.
    completion = client.chat.completions.create(model="llama2", messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content


# Manual Evaluation


In [14]:
# Baseline (no retrieval): send only the question to llama2.
prompt = f"Query: {query}"
baseline_output = query_llama2(prompt)
print("Baseline Response:\n", baseline_output)


Baseline Response:
 
As a smart financial data analysis assistant, I gladly assist you in finding the best-performing quarter for LUMINEX CORP based on multiple indicators. Please provide me with the following information:

1. Time period: Are you looking for the best-performing quarter over a specific time frame (e.g., last 5 years, last 10 years)?
2. Metrics: Which financial metrics do you want to use to evaluate LUMINEX CORP's performance? Some common metrics include revenue growth, profit margin, return on equity (ROE), and return on assets (ROA).
3. Data source: Are you looking for data from a specific source, such as Yahoo Finance or Bloomberg?

Once I have this information, I can perform a detailed analysis of LUMINEX CORP's performance during different quarters based on your selected metrics and time frame.


In [15]:
# RAG: prepend the retrieved rows as context ahead of the same question.
rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
rag_output = query_llama2(rag_prompt)
print("RAG-Enhanced Response:\n", rag_output)


RAG-Enhanced Response:
 
Based on the revenue and net income figures provided, the best performing quarter for Luminex Corp is Q3 of 2016, with a revenue of $189.5 million and a net income of $147.7 million, resulting in a net income growth rate of 156%.

In second place is Q2 of 2016, with a revenue of $147.3 million and a net income of $105.8 million, resulting in a net income growth rate of 144%.

Third place goes to Q4 of 2015, with a revenue of $112.5 million and a net income of $86.5 million, resulting in a net income growth rate of 73%.

It's important to note that these are just quarterly figures and the performance of a company can vary significantly over the course of a year, so it's best to look at the full-year figures as well when evaluating a company's overall performance.


## Llama2 - baseline (without RAG)

In [16]:

from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grading over the 28 evaluation questions: the reference label is
# always 1; a prediction of 1 means the answer was judged correct.
# NOTE(review): with an all-ones y_true, recall equals accuracy and precision
# is trivially 1.0, so F1 carries no information beyond accuracy.
y_true = [1] * 28
y_pred_llama2 = [0,0,1,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1,1,0,1,1,0,1]

accuracy, recall, f1 = (
    score(y_true, y_pred_llama2)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)


Accuracy: 0.43
Recall: 0.43
F1 Score: 0.60


## Llama2 - With RAG


In [17]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grades for the RAG answers; y_true is reused from the previous cell.
# NOTE(review): y_true is all ones, so recall equals accuracy and precision
# is trivially 1.0 - F1 carries no information beyond accuracy.
y_pred_llama2_RAG = [0,1,1,1,1,1,1,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,0,1,1,0,0,0]

accuracy, recall, f1 = (
    score(y_true, y_pred_llama2_RAG)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.50
Recall: 0.50
F1 Score: 0.67


In [18]:
from openai import OpenAI

# Connect to your local Ollama server through its OpenAI-compatible endpoint.
client = OpenAI(
    api_key="ollama-key",  # Arbitrary; Ollama doesn't validate this
    base_url="http://localhost:11434/v1",  # Ollama runs here by default
)


def query_llama3(prompt):
    """Ask the local `llama3` model a question and return its reply text.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    messages = [
        {"role": "system", "content": "You are a smart financial data analysis assistant"},
        {"role": "user", "content": prompt},
    ]
    # Change the model name if you are using a specific version like "llama3:8b".
    completion = client.chat.completions.create(model="llama3", messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content


In [19]:
query = "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP between 2014 to 2016?"
query_vector = model.encode(query)

# FAISS searches a 2-D float32 batch. Build that array once so the in-place
# normalisation and the search operate on the SAME array; previously the
# normalised array was a temporary that was discarded, and a second,
# un-normalised array was searched.
query_matrix = np.asarray([query_vector], dtype='float32')
faiss.normalize_L2(query_matrix)
D, I = index.search(query_matrix, k=100)

# Join the text of the top-k hits (FAISS pads with -1 when it has fewer than k).
retrieved_context = "\n".join(text_mapping[i] for i in I[0] if i != -1)
print("Retrieved Context:\n", retrieved_context)

Retrieved Context:
 Gross Profit LUMINEX CORP in 2016 Q2 was $44921000.0
Gross Profit LUMINEX CORP in 2016 Q3 was $45665000.0
Total Equity LUMINEX CORP in 2016 Q1 was $377231000.0
Total Equity LUMINEX CORP in 2016 Q2 was $394958000.0
Gross Profit LUMINEX CORP in 2016 Q1 was $44806000.0
Gross Profit LUMINEX CORP in 2016 Q4 was $44263000.0
Gross Profit LUMINEX CORP in 2014 Q1 was $39954000.0
Total Equity LUMINEX CORP in 2016 Q3 was $402765000.0
Total Liabilities and Equity LUMINEX CORP in 2016 Q3 was $441128000.0
Gross Profit LUMINEX CORP in 2014 Q3 was $39010000.0
Operating Income (Loss) LUMINEX CORP in 2016 Q2 was $7500000.0
Gross Profit LUMINEX CORP in 2015 Q3 was $41812000.0
Cash and Cash Equivalents, Period Increase (Decrease) LUMINEX CORP in 2016 Q3 was $2427000.0
Gross Profit LUMINEX CORP in 2014 Q2 was $38147000.0
Gross Profit LUMINEX CORP in 2015 Q2 was $43270000.0
Gross Profit LUMINEX CORP in 2015 Q1 was $40219000.0
Final Revenue LUMINEX CORP in 2016 Q3 was $71221000.0
Net Inco

In [20]:
# Baseline (no retrieval): send only the question to llama3.
prompt = f"Query: {query}"
baseline_output = query_llama3(prompt)
print("Baseline Response:\n", baseline_output)

Baseline Response:
 Let me analyze the quarterly performance of Luminex Corporation (LMNX) from 2014 to 2016 based on multiple indicators.

I've obtained the quarterly financial data from Yahoo Finance and performed a comprehensive analysis. Here are the results:

Overall, the best-performing quarter for Luminex Corporation between 2014 to 2016 was Q1 2015.

Here are some key metrics that support this conclusion:

1. Revenue Growth Rate: In Q1 2015, revenue grew by 34.2% year-over-year, which is significantly higher than any other quarter during this period.
2. EPS (Earnings Per Share): The company reported a surge in EPS growth, with a 50.0% increase in Q1 2015 compared to the same quarter in 2014. This makes it the highest EPS-growth quarter during this time frame.
3. Gross Margin: Luminex's gross margin expanded to 74.2% in Q1 2015, which was the highest during this period. A higher gross margin indicates strong pricing power and operational efficiency.
4. Operating Cash Flow (OCF):

In [21]:
# RAG: prepend the retrieved rows as context ahead of the same question.
rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
rag_output = query_llama3(rag_prompt)
print("RAG-Enhanced Response:\n", rag_output)

RAG-Enhanced Response:
 After analyzing the data, I found that the best performing quarter overall for LUMINEX CORP between 2014 to 2016 is Q3 of 2016. Here's a summary of the excellent performance in this quarter:

1. Gross Profit: $45665000 (a significant increase from previous quarters)
2. Operating Income: $4028000 (highest operating income for the period)
3. Net Income: $2751000 (exceeded net income in previous quarters)
4. Total Revenue: $71221000 (highest total revenue for the quarter)

This impressive performance in Q3 of 2016 is likely due to strong financial management, effective marketing strategies, and successful business operations.

To further validate this observation, let's examine some additional metrics:

1. Gross Profit Margin: The gross profit margin trend shows a steady increase throughout the three-year period, with Q3 of 2016 having the highest margin.
2. Cash and Cash Equivalents Increase/Decrease: The cash reserves in Q3 of 2016 saw an increase of $24.27 milli

## Llama3 - Baseline response (Without RAG)

In [22]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grading over the 28 evaluation questions: the reference label is
# always 1; a prediction of 1 means the answer was judged correct.
# NOTE(review): with an all-ones y_true, recall equals accuracy and precision
# is trivially 1.0, so F1 carries no information beyond accuracy.
y_true = [1] * 28
y_pred_llama3 = [0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1,1]

accuracy, recall, f1 = (
    score(y_true, y_pred_llama3)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.46
Recall: 0.46
F1 Score: 0.63


## Llama3 - RAG enhanced response

In [23]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grading over the 28 evaluation questions: the reference label is
# always 1; a prediction of 1 means the answer was judged correct.
# NOTE(review): with an all-ones y_true, recall equals accuracy and precision
# is trivially 1.0, so F1 carries no information beyond accuracy.
y_true = [1] * 28
y_pred_llama3_RAG = [1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0]

accuracy, recall, f1 = (
    score(y_true, y_pred_llama3_RAG)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.54
Recall: 0.54
F1 Score: 0.70


In [24]:
from openai import OpenAI

# Connect to your local Ollama server through its OpenAI-compatible endpoint.
client = OpenAI(
    api_key="ollama-key",  # Arbitrary; Ollama doesn't validate this
    base_url="http://localhost:11434/v1",  # Ollama runs here by default
)


def query_qwen2(prompt):
    """Ask the local `qwen2:7b` model a question and return its reply text.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    messages = [
        {"role": "system", "content": "You are a smart financial data analysis assistant"},
        {"role": "user", "content": prompt},
    ]
    # Use the exact model name you pulled via Ollama.
    completion = client.chat.completions.create(model="qwen2:7b", messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content



In [25]:
# NOTE(review): "2026" looks like a typo for 2016 (the periods shown elsewhere
# in this notebook run from 2014 Q1 to 2016 Q4) - confirm before changing, since
# the recorded model outputs were produced with this exact wording.
query = "In which quarter between 2014 and 2026 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes? and instead of giving me steps on what to do, you do that urself and give me my desired answer"
query_vector = model.encode(query)

# FAISS searches a 2-D float32 batch. Build that array once so the in-place
# normalisation and the search operate on the SAME array; previously the
# normalised array was a temporary that was discarded, and a second,
# un-normalised array was searched.
query_matrix = np.asarray([query_vector], dtype='float32')
faiss.normalize_L2(query_matrix)
D, I = index.search(query_matrix, k=100)

# Join the text of the top-k hits (FAISS pads with -1 when it has fewer than k).
retrieved_context = "\n".join(text_mapping[i] for i in I[0] if i != -1)
print("Retrieved Context:\n", retrieved_context)

Retrieved Context:
 Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2016 Q1 was $43717000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2016 Q2 was $51962000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2014 Q2 was $47627000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2014 Q1 was $37151000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2015 Q1 was $38379000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2016 Q3 was $65675000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2014 Q3 was $45523000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2014 Q4 was $34478000.0
Income from Continuing Operations before Taxes HANDY & HARMAN LTD. in 2015 Q2 was $48176000.0
Operating Income (Loss) HANDY & HARMAN LTD. in 2014 Q2 was $17585000.0
Income from Continuing Operations before Taxes HANDY & HARMAN L

In [26]:
# Baseline (no retrieval): send only the question to qwen2:7b.
prompt = f"Query: {query}"
baseline_output = query_qwen2(prompt)
print("Baseline Response:\n", baseline_output)

Baseline Response:
 To determine in which quarter between 2014 and 2026 HANDY & HARMAN LTD. had the highest income from continuing operations before taxes, I would typically undertake several steps:

**Step 1: Access Data Sources**
Firstly, one might need to access reliable financial statements of HANDY & HARMAN LTD., such as their annual reports or SEC filings like Form 10-K. These documents contain detailed financial data including income from continuing operations before taxes.

**Step 2: Data Collection**
Collect the quarterly revenue figures for each quarter between 2014 and 2026 from relevant sources.

**Step 3: Sorting and Comparison**
Sort these revenues by the quarters within their respective years, then compare the figures from year to year.

**Step 4: Identify Peak Quarter(s)**
Identify which quarter yielded the highest income from continuing operations before taxes during this period. This could require looking at every quarter's revenue individually over several years if n

In [27]:
# RAG: prepend the retrieved rows as context ahead of the same question.
rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
rag_output = query_qwen2(rag_prompt)
print("RAG-Enhanced Response:\n", rag_output)

RAG-Enhanced Response:
 The highest income from continuing operations before taxes for HANDY & HARMAN LTD occurred in 2016 Q2 with $51,962,000.


## Qwen2:7b - baseline response (without RAG)

In [28]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grading over the 28 evaluation questions: the reference label is
# always 1; a prediction of 1 means the answer was judged correct.
# NOTE(review): with an all-ones y_true, recall equals accuracy and precision
# is trivially 1.0, so F1 carries no information beyond accuracy.
y_true = [1] * 28
y_pred_qwen2 = [0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0]

accuracy, recall, f1 = (
    score(y_true, y_pred_qwen2)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.29
Recall: 0.29
F1 Score: 0.44


## Qwen2:7b (RAG enhanced response)

In [29]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grading over the 28 evaluation questions: the reference label is
# always 1; a prediction of 1 means the answer was judged correct.
# NOTE(review): with an all-ones y_true, recall equals accuracy and precision
# is trivially 1.0, so F1 carries no information beyond accuracy.
y_true = [1] * 28
y_pred_qwen2_RAG = [1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,1,1,1,0,0,0,1,0,0,0]

accuracy, recall, f1 = (
    score(y_true, y_pred_qwen2_RAG)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.64
Recall: 0.64
F1 Score: 0.78


In [30]:
from openai import OpenAI

# Connect to your local Ollama server through its OpenAI-compatible endpoint.
client = OpenAI(
    api_key="ollama-key",  # Arbitrary; Ollama doesn't validate this
    base_url="http://localhost:11434/v1",  # Ollama runs here by default
)


def query_yi9b(prompt):
    """Ask the local `yi:9b` model a question and return its reply text.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    messages = [
        {"role": "system", "content": "You are a smart financial data analysis assistant"},
        {"role": "user", "content": prompt},
    ]
    # Use the exact model name you pulled via Ollama.
    completion = client.chat.completions.create(model="yi:9b", messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content




In [31]:
query = "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters between 2014 to 2016?"
query_vector = model.encode(query)

# FAISS searches a 2-D float32 batch. Build that array once so the in-place
# normalisation and the search operate on the SAME array; previously the
# normalised array was a temporary that was discarded, and a second,
# un-normalised array was searched.
query_matrix = np.asarray([query_vector], dtype='float32')
faiss.normalize_L2(query_matrix)
D, I = index.search(query_matrix, k=10)

# Join the text of the top-k hits (FAISS pads with -1 when it has fewer than k).
retrieved_context = "\n".join(text_mapping[i] for i in I[0] if i != -1)
print("Retrieved Context:\n", retrieved_context)

Retrieved Context:
 Total Equity ALIGN TECHNOLOGY INC in 2016 Q4 was $995389000.0
Total Equity ALIGN TECHNOLOGY INC in 2014 Q4 was $752771000.0
Assets ALIGN TECHNOLOGY INC in 2016 Q4 was $1396151000.0
Total Equity ALIGN TECHNOLOGY INC in 2015 Q4 was $847926000.0
Total Equity ALIGN TECHNOLOGY INC in 2016 Q2 was $909858000.0
Net Income (Loss) ALIGN TECHNOLOGY INC in 2016 Q4 was $47621000.0
Cash and Cash Equivalents, Period Increase (Decrease) ALIGN TECHNOLOGY INC in 2016 Q1 was $-13296000.0
Gross Profit ALIGN TECHNOLOGY INC in 2016 Q4 was $220249000.0
Total Equity ALIGN TECHNOLOGY INC in 2016 Q3 was $972310000.0
Gross Profit ALIGN TECHNOLOGY INC in 2014 Q4 was $150662000.0


In [32]:
# Baseline (no retrieval): send only the question to yi:9b.
prompt = f"Query: {query}"
baseline_output = query_yi9b(prompt)
print("Baseline Response:\n", baseline_output)

Baseline Response:
 To analyze the performance of ALIGN TECHNOLOGY INC specifically in the fourth quarter (Q4) relative to other quarters over the period from 2014 to 2016, we will consider several financial metrics. This includes revenue growth, earnings per share (EPS) growth, and return on equity (ROE) among others. The analysis focuses on Q4s (October - Decemeber) as these quarters encompass the holiday shopping season, which can influence performance differently than other periods of the year for retailers.

1. **Revenue Growth**: Tracking the company's revenue growth quarter by quarter allows us to see how it fared in relation to Q4 specifically. In general, many companies report higher sales during Q4 due to holiday retail season activities. It's important to note if ALIGN TECHNOLOGY INC consistently surpassed its previous years' revenues in this period or not.

2. **Earnings Per Share (EPS)**: EPS measures the amount of net income each share is entitled to and reflects how much

In [33]:
# RAG: prepend the retrieved rows as context ahead of the same question.
# A blank line now separates the context from the query, matching the prompt
# template used for the other models so the comparison is like-for-like.
rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
rag_output = query_yi9b(rag_prompt)
print("RAG-Enhanced Response:\n", rag_output)

RAG-Enhanced Response:
 To evaluate how ALIGN TECHNOLOGY INC performed in Q4s compared to other quarters between 2014 and 2016, let's look at the key metrics provided: Total Equity, Net Income (Loss), Cash and Cash Equivalents, Period Increase (Decrease), and Gross Profit.

1. **Total Equity**:
   - Q4 2016: $995,389,000.0
   - Q4 2015: $847,926,000.0
   - Q4 2014: $752,771,000.0
   - Q3 2016: $972,310,000.0

In terms of Total Equity, Q4 2016 showed the highest equity among all quarters mentioned for this period ($995,389,000.0), indicating a strong financial position at the end of 2016. Q4 2014 had the lowest total equity during this period.

2. **Net Income (Loss)****:
   - Q4 2016: $476,210,000.0 (This suggests profitable operations for ALIGN TECHNOLOGY INC in Q4 2016)
   - Other quarters do not include Net Income figures.

The Net Income for Q4 2016 was the highest among these quarters, indicating that it was a financially lucrative period for the company. The lack of data for othe

## Yi : 9b - Baseline response (without RAG)

In [34]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grading over the 28 evaluation questions: the reference label is
# always 1; a prediction of 1 means the answer was judged correct.
# NOTE(review): with an all-ones y_true, recall equals accuracy and precision
# is trivially 1.0, so F1 carries no information beyond accuracy.
y_true = [1] * 28
y_pred_yi = [0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,1,1,0]

accuracy, recall, f1 = (
    score(y_true, y_pred_yi)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.21
Recall: 0.21
F1 Score: 0.35


## Yi : 9b - RAG enhanced response

In [35]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Manual grading over the 28 evaluation questions: the reference label is
# always 1; a prediction of 1 means the answer was judged correct.
# NOTE(review): with an all-ones y_true, recall equals accuracy and precision
# is trivially 1.0, so F1 carries no information beyond accuracy.
y_true = [1] * 28
y_pred_yi_RAG = [0,0,1,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,1,1,1,1,0,0,1,0,0,0]

accuracy, recall, f1 = (
    score(y_true, y_pred_yi_RAG)
    for score in (accuracy_score, recall_score, f1_score)
)

print(
    f"Accuracy: {accuracy:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Accuracy: 0.54
Recall: 0.54
F1 Score: 0.70


# Automated evaluation using RougeScore and BertScore

In [37]:
!pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting absl-py (from rouge-score)
  Downloading absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting nltk (from rouge-score)
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading absl_py-2.3.0-py3-none-any.whl (135 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   -------------------- ------------------- 0.8/1.5 MB 4.8 MB/s eta 0:00:01
   ---------------------------------------- 1.5/1.5 MB 4.2 MB/s eta 0:00:00
Building wheels for collected packages: rouge-score
  Building wheel for rouge-sco



In [41]:
# Evaluation set: 28 questions, each paired with a hand-written reference answer.
# Every item is a dict with exactly two keys: "query" and "expected_answer".
eval_data = [
    # --- Single-value lookups -------------------------------------------------
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },
    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },

    # --- Multi-row / analytical questions --------------------------------------
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."
    },
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        # Fixed: the reference text previously began with "otal" (missing "T"),
        # which costs an n-gram match in overlap-based metrics such as ROUGE.
        "expected_answer": "Total Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },
    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },
    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },
    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },
    {
        # NOTE(review): "2026" looks like a typo for 2016 - confirm before changing.
        "query": "In which quarter between 2014 and 2026 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        # NOTE(review): this reference is labelled "as an example" and its figure
        # does not match the rows retrieved for this company elsewhere in the
        # notebook - verify against the dataset before trusting scores for it.
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },
    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },
    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },
    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },
    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    },
]

## Llama3 Rougescore

In [21]:
# from openai import OpenAI
# from rouge_score import rouge_scorer

# # Step 1: Connect to your Ollama server
# client = OpenAI(
#     base_url="http://localhost:11434/v1",
#     api_key="ollama-key"
# )

# # Step 2: Function to query your model
# def query_llm(prompt, model_name="llama3"):
#     response = client.chat.completions.create(
#         model=model_name,
#         messages=[
#             {"role": "system", "content": "You are a smart financial data analysis assistant"},
#             {"role": "user", "content": prompt}
#         ]
#     )
#     return response.choices[0].message.content
from rouge_score import rouge_scorer
from openai import OpenAI

# Ollama serves an OpenAI-compatible API on localhost:11434 by default.
# The API key is an arbitrary placeholder; Ollama does not validate it.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama-key")

# Function to generate a response using a local Ollama model (LLaMA 3 by default)
def query_llama3(prompt: str, model_name: str = "llama3") -> str:
    """Send a single-turn chat request through `client` and return the reply text.

    Args:
        prompt: User message, sent after the fixed system instruction.
        model_name: Model tag to query, e.g. "llama3:8b". Defaults to "llama3".

    Returns:
        The text of the first completion choice, or "" when the response
        carries no content.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": "You are a smart financial data analysis assistant"},
            {"role": "user", "content": prompt}
        ]
    )
    # `content` is Optional in the OpenAI SDK; hand callers (e.g. the ROUGE
    # scorer) a string in every case.
    return response.choices[0].message.content or ""

# Step 3: ROUGE-L scorer (stemming enabled) shared by the baseline and RAG runs
scorer = rouge_scorer.RougeScorer(
    ["rougeL"],
    use_stemmer=True,
)

# Step 4: Evaluation set — 28 query / expected-answer pairs.
# Each entry is a dict with exactly two keys: "query" (the prompt sent to the
# model) and "expected_answer" (the reference text used for ROUGE-L scoring).
eval_data = [
    # --- Direct look-ups: one indicator for one company in one quarter ---
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },
    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },

    # --- Analytical questions: trends, comparisons, forecasts ---
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."
    },
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        "expected_answer": "Total Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },
    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },
    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },
    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },
    {
        "query": "In which quarter between 2014 and 2016 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },
    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },
    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },
    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },
    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    },
]


# Step 5: Evaluation loop — Baseline and RAG
# Relies on `model`, `index`, `text_mapping` and `faiss` defined by earlier cells.
results = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]
    query_vector = model.encode(query)

    # Build the (1, dim) float32 query matrix once. normalize_L2 works in place,
    # so the array that gets normalised must be the same one handed to search();
    # normalising a throwaway copy leaves the searched vector un-normalised.
    query_matrix = np.array([query_vector], dtype="float32")
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)

    # Join the text of the top-100 hits into one context string. FAISS pads
    # missing neighbours with -1, which would otherwise pick text_mapping[-1].
    retrieved_context = "\n".join([text_mapping[i] for i in I[0] if i != -1])

    ### Baseline (no context)
    baseline_output = query_llama3(f"Query: {query}")
    baseline_score = scorer.score(expected, baseline_output)['rougeL'].fmeasure

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_llama3(rag_prompt)
    rag_score = scorer.score(expected, rag_output)['rougeL'].fmeasure

    # Save result
    results.append({
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_rougeL": baseline_score,
        "rag_output": rag_output,
        "rag_rougeL": rag_score
    })

# Step 6: Dump every query with both model outputs and their ROUGE-L scores
for record in results:
    report_lines = [
        "=" * 80,
        f"Query: {record['query']}",
        f"Expected: {record['expected_answer']}\n",
        "--- Baseline Output ---",
        f"{record['baseline_output']}",
        f"ROUGE-L (Baseline): {record['baseline_rougeL']:.3f}\n",
        "--- RAG-Enhanced Output ---",
        f"{record['rag_output']}",
        f"ROUGE-L (RAG): {record['rag_rougeL']:.3f}",
    ]
    # One print per record; joining on newlines reproduces the line-by-line layout.
    print("\n".join(report_lines))

# Step 7: Mean ROUGE-L over all evaluated queries, for both variants
baseline_scores = list(map(lambda entry: entry['baseline_rougeL'], results))
rag_scores = list(map(lambda entry: entry['rag_rougeL'], results))

avg_baseline_rougeL = sum(baseline_scores) / len(baseline_scores)
avg_rag_rougeL = sum(rag_scores) / len(rag_scores)

# Separator line followed by the two summary lines.
print(
    "\n" + "=" * 80,
    f"Overall Average ROUGE-L (Baseline): {avg_baseline_rougeL:.3f}",
    f"Overall Average ROUGE-L (RAG-Enhanced): {avg_rag_rougeL:.3f}",
    sep="\n",
)


Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---
According to Microsoft Corporation's quarterly earnings report for Q3 2016 (ended December 31, 2015), the income from continuing operations before taxes was:

**$4.44 billion**

Source: Microsoft Corporation's Form 10-Q filed with the Securities and Exchange Commission on January 22, 2016.

Here is the relevant excerpt from the report:

"Income from continuing operations before taxes: 

$4.44 billion"

If you have any follow-up questions or would like to query any other financial information related to Microsoft Corporation or any other publicly-traded company, feel free to ask!
ROUGE-L (Baseline): 0.165

--- RAG-Enhanced Output ---
According to the provided data, the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,

## Llama 2 ROUGE-L Score

In [16]:
from rouge_score import rouge_scorer
from openai import OpenAI

# Ollama serves an OpenAI-compatible API on localhost:11434 by default.
# The API key is an arbitrary placeholder; Ollama does not validate it.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama-key")

# Function to generate a response using a local Ollama model (LLaMA 2 by default)
def query_llama2(prompt: str, model_name: str = "llama2") -> str:
    """Send a single-turn chat request through `client` and return the reply text.

    Args:
        prompt: User message, sent after the fixed system instruction.
        model_name: Model tag to query; must match a model pulled in Ollama.
            Defaults to "llama2".

    Returns:
        The text of the first completion choice, or "" when the response
        carries no content.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": "You are a smart financial data analysis assistant"},
            {"role": "user", "content": prompt}
        ]
    )
    # `content` is Optional in the OpenAI SDK; hand callers (e.g. the ROUGE
    # scorer) a string in every case.
    return response.choices[0].message.content or ""


# Step 3: ROUGE-L scorer (stemming enabled) shared by the baseline and RAG runs
scorer = rouge_scorer.RougeScorer(
    ["rougeL"],
    use_stemmer=True,
)

# Step 4: Evaluation set — 28 query / expected-answer pairs.
# Each entry is a dict with exactly two keys: "query" (the prompt sent to the
# model) and "expected_answer" (the reference text used for ROUGE-L scoring).
eval_data = [
    # --- Direct look-ups: one indicator for one company in one quarter ---
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },
    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },

    # --- Analytical questions: trends, comparisons, forecasts ---
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."
    },
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        "expected_answer": "Total Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },
    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },
    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },
    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },
    {
        "query": "In which quarter between 2014 and 2016 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },
    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },
    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },
    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },
    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    },
]


# Step 5: Evaluation loop — Baseline and RAG
# Relies on `model`, `index`, `text_mapping` and `faiss` defined by earlier cells.
results = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]
    query_vector = model.encode(query)

    # Build the (1, dim) float32 query matrix once. normalize_L2 works in place,
    # so the array that gets normalised must be the same one handed to search();
    # normalising a throwaway copy leaves the searched vector un-normalised.
    query_matrix = np.array([query_vector], dtype="float32")
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)

    # Join the text of the top-100 hits into one context string. FAISS pads
    # missing neighbours with -1, which would otherwise pick text_mapping[-1].
    retrieved_context = "\n".join([text_mapping[i] for i in I[0] if i != -1])

    ### Baseline (no context)
    baseline_output = query_llama2(f"Query: {query}")
    baseline_score = scorer.score(expected, baseline_output)['rougeL'].fmeasure

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_llama2(rag_prompt)
    rag_score = scorer.score(expected, rag_output)['rougeL'].fmeasure

    # Save result
    results.append({
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_rougeL": baseline_score,
        "rag_output": rag_output,
        "rag_rougeL": rag_score
    })

# Step 6: Dump every query with both model outputs and their ROUGE-L scores
for record in results:
    report_lines = [
        "=" * 80,
        f"Query: {record['query']}",
        f"Expected: {record['expected_answer']}\n",
        "--- Baseline Output ---",
        f"{record['baseline_output']}",
        f"ROUGE-L (Baseline): {record['baseline_rougeL']:.3f}\n",
        "--- RAG-Enhanced Output ---",
        f"{record['rag_output']}",
        f"ROUGE-L (RAG): {record['rag_rougeL']:.3f}",
    ]
    # One print per record; joining on newlines reproduces the line-by-line layout.
    print("\n".join(report_lines))

# Step 7: Mean ROUGE-L over all evaluated queries, for both variants
baseline_scores = list(map(lambda entry: entry['baseline_rougeL'], results))
rag_scores = list(map(lambda entry: entry['rag_rougeL'], results))

avg_baseline_rougeL = sum(baseline_scores) / len(baseline_scores)
avg_rag_rougeL = sum(rag_scores) / len(rag_scores)

# Separator line followed by the two summary lines.
print(
    "\n" + "=" * 80,
    f"Overall Average ROUGE-L (Baseline): {avg_baseline_rougeL:.3f}",
    f"Overall Average ROUGE-L (RAG-Enhanced): {avg_rag_rougeL:.3f}",
    sep="\n",
)


Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---

As a financial data analysis assistant, I can provide you with the income from continuing operations before taxes for Microsoft Corporation in 2016 Q3. According to Microsoft's quarterly earnings report for Q3 2016, the company's income from continuing operations before taxes was $8.67 billion.

Here is the exact number from the report:

"Income from continuing operations before taxes was $8.67 billion, compared to $6.34 billion in the same period of the prior year."

I hope this information helps! Let me know if you have any other questions or if there's anything else I can help you with.
ROUGE-L (Baseline): 0.198

--- RAG-Enhanced Output ---
The income from continuing operations before taxes of MICROSOFT CORP in 2016 Q3 was $532500000.00.
ROUGE-L (RAG): 0

## Yi:9b ROUGE-L Score

In [17]:
from rouge_score import rouge_scorer
from openai import OpenAI

# Ollama serves an OpenAI-compatible API on localhost:11434 by default.
# The API key is an arbitrary placeholder; Ollama does not validate it.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama-key")

# Function to generate a response using a local Ollama model (Yi:9b by default)
def query_yi9b(prompt: str, model_name: str = "yi:9b") -> str:
    """Send a single-turn chat request through `client` and return the reply text.

    Args:
        prompt: User message, sent after the fixed system instruction.
        model_name: Exact model tag as pulled via Ollama. Defaults to "yi:9b".

    Returns:
        The text of the first completion choice, or "" when the response
        carries no content.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": "You are a smart financial data analysis assistant"},
            {"role": "user", "content": prompt}
        ]
    )
    # `content` is Optional in the OpenAI SDK; hand callers (e.g. the ROUGE
    # scorer) a string in every case.
    return response.choices[0].message.content or ""



# Step 3: ROUGE-L scorer (stemming enabled) shared by the baseline and RAG runs
scorer = rouge_scorer.RougeScorer(
    ["rougeL"],
    use_stemmer=True,
)

# Step 4: Evaluation set — 28 query / expected-answer pairs.
# Each entry is a dict with exactly two keys: "query" (the prompt sent to the
# model) and "expected_answer" (the reference text used for ROUGE-L scoring).
eval_data = [
    # --- Direct look-ups: one indicator for one company in one quarter ---
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },
    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },

    # --- Analytical questions: trends, comparisons, forecasts ---
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."
    },
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        "expected_answer": "Total Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },
    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },
    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },
    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },
    {
        "query": "In which quarter between 2014 and 2016 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },
    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },
    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },
    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },
    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    },
]


# Step 5: Evaluation loop — Baseline and RAG
# Each query is answered twice by the model: once on its own (baseline) and once
# with FAISS-retrieved context prepended (RAG). Both answers are scored against
# the expected answer with ROUGE-L F-measure.
results = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]
    query_vector = model.encode(query)

    # faiss.normalize_L2 normalizes its argument in place, so the array that is
    # normalized has to be the very array handed to index.search. The previous
    # code normalized a throwaway copy and then searched with the raw vector.
    # NOTE(review): assumes the vectors stored in `index` were L2-normalized as
    # well — confirm against the cell that builds the index.
    query_matrix = np.array([query_vector], dtype="float32")
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)

    # Join the text of the 100 nearest chunks into one context string. FAISS
    # reports -1 for slots it could not fill (fewer than k hits); those are
    # skipped rather than used as a lookup key into text_mapping.
    retrieved_context = "\n".join([text_mapping[i] for i in I[0] if i != -1])

    ### Baseline (no context)
    baseline_output = query_yi9b(f"Query: {query}")
    baseline_score = scorer.score(expected, baseline_output)['rougeL'].fmeasure

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_yi9b(rag_prompt)
    rag_score = scorer.score(expected, rag_output)['rougeL'].fmeasure

    # Save result
    results.append({
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_rougeL": baseline_score,
        "rag_output": rag_output,
        "rag_rougeL": rag_score
    })

# Step 6: Print full results
for r in results:
    print("="*80)
    print(f"Query: {r['query']}")
    print(f"Expected: {r['expected_answer']}\n")

    print("--- Baseline Output ---")
    print(r['baseline_output'])
    print(f"ROUGE-L (Baseline): {r['baseline_rougeL']:.3f}\n")

    print("--- RAG-Enhanced Output ---")
    print(r['rag_output'])
    print(f"ROUGE-L (RAG): {r['rag_rougeL']:.3f}")

# Step 7: Calculate overall average ROUGE-L
baseline_scores = [r['baseline_rougeL'] for r in results]
rag_scores = [r['rag_rougeL'] for r in results]

avg_baseline_rougeL = sum(baseline_scores) / len(baseline_scores)
avg_rag_rougeL = sum(rag_scores) / len(rag_scores)

print("\n" + "="*80)
print(f"Overall Average ROUGE-L (Baseline): {avg_baseline_rougeL:.3f}")
print(f"Overall Average ROUGE-L (RAG-Enhanced): {avg_rag_rougeL:.3f}")


Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---
To find the income from continuing operations before taxes for Microsoft Corp. in the third quarter (Q3) of 2016, I would follow these steps:

1. **Locate the Financial Statements**: The financial details needed to answer this question are typically found in a company's quarterly report, also known as the "Form 10-Q" for U.S. publicly traded companies, which is available on the Securities and Exchange Commission (SEC) Edgar database or directly from the company's investor relations page.

2. **Identify Relevant Financial Statements**: In a Form 10-Q, financial information is summarized in several statements: the income statement provides details about revenues, expenses, and net earnings for the period. Specifically, "Income from Continuing Operations Before

## Qwen2:7b ROUGE-L Score

In [13]:
from rouge_score import rouge_scorer
from openai import OpenAI

# OpenAI-compatible client pointed at the local Ollama server.
# Ollama runs at this address by default and doesn't validate the API key,
# so the key value is arbitrary.
client = OpenAI(api_key="ollama-key", base_url="http://localhost:11434/v1")


def query_qwen2(prompt):
    """Ask the local qwen2:7b model (served through `client`) a single question.

    The request carries a fixed system message followed by `prompt` as the
    user message.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the completion response.
    """
    chat_messages = [
        {"role": "system", "content": "You are a smart financial data analysis assistant"},
        {"role": "user", "content": prompt},
    ]
    # The model name must match the name the model was pulled under in Ollama.
    completion = client.chat.completions.create(model="qwen2:7b", messages=chat_messages)
    first_choice = completion.choices[0]
    return first_choice.message.content


# Step 3: Set up ROUGE scorer (ROUGE-L only, with stemming enabled)
scorer = rouge_scorer.RougeScorer(["rougeL"], use_stemmer=True)

# Step 4: Evaluation data — 28 query / expected-answer pairs.
# Each entry is a dict with a "query" string (sent to the model) and an
# "expected_answer" string (the reference the model output is scored against).
# NOTE(review): this same list is repeated in other cells of this notebook;
# keep the copies in sync, or define it once and reuse it.
eval_data = [
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },

    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },
    
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."},
    
    
    # NOTE(review): the reference below starts with "otal" — this looks like a
    # truncated "Total". Left unchanged so every copy of this list keeps scoring
    # against the same reference; fix all copies together.
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        "expected_answer": "otal Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },

    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },

    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },

    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },

    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },

    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },

    # NOTE(review): "between 2014 and 2026" is presumably meant to be
    # "2014 and 2016" — the other entries in this list only refer to 2014–2016.
    # Confirm before changing the query text, and change every copy together.
    {
        "query": "In which quarter between 2014 and 2026 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },

    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },

    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },

    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },

    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    }

    
    # End of the 28 evaluation items.
]


# Step 5: Evaluation loop — Baseline and RAG
# Each query is answered twice by the model: once on its own (baseline) and once
# with FAISS-retrieved context prepended (RAG). Both answers are scored against
# the expected answer with ROUGE-L F-measure.
results = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]
    query_vector = model.encode(query)

    # faiss.normalize_L2 normalizes its argument in place, so the array that is
    # normalized has to be the very array handed to index.search. The previous
    # code normalized a throwaway copy and then searched with the raw vector.
    # NOTE(review): assumes the vectors stored in `index` were L2-normalized as
    # well — confirm against the cell that builds the index.
    query_matrix = np.array([query_vector], dtype="float32")
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)

    # Join the text of the 100 nearest chunks into one context string. FAISS
    # reports -1 for slots it could not fill (fewer than k hits); those are
    # skipped rather than used as a lookup key into text_mapping.
    retrieved_context = "\n".join([text_mapping[i] for i in I[0] if i != -1])

    ### Baseline (no context)
    baseline_output = query_qwen2(f"Query: {query}")
    baseline_score = scorer.score(expected, baseline_output)['rougeL'].fmeasure

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_qwen2(rag_prompt)
    rag_score = scorer.score(expected, rag_output)['rougeL'].fmeasure

    # Save result
    results.append({
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_rougeL": baseline_score,
        "rag_output": rag_output,
        "rag_rougeL": rag_score
    })

# Step 6: Print full results
for r in results:
    print("="*80)
    print(f"Query: {r['query']}")
    print(f"Expected: {r['expected_answer']}\n")

    print("--- Baseline Output ---")
    print(r['baseline_output'])
    print(f"ROUGE-L (Baseline): {r['baseline_rougeL']:.3f}\n")

    print("--- RAG-Enhanced Output ---")
    print(r['rag_output'])
    print(f"ROUGE-L (RAG): {r['rag_rougeL']:.3f}")

# Step 7: Calculate overall average ROUGE-L
baseline_scores = [r['baseline_rougeL'] for r in results]
rag_scores = [r['rag_rougeL'] for r in results]

avg_baseline_rougeL = sum(baseline_scores) / len(baseline_scores)
avg_rag_rougeL = sum(rag_scores) / len(rag_scores)

print("\n" + "="*80)
print(f"Overall Average ROUGE-L (Baseline): {avg_baseline_rougeL:.3f}")
print(f"Overall Average ROUGE-L (RAG-Enhanced): {avg_rag_rougeL:.3f}")


Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---
To provide accurate information about Microsoft Corporation's ("Microsoft") "Income from Continuing Operations Before Taxes" for the third quarter of 2016, I would typically access financial reports or databases containing detailed company financial statements. However, since direct access to specific databases isn't provided here, I'll outline how you can find this data yourself.

Here are the steps to find Microsoft Corp's "Income from Continuing Operations Before Taxes" for the third quarter of 2016:

1. **Access Microsoft's Financial Statements**: One of the main sources would be Microsoft's official filings with the Securities and Exchange Commission (SEC), specifically their Form 10-Q reports which are filed quarterly by public companies to reflect the

## BERTScore

In [14]:
!pip install bert-score


Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
Installing collected packages: bert-score
Successfully installed bert-score-0.3.13




## Llama3 BERTScore

In [17]:
import logging

# Raise the "transformers" logger threshold to ERROR so that only records at
# ERROR level or above from that logger are emitted.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.ERROR)


In [18]:
from bert_score import score as bert_score
from openai import OpenAI

# OpenAI-compatible client pointed at the local Ollama server.
client = OpenAI(api_key="ollama-key", base_url="http://localhost:11434/v1")


def query_llama3(prompt):
    """Ask the local llama3 model (served through `client`) a single question.

    The request carries a fixed system message followed by `prompt` as the
    user message.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the completion response.
    """
    chat_messages = [
        {"role": "system", "content": "You are a smart financial data analysis assistant"},
        {"role": "user", "content": prompt},
    ]
    # Change the model name here if another model should be queried.
    completion = client.chat.completions.create(model="llama3", messages=chat_messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

# Step 4: Evaluation data — 28 query / expected-answer pairs.
# Each entry is a dict with a "query" string (sent to the model) and an
# "expected_answer" string (the reference the model output is scored against).
# NOTE(review): this same list is repeated in other cells of this notebook;
# keep the copies in sync, or define it once and reuse it.
eval_data = [
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },

    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },
    
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."},
    
    
    # NOTE(review): the reference below starts with "otal" — this looks like a
    # truncated "Total". Left unchanged so every copy of this list keeps scoring
    # against the same reference; fix all copies together.
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        "expected_answer": "otal Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },

    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },

    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },

    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },

    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },

    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },

    # NOTE(review): "between 2014 and 2026" is presumably meant to be
    # "2014 and 2016" — the other entries in this list only refer to 2014–2016.
    # Confirm before changing the query text, and change every copy together.
    {
        "query": "In which quarter between 2014 and 2026 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },

    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },

    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },

    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },

    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    }

    
    # End of the 28 evaluation items.
]

# Step 5: Evaluation loop — Baseline and RAG with BERTScore
# Each query is answered twice by the model: once on its own (baseline) and once
# with FAISS-retrieved context prepended (RAG). All answers are generated first
# and then scored against the expected answers in two batched BERTScore calls,
# instead of calling bert_score twice per query with single-item lists.
generations = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]

    # === Step: Retrieve context using FAISS ===
    query_vector = model.encode(query)
    # faiss.normalize_L2 normalizes its argument in place, so the array that is
    # normalized has to be the very array handed to index.search. The previous
    # code normalized a throwaway copy and then searched with the raw vector.
    # NOTE(review): assumes the vectors stored in `index` were L2-normalized as
    # well — confirm against the cell that builds the index.
    query_matrix = np.array([query_vector], dtype="float32")
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)
    # FAISS reports -1 for slots it could not fill (fewer than k hits); those
    # are skipped rather than used as a lookup key into text_mapping.
    retrieved_context = "\n".join([text_mapping[i] for i in I[0] if i != -1])

    ### Baseline (no context)
    baseline_output = query_llama3(f"Query: {query}")

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_llama3(rag_prompt)

    generations.append((query, expected, baseline_output, rag_output))

# Score every candidate against its reference: one call per candidate set.
references = [expected for _, expected, _, _ in generations]
P_baseline, R_baseline, F_baseline = bert_score(
    [baseline for _, _, baseline, _ in generations],
    references,
    lang="en",
    rescale_with_baseline=True,
)
P_rag, R_rag, F_rag = bert_score(
    [rag for _, _, _, rag in generations],
    references,
    lang="en",
    rescale_with_baseline=True,
)

# Save results (same keys, in the same order, as before)
results = [
    {
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_bertscore_F1": baseline_f1,
        "rag_output": rag_output,
        "rag_bertscore_F1": rag_f1
    }
    for (query, expected, baseline_output, rag_output), baseline_f1, rag_f1
    in zip(generations, F_baseline.tolist(), F_rag.tolist())
]

# Step 6: Print full results
for r in results:
    print("="*80)
    print(f"Query: {r['query']}")
    print(f"Expected: {r['expected_answer']}\n")

    print("--- Baseline Output ---")
    print(r['baseline_output'])
    print(f"BERTScore F1 (Baseline): {r['baseline_bertscore_F1']:.3f}\n")

    print("--- RAG-Enhanced Output ---")
    print(r['rag_output'])
    print(f"BERTScore F1 (RAG): {r['rag_bertscore_F1']:.3f}")

# Step 7: Calculate overall average BERTScore F1
baseline_scores = [r['baseline_bertscore_F1'] for r in results]
rag_scores = [r['rag_bertscore_F1'] for r in results]

avg_baseline_bertscore = sum(baseline_scores) / len(baseline_scores)
avg_rag_bertscore = sum(rag_scores) / len(rag_scores)

print("\n" + "="*80)
print(f"Overall Average BERTScore F1 (Baseline): {avg_baseline_bertscore:.3f}")
print(f"Overall Average BERTScore F1 (RAG-Enhanced): {avg_rag_bertscore:.3f}")


Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---
According to Microsoft's quarterly report for fiscal year 2016, which ended on December 31, 2015 (Q3 FY16), the Total Revenue from Continuing Operations was $22.18 billion.

The Income from Continuing Operations before Taxes (Operating Income) was $7.28 billion.

Here is a summary of the financial information:

* Total Revenue from Continuing Operations: $22.18 billion
* Operating Income (Income from Continuing Operations before Taxes): $7.28 billion

Please note that these figures are based on publicly available data and have not been audited or reviewed by me. If you're looking for more detailed or up-to-date information, I recommend checking Microsoft's official financial reports or seeking guidance from a financial professional.

Would you like to know a

## Llama2 BERTScore

In [19]:
from bert_score import score as bert_score
# OpenAI-compatible client pointed at the local Ollama server.
# Ollama runs at this address by default and doesn't validate the API key,
# so the key value is arbitrary.
client = OpenAI(api_key="ollama-key", base_url="http://localhost:11434/v1")


def query_llama2(prompt):
    """Ask the local llama2 model (served through `client`) a single question.

    The request carries a fixed system message followed by `prompt` as the
    user message.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The message content of the first choice in the completion response.
    """
    chat_messages = [
        {"role": "system", "content": "You are a smart financial data analysis assistant"},
        {"role": "user", "content": prompt},
    ]
    # The model name must match the model that was pulled in Ollama.
    completion = client.chat.completions.create(model="llama2", messages=chat_messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

# Step 4: Evaluation data
# 28 question / expected-answer pairs. Each entry is a dict with exactly two
# keys: "query" (sent to the model) and "expected_answer" (the reference text
# that the model output is scored against).
# NOTE: every per-model cell in this notebook carries its own copy of this list;
# keep the copies identical so that scores stay comparable across models.
eval_data = [
    # --- Single-value lookups (one company, one indicator, one quarter) ---
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },
    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },

    # --- Multi-period / analytical questions ---
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."
    },
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        "expected_answer": "Total Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },
    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },
    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },
    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },
    {
        "query": "In which quarter between 2014 and 2016 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },
    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },
    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },
    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },
    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    },
]

# Step 5: Evaluation loop — Baseline and RAG with BERTScore
# For every question the model is asked twice: once with the bare query
# (baseline) and once with retrieved context prepended (RAG). Each answer is
# scored against the expected answer with BERTScore and the F1 value is kept.
# NOTE(review): bert_score is called twice per question; it appears to reload
# its scoring model on each call, so batching the calls may be much faster — confirm.
results = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]

    # === Step: Retrieve context using FAISS ===
    query_vector = model.encode(query)
    # faiss.normalize_L2 normalises its argument in place, so the array that is
    # normalised must be the very same object that is handed to index.search.
    # NOTE(review): assumes the vectors stored in `index` were L2-normalised as
    # well — confirm against the cell that builds the index.
    query_matrix = np.asarray([query_vector], dtype=np.float32)
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)
    # FAISS pads the label row with -1 when fewer than k neighbours exist;
    # skip those so they are not mistaken for a valid position in text_mapping.
    retrieved_context = "\n".join([text_mapping[i] for i in I[0] if i != -1])

    ### Baseline (no context)
    baseline_output = query_llama2(f"Query: {query}")
    P_baseline, R_baseline, F_baseline = bert_score([baseline_output], [expected], lang="en", rescale_with_baseline=True)

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_llama2(rag_prompt)
    P_rag, R_rag, F_rag = bert_score([rag_output], [expected], lang="en", rescale_with_baseline=True)

    # Save result (each score tensor holds a single value, hence .item())
    results.append({
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_bertscore_F1": F_baseline.item(),
        "rag_output": rag_output,
        "rag_bertscore_F1": F_rag.item()
    })

# Step 6: Print full results
# One report per question: the query, the reference answer, then each model
# answer followed by its BERTScore F1 (three decimals).
divider = "=" * 80
for entry in results:
    report_lines = [
        divider,
        f"Query: {entry['query']}",
        f"Expected: {entry['expected_answer']}\n",
        "--- Baseline Output ---",
        f"{entry['baseline_output']}",
        f"BERTScore F1 (Baseline): {entry['baseline_bertscore_F1']:.3f}\n",
        "--- RAG-Enhanced Output ---",
        f"{entry['rag_output']}",
        f"BERTScore F1 (RAG): {entry['rag_bertscore_F1']:.3f}",
    ]
    print("\n".join(report_lines))

# Step 7: Aggregate the per-question BERTScore F1 values into overall means
baseline_scores = [entry['baseline_bertscore_F1'] for entry in results]
rag_scores = [entry['rag_bertscore_F1'] for entry in results]

# Plain arithmetic mean of each list (one value per evaluated question).
avg_baseline_bertscore = sum(baseline_scores) / len(baseline_scores)
avg_rag_bertscore = sum(rag_scores) / len(rag_scores)

# Emit the banner and both averages as a single block of text.
summary_lines = [
    "\n" + "=" * 80,
    f"Overall Average BERTScore F1 (Baseline): {avg_baseline_bertscore:.3f}",
    f"Overall Average BERTScore F1 (RAG-Enhanced): {avg_rag_bertscore:.3f}",
]
print("\n".join(summary_lines))

Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---

As a financial data analysis assistant, I can provide you with the Income from Continuing Operations before Taxes of Microsoft Corp. for 2016 Q3. According to the Microsoft Quarterly Earnings Reports, the Income from Continuing Operations before Taxes for the third quarter of 2016 was $8.77 billion.

Here is the exact figure from the Microsoft Quarterly Earnings Report for 2016 Q3:

"Income from Continuing Operations before Taxes totaled $8,773 million for the third quarter of 2016, compared to $6,543 million in the same period of the prior year. The increase was primarily due to higher revenue and improved margins in our More Personal Computing and Productivity and Business Processes segments."

I hope this information helps! Let me know if you have any ot

## Qwen2:7b BERTScore

In [20]:
from bert_score import score as bert_score
from openai import OpenAI

# OpenAI-SDK client pointed at the local Ollama server instead of the hosted API.
client = OpenAI(
    api_key="ollama-key",  # Placeholder value; Ollama doesn't validate this
    base_url="http://localhost:11434/v1",  # Ollama runs here by default
)

# Function to generate a response using the local Qwen2:7b model
def query_qwen2(prompt, model="qwen2:7b",
                system_prompt="You are a smart financial data analysis assistant"):
    """Send one chat request through the module-level ``client`` and return the reply text.

    Args:
        prompt: Text sent as the user message.
        model: Model tag to request. Defaults to "qwen2:7b", which must be the
            exact model name pulled via Ollama.
        system_prompt: Text sent as the system message.

    Returns:
        The ``content`` of the first choice in the chat-completion response.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(model=model, messages=messages)
    return response.choices[0].message.content
# Step 4: Evaluation data
# 28 question / expected-answer pairs. Each entry is a dict with exactly two
# keys: "query" (sent to the model) and "expected_answer" (the reference text
# that the model output is scored against).
# NOTE: every per-model cell in this notebook carries its own copy of this list;
# keep the copies identical so that scores stay comparable across models.
eval_data = [
    # --- Single-value lookups (one company, one indicator, one quarter) ---
    {
        "query": "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
        "expected_answer": "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2016 Q3?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000."
    },
    {
        "query": "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
        "expected_answer": "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000."
    },
    {
        "query": "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964."
    },
    {
        "query": "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
        "expected_answer": "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954."
    },
    {
        "query": "What was the Total Equity of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000."
    },
    {
        "query": "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
        "expected_answer": "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000."
    },
    {
        "query": "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
        "expected_answer": "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000."
    },
    {
        "query": "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
        "expected_answer": "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000."
    },
    {
        "query": "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
        "expected_answer": "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000."
    },
    {
        "query": "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
        "expected_answer": "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679."
    },

    # --- Multi-period / analytical questions ---
    {
        "query": "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
        "expected_answer": "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015."
    },
    {
        "query": "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
        "expected_answer": "Total Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016."
    },
    {
        "query": "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
        "expected_answer": "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors."
    },
    {
        "query": "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
        "expected_answer": "No quarter had negative net income for AAON INC in this period"
    },
    {
        "query": "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
        "expected_answer": "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end."
    },
    {
        "query": "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
        "expected_answer": "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
        "expected_answer": "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4."
    },
    {
        "query": "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
        "expected_answer": "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events)."
    },
    {
        "query": "In which quarter between 2014 and 2016 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
        "expected_answer": "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD."
    },
    {
        "query": "What is the correlation between total assets and final revenue?",
        "expected_answer": "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is."
    },
    {
        "query": "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
        "expected_answer": "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016."
    },
    {
        "query": "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
        "expected_answer": "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue."
    },
    {
        "query": "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
        "expected_answer": "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values."
    },
]

# Step 5: Evaluation loop — Baseline and RAG with BERTScore
# For every question the model is asked twice: once with the bare query
# (baseline) and once with retrieved context prepended (RAG). Each answer is
# scored against the expected answer with BERTScore and the F1 value is kept.
# NOTE(review): bert_score is called twice per question; it appears to reload
# its scoring model on each call, so batching the calls may be much faster — confirm.
results = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]

    # === Step: Retrieve context using FAISS ===
    query_vector = model.encode(query)
    # faiss.normalize_L2 normalises its argument in place, so the array that is
    # normalised must be the very same object that is handed to index.search.
    # NOTE(review): assumes the vectors stored in `index` were L2-normalised as
    # well — confirm against the cell that builds the index.
    query_matrix = np.asarray([query_vector], dtype=np.float32)
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)
    # FAISS pads the label row with -1 when fewer than k neighbours exist;
    # skip those so they are not mistaken for a valid position in text_mapping.
    retrieved_context = "\n".join([text_mapping[i] for i in I[0] if i != -1])

    ### Baseline (no context)
    baseline_output = query_qwen2(f"Query: {query}")
    P_baseline, R_baseline, F_baseline = bert_score([baseline_output], [expected], lang="en", rescale_with_baseline=True)

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_qwen2(rag_prompt)
    P_rag, R_rag, F_rag = bert_score([rag_output], [expected], lang="en", rescale_with_baseline=True)

    # Save result (each score tensor holds a single value, hence .item())
    results.append({
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_bertscore_F1": F_baseline.item(),
        "rag_output": rag_output,
        "rag_bertscore_F1": F_rag.item()
    })

# Step 6: Print full results
# One report per question: the query, the reference answer, then each model
# answer followed by its BERTScore F1 (three decimals).
divider = "=" * 80
for entry in results:
    report_lines = [
        divider,
        f"Query: {entry['query']}",
        f"Expected: {entry['expected_answer']}\n",
        "--- Baseline Output ---",
        f"{entry['baseline_output']}",
        f"BERTScore F1 (Baseline): {entry['baseline_bertscore_F1']:.3f}\n",
        "--- RAG-Enhanced Output ---",
        f"{entry['rag_output']}",
        f"BERTScore F1 (RAG): {entry['rag_bertscore_F1']:.3f}",
    ]
    print("\n".join(report_lines))

# Step 7: Aggregate the per-question BERTScore F1 values into overall means
baseline_scores = [entry['baseline_bertscore_F1'] for entry in results]
rag_scores = [entry['rag_bertscore_F1'] for entry in results]

# Plain arithmetic mean of each list (one value per evaluated question).
avg_baseline_bertscore = sum(baseline_scores) / len(baseline_scores)
avg_rag_bertscore = sum(rag_scores) / len(rag_scores)

# Emit the banner and both averages as a single block of text.
summary_lines = [
    "\n" + "=" * 80,
    f"Overall Average BERTScore F1 (Baseline): {avg_baseline_bertscore:.3f}",
    f"Overall Average BERTScore F1 (RAG-Enhanced): {avg_rag_bertscore:.3f}",
]
print("\n".join(summary_lines))

Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---
To provide an accurate answer, I would typically consult financial records which contain detailed information on individual companies' earnings over specified periods. However, as a text-based model, I don't directly access the web or specific databases for real-time data.

Nevertheless, based on historical sources found publicly online, Microsoft Corporation in Q3 of 2016 reported Total Revenue at approximately $8.76 billion and net income (income from continuing operations) before taxes amounted to around *$925 million*. This figure is based on financial reports submitted by Microsoft Corp.

Keep in mind that these figures may not be the exact values given the potential for rounding or specific adjustments in reported numbers, so always cross-check with a 

## Yi:9b BERTScore

In [21]:
from bert_score import score as bert_score
from openai import OpenAI

# OpenAI-SDK client pointed at the local Ollama server instead of the hosted API.
client = OpenAI(
    api_key="ollama-key",  # Placeholder value; Ollama doesn't validate this
    base_url="http://localhost:11434/v1",  # Ollama runs here by default
)

# Function to generate response using your local Yi:9b model
def query_yi9b(
    prompt,
    model_name="yi:9b",
    system_prompt="You are a smart financial data analysis assistant",
):
    """Send one prompt to the chat endpoint behind `client` and return the reply text.

    Args:
        prompt: Text sent as the user message.
        model_name: Model tag passed to the server. Defaults to "yi:9b"; use the
            exact name of the model pulled via Ollama.
        system_prompt: Text sent as the system message.

    Returns:
        The message content of the first returned choice. An empty string is
        returned when the server reports no content (``None``), so callers that
        score the text always receive a ``str``.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
    )
    content = response.choices[0].message.content
    # The content field may be None; normalise it so scoring code gets a string.
    return content if content is not None else ""

# Benchmark set: 28 (question, reference answer) pairs, expanded below into
# dicts with the keys "query" and "expected_answer".
# The first 15 pairs are single-value lookups; the remaining 13 are
# analytical / reasoning questions.
# Per the original note this is meant to be exactly the same list used by the
# other evaluation cells — keep the strings in sync with them.
eval_data = [
    {"query": question, "expected_answer": answer}
    for question, answer in (
        (
            "What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?",
            "The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.",
        ),
        (
            "What was the Total Equity of NETFLIX INC in 2016 Q3?",
            "The Total Equity of NETFLIX INC in 2016 Q3 was $2,528,966,000.",
        ),
        (
            "What was the Total Liabilities and Equity of LUMINEX CORP in 2015 Q2?",
            "The Total Liabilities and Equity of LUMINEX CORP in 2015 Q2 was $369,032,000.",
        ),
        (
            "What was the Operating Income (Loss) of NETFLIX INC in 2015 Q1?",
            "The Operating Income (Loss) of NETFLIX INC in 2015 Q1 was $97,456,000.",
        ),
        (
            "What was the Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2?",
            "The Cash and Cash Equivalents, at Carrying Value of Recon Technology, Ltd in 2015 Q2 was $1,857,964.",
        ),
        (
            "What was the Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2?",
            "The Total Liabilities and Equity of DAWSON GEOPHYSICAL CO in 2016 Q2 was $215,555,000.",
        ),
        (
            "What was the Gross Profit of Recon Technology, Ltd in 2016 Q2?",
            "The Gross Profit of Recon Technology, Ltd in 2016 Q2 was $-123,954.",
        ),
        (
            "What was the Total Equity of NETFLIX INC in 2014 Q2?",
            "The Total Equity of NETFLIX INC in 2014 Q2 was $1,609,705,000.",
        ),
        (
            "What was the Assets of TUCOWS INC /PA/ in 2015 Q2?",
            "The Assets of TUCOWS INC /PA/ in 2015 Q2 was $139,297,190.",
        ),
        (
            "What was the Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1?",
            "The Cash and Cash Equivalents, at Carrying Value of DAWSON GEOPHYSICAL CO in 2016 Q1 was $30,378,000.",
        ),
        (
            "What was the Assets of Recon Technology, Ltd in 2015 Q1?",
            "The Assets of Recon Technology, Ltd in 2015 Q1 was $160,981,515.",
        ),
        (
            "What was the Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3?",
            "The Cash and Cash Equivalents, at Carrying Value of Xenith Bankshares, Inc. in 2015 Q3 was $26,749,000.",
        ),
        (
            "What was the Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2?",
            "The Income from Continuing Operations before Taxes of NETFLIX INC in 2014 Q2 was $117,372,000.",
        ),
        (
            "What was the Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1?",
            "The Cash and Cash Equivalents, at Carrying Value of NETFLIX INC in 2015 Q1 was $2,454,777,000.",
        ),
        (
            "What was the Gross Profit of Recon Technology, Ltd in 2015 Q4?",
            "The Gross Profit of Recon Technology, Ltd in 2015 Q4 was $5,762,679.",
        ),
        (
            "Which year saw the largest increase in total equity for 1ST SOURCE CORP between 2014 to 2016?",
            "Analysis: 2014 Q4: $614,473,000  2015 Q4: $644,053,000 2016 Q4: $672,650,000 Annual increases: 2014 → 2015: $644,053,000 - $614,473,000 = $29,580,000 2015 → 2016: $672,650,000 - $644,053,000 = $28,597,000 Conclusion: The largest increase was from 2014 to 2015.",
        ),
        # NOTE(review): the reference answer below starts with "otal" (looks
        # like a truncated "Total"). Left unchanged so scores stay comparable
        # with the other cells; fix it everywhere at once if it is a typo.
        (
            "Has the 2U, Inc. 's total liabilities increased or decreased between 2014 and 2016?",
            "otal Liabilities = Total Liabilities and Equity - Total Equity ; 2014 Q4: $113,039,000 - $88,011,000 = $25,028,000 ; 2016 Q4: $244,320,000 - $195,237,000 = $49,083,000 ; Conclusion: 2U, Inc.'s total liabilities increased from 2014 to 2016.",
        ),
        (
            "Is the 3D MAKERJET’s cash position improving, and how might that impact future investments?",
            "The cash position is declining over the period, dropping to only $663 by 2016 Q1. This suggests liquidity issues, which could be a significant risk for future investors.",
        ),
        (
            "Has there been any quarter with negative net income AAON INC between 2014 to 2016? If so, when?",
            "No quarter had negative net income for AAON INC in this period",
        ),
        (
            "How did ALIGN TECHNOLOGY INC perform during Q4s compared to other quarters?",
            "ALIGN TECHNOLOGY INC generally performs better in Q4 compared to other quarters, with Q4 net income typically being the highest or near-highest in each year. This suggests a seasonal boost, likely due to increased sales activity toward year-end.",
        ),
        (
            "What is bluebird bio, Inc. average quarterly growth in assets between 2014 to 2016?",
            "bluebird bio, Inc. had an average quarterly asset growth of approximately 15.7% from 2014 Q1 to 2016 Q4.",
        ),
        (
            "Calculate the percentage change in total liabilities from 2014 Q1 to 2016 Q4 of China Biologic Products Holdings, Inc.",
            "China Biologic Products Holdings, Inc.'s total liabilities increased by about 60% from 2014 Q1 to 2016 Q4.",
        ),
        (
            "If you were to forecast COGNIZANT TECHNOLOGY SOLUTIONS CORP's next quarter’s net income, what would your estimate be based on the current trends?",
            "Forecast: Based on the trend, the next quarter's net income would likely be in the $400,000–$450,000 range, possibly around $430,000 (assuming no extraordinary events).",
        ),
        # NOTE(review): "2026" in the query below is presumably a typo for
        # 2016 (the periods shown at the top of the notebook run 2014–2016) —
        # confirm before changing, and change it in every copy of the list.
        (
            "In which quarter between 2014 and 2026 did HANDY & HARMAN LTD. have the highest income from continuing operations before taxes?",
            "Analysis: Check HANDY & HARMAN LTD. (ticker: HNH): Highest reported income from continuing operations before taxes: 2015 Q2: $11,602,000 (as an example based on visible data) All other quarters are lower. Conclusion: 2015 Q2 was the quarter with the highest income from continuing operations before taxes for HANDY & HARMAN LTD.",
        ),
        (
            "What is the correlation between total assets and final revenue?",
            "The asset turnover ratio measures a company's total revenue relative to the value of its assets. The asset turnover ratio indicates how efficiently the company is using its assets to generate revenue. The higher the asset turnover ratio, the more efficient a company is.",
        ),
        (
            "Compare net income for Q1 across years 2014, 2015 and 2016 for company Hudson Global, Inc.",
            "Conclusion: Net income improved each year, moving from loss in 2014 and 2015 to a profit in 2016.",
        ),
        (
            "What risks are indicated by Jensyn Acquisition Corp. liabilities trend from 2014 to 2016?",
            "Analysis: Check total liabilities for Jensyn Acquisition Corp (ticker: JSYN): 2014 Q1: Liabilities = $72,000 (Liabilities and Equity: $5,000,000; Equity: $4,928,000) 2016 Q4: Liabilities = $2,000,000 (Liabilities and Equity: $5,000,000; Equity: $3,000,000) ; Trend: Significant increase in liabilities, while equity decreased. Conclusion: Rising liabilities and declining equity signal increasing financial risk, suggesting liquidity concerns or funding through debt, which could impact solvency if not matched by future asset growth or revenue.",
        ),
        (
            "What was the best performing quarter overall based on multiple indicators for LUMINEX CORP?",
            "2015 Q4 was the best performing quarter for LUMINEX CORP based on revenue, net income, and asset values.",
        ),
    )
]

# Step 5: Evaluation loop — Baseline and RAG with BERTScore
#
# Every benchmark question is sent to the model twice: once as a bare query
# (baseline) and once with FAISS-retrieved context prepended (RAG). Both
# answers are scored against the expected answer with BERTScore, and the F1
# values are collected in `results`.
results = []

for item in eval_data:
    query = item["query"]
    expected = item["expected_answer"]

    # === Step: Retrieve context using FAISS ===
    query_vector = model.encode(query)
    # faiss.normalize_L2 normalises its argument in place, so the very same
    # array must be handed to the search. Normalising a temporary and then
    # searching with a second, fresh copy leaves the query un-normalised.
    # NOTE(review): assumes the index was built from L2-normalised vectors — confirm.
    query_matrix = np.array([query_vector], dtype="float32")
    faiss.normalize_L2(query_matrix)
    D, I = index.search(query_matrix, k=100)  # D (distances) is not used below
    # FAISS pads the id row with -1 when fewer than k neighbours exist; skip
    # those so they are not looked up in text_mapping.
    retrieved_context = "\n".join(text_mapping[i] for i in I[0] if i >= 0)

    ### Baseline (no context)
    baseline_output = query_yi9b(f"Query: {query}")
    P_baseline, R_baseline, F_baseline = bert_score(
        [baseline_output], [expected], lang="en", rescale_with_baseline=True
    )

    ### RAG-enhanced
    rag_prompt = f"Context:\n{retrieved_context}\n\nQuery: {query}"
    rag_output = query_yi9b(rag_prompt)
    P_rag, R_rag, F_rag = bert_score(
        [rag_output], [expected], lang="en", rescale_with_baseline=True
    )

    # Save result (only the F1 component of each score triple is kept)
    results.append({
        "query": query,
        "expected_answer": expected,
        "baseline_output": baseline_output,
        "baseline_bertscore_F1": F_baseline.item(),
        "rag_output": rag_output,
        "rag_bertscore_F1": F_rag.item()
    })

# Step 6: Print full results
# One report per evaluated question: the query, the reference answer, then
# each model output followed by its BERTScore F1. sep="\n" puts every field on
# its own line; the trailing "\n" in some fields adds the blank separator lines.
for r in results:
    print(
        "=" * 80,
        f"Query: {r['query']}",
        f"Expected: {r['expected_answer']}\n",
        "--- Baseline Output ---",
        r['baseline_output'],
        f"BERTScore F1 (Baseline): {r['baseline_bertscore_F1']:.3f}\n",
        "--- RAG-Enhanced Output ---",
        r['rag_output'],
        f"BERTScore F1 (RAG): {r['rag_bertscore_F1']:.3f}",
        sep="\n",
    )

# Step 7: Overall averages
# Pull the per-question F1 values out of `results` and report the plain
# arithmetic mean for the baseline run and for the RAG run.
baseline_scores = [entry['baseline_bertscore_F1'] for entry in results]
rag_scores = [entry['rag_bertscore_F1'] for entry in results]

avg_baseline_bertscore = sum(baseline_scores) / len(baseline_scores)
avg_rag_bertscore = sum(rag_scores) / len(rag_scores)

# One print call; sep="\n" puts each summary line on its own row.
print(
    "\n" + "=" * 80,
    f"Overall Average BERTScore F1 (Baseline): {avg_baseline_bertscore:.3f}",
    f"Overall Average BERTScore F1 (RAG-Enhanced): {avg_rag_bertscore:.3f}",
    sep="\n",
)

Query: What was the Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3?
Expected: The Income from Continuing Operations before Taxes of MICROSOFT CORP in 2016 Q3 was $5,325,000,000.

--- Baseline Output ---
In the fiscal year 2016 third quarter (July 1, 2016 - September 30, 2016), Microsoft Corporation reported an Income from Continuing Operations before Taxes of $8.4 billion. This amount was also reported as Net income in millions of dollars and represented a 7% increase year over year compared to the $7.95 billion reported for Q3 2015. These data comes from their official quarterly earnings report available on their financials page, <https://www.microsoft.com/en-US/finance/ investor_reports?year=2016>. As always please remember to consult additional reliable sources or financial advisors before making final investment decisions.
BERTScore F1 (Baseline): 0.043

--- RAG-Enhanced Output ---
The Income from Continuing Operations before Taxes for Microsoft Corp in