In [1]:
import pandas as pd

In [2]:
github_url = 'https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/04-monitoring/data/results-gpt4o-mini.csv'

url = f'{github_url}?raw=1'
df = pd.read_csv(url)
print(df)

                                             answer_llm  \
0     You can sign up for the course by visiting the...   
1     You can sign up using the link provided in the...   
2     Yes, there is an FAQ for the Machine Learning ...   
3     The context does not provide any specific info...   
4     To structure your questions and answers for th...   
...                                                 ...   
1825  Some suggested titles for listing the Machine ...   
1826  It is best advised that you do not list the Ma...   
1827  You can incorporate your Machine Learning Zoom...   
1828  The advice on including a project link in a CV...   
1829  The suggestion to showcase progress through Li...   

                                            answer_orig  document  \
0     Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
1     Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
2     Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
3     Machine L

In [3]:
df = df.iloc[:300]
print(df)

                                            answer_llm  \
0    You can sign up for the course by visiting the...   
1    You can sign up using the link provided in the...   
2    Yes, there is an FAQ for the Machine Learning ...   
3    The context does not provide any specific info...   
4    To structure your questions and answers for th...   
..                                                 ...   
295  An alternative way to load the data using the ...   
296  You can directly download the dataset from Git...   
297  You can fetch data for homework using the `req...   
298  If the status code is 200 when downloading dat...   
299  If the file download fails when using the requ...   

                                           answer_orig  document  \
0    Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
1    Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
2    Machine Learning Zoomcamp FAQ\nThe purpose of ...  0227b872   
3    Machine Learning Zoomcamp 

In [4]:
from sentence_transformers import SentenceTransformer
import torch
model_name = 'multi-qa-mpnet-base-dot-v1'
embedding_model = SentenceTransformer(model_name)
answer_llm = df.iloc[0].answer_llm

  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## Q1. Getting the embeddings model

In [8]:
embedding = embedding_model.encode(answer_llm)
first_value = embedding[0]
print(first_value)

-0.4224467


## Q2. Computing the dot product

In [10]:
import numpy as np

evaluations = []

for _, row in df.iterrows():
    embedding_orig = embedding_model.encode(row.answer_orig)
    embedding_llm = embedding_model.encode(row.answer_llm)
    
    score = np.dot(embedding_orig, embedding_llm)
    evaluations.append(score)

percentile_75 = np.percentile(evaluations, 75)

In [11]:
print(percentile_75)

31.674309730529785


## Q3. Computing the cosine

In [12]:
def normalize_vector(v):
    norm = np.sqrt((v * v).sum())
    return v / norm

In [13]:
evaluations = []

for _, row in df.iterrows():
    embedding_orig = embedding_model.encode(row.answer_orig)
    embedding_llm = embedding_model.encode(row.answer_llm)
    
    # Normalize the embeddings
    embedding_orig_norm = normalize_vector(embedding_orig)
    embedding_llm_norm = normalize_vector(embedding_llm)
    
    # Compute cosine similarity (dot product of normalized vectors)
    score = np.dot(embedding_orig_norm, embedding_llm_norm)
    evaluations.append(score)

percentile_75 = np.percentile(evaluations, 75)

In [14]:
print(percentile_75)

0.836234912276268


## Q4. Rouge

In [15]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1



[notice] A new release of pip is available: 23.0.1 -> 24.1.2
[notice] To update, run: C:\Users\USER\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip


In [16]:
from rouge import Rouge
rouge_scorer = Rouge()

# Get the answers at index 10
r = df.iloc[10]

# Calculate ROUGE scores
scores = rouge_scorer.get_scores(r['answer_orig'], r['answer_llm'])[0]

# Extract the F1 score for rouge-1
rouge_1_f1 = scores['rouge-1']['f']

In [17]:
print(rouge_1_f1)

0.45454544954545456


## Q5. Average rouge score

In [18]:
from rouge import Rouge
rouge_scorer = Rouge()

# Get the answers at index 10
r = df.iloc[10]

# Calculate ROUGE scores
scores = rouge_scorer.get_scores(r['answer_orig'], r['answer_llm'])[0]

# Extract F1 scores for rouge-1, rouge-2, and rouge-l
rouge_1_f1 = scores['rouge-1']['f']
rouge_2_f1 = scores['rouge-2']['f']
rouge_l_f1 = scores['rouge-l']['f']

# Compute the average
average_rouge = (rouge_1_f1 + rouge_2_f1 + rouge_l_f1) / 3

In [19]:
print(average_rouge)

0.36500136000136507


## Q6. Average rouge score for all the data points

In [20]:
from rouge import Rouge
import pandas as pd

rouge_scorer = Rouge()

rouge_1_scores = []
rouge_2_scores = []
rouge_l_scores = []
rouge_avg_scores = []

for _, row in df.iterrows():
    scores = rouge_scorer.get_scores(row['answer_orig'], row['answer_llm'])[0]
    
    rouge_1 = scores['rouge-1']['f']
    rouge_2 = scores['rouge-2']['f']
    rouge_l = scores['rouge-l']['f']
    rouge_avg = (rouge_1 + rouge_2 + rouge_l) / 3
    
    rouge_1_scores.append(rouge_1)
    rouge_2_scores.append(rouge_2)
    rouge_l_scores.append(rouge_l)
    rouge_avg_scores.append(rouge_avg)

# Create a new dataframe with the scores
rouge_df = pd.DataFrame({
    'rouge_1': rouge_1_scores,
    'rouge_2': rouge_2_scores,
    'rouge_l': rouge_l_scores,
    'rouge_avg': rouge_avg_scores
})

# Calculate the average of rouge_2 across all records
average_rouge_2 = rouge_df['rouge_2'].mean()

# Print average of rouge_2 across all records
print(average_rouge_2)

0.20696501983423318
