In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from tqdm.autonotebook import tqdm, trange

  from tqdm.autonotebook import tqdm, trange


## Getting the data

In [2]:
# Results CSV from the llm-zoomcamp repository; it holds the LLM answers next to
# the original answers (see the columns displayed by df.head() below).
github_url = (
    "https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/"
    "main/04-monitoring/data/results-gpt4o-mini.csv"
)
url = github_url + '?raw=1'
df = pd.read_csv(url)

In [3]:
# Work with the first 300 rows only.
df = df.head(300)

In [4]:
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


## Q1. Getting the embeddings model

In [5]:
# Name of the sentence-transformers model used for every embedding in this notebook.
model_name = "multi-qa-mpnet-base-dot-v1"
# Instantiate the model once; the cells below reuse `embedding_model`.
embedding_model = SentenceTransformer(model_name)

In [6]:
# LLM answer of the first row (position 0).
answer_llm = df["answer_llm"].iloc[0]

In [7]:
# Embed the single answer text selected above.
embeddings = embedding_model.encode(answer_llm)

In [8]:
# First component of the embedding (the value asked for in Q1).
embeddings[0]

-0.42244655

## Q2. Computing the dot product

In [9]:
# Dot product between the embedding of each LLM answer and the embedding of
# its original (reference) answer, one score per row of df.
evaluations = []
# Reference answers repeat across rows (rows that share a document show the
# same answer_orig), so every distinct reference text is encoded only once.
# Assumes encode() returns the same vector for the same string, in which case
# the scores are identical to encoding row by row.
orig_embedding_cache = {}
# Iterate the two text columns directly instead of building a row Series
# with df.iloc[i] on every step.
for llm_text, orig_text in tqdm(zip(df["answer_llm"], df["answer_orig"]), total=len(df)):
    embedding_answer_llm = embedding_model.encode(llm_text)
    if orig_text not in orig_embedding_cache:
        orig_embedding_cache[orig_text] = embedding_model.encode(orig_text)
    embedding_answer_orig = orig_embedding_cache[orig_text]
    evaluations.append(embedding_answer_llm.dot(embedding_answer_orig))


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [02:24<00:00,  2.07it/s]


In [10]:
# 75th percentile of the dot-product scores collected in `evaluations`.
np.percentile(evaluations, 75)

31.67430877685547

## Q3. Computing the cosine similarity

In [11]:
def normalize_vector(v):
    """Scale a vector to unit Euclidean (L2) length.

    Parameters
    ----------
    v : array-like
        Vector to normalize. It is converted with ``np.asarray``, so plain
        lists are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its L2 norm. A vector whose norm is zero is returned
        as all zeros instead of being divided by zero (which would produce
        NaNs that silently propagate into later statistics).
    """
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    # Dividing by 1 leaves a zero vector unchanged while keeping the same
    # result dtype as the regular division path.
    divisor = norm if norm != 0 else 1
    v_norm = v / divisor
    return v_norm

In [12]:
# Cosine similarity per row: dot product of the unit-normalized embeddings of
# the LLM answer and of the original (reference) answer.
evaluations_normalized = []
# Reference answers repeat across rows (rows that share a document show the
# same answer_orig), so each distinct reference text is encoded and normalized
# only once. Assumes encode() returns the same vector for the same string, in
# which case the scores are identical to encoding row by row.
normalized_orig_cache = {}
# Iterate the two text columns directly instead of building a row Series
# with df.iloc[i] on every step.
for llm_text, orig_text in tqdm(zip(df["answer_llm"], df["answer_orig"]), total=len(df)):
    embedding_answer_llm = normalize_vector(embedding_model.encode(llm_text))
    if orig_text not in normalized_orig_cache:
        normalized_orig_cache[orig_text] = normalize_vector(embedding_model.encode(orig_text))
    embedding_answer_orig = normalized_orig_cache[orig_text]

    evaluations_normalized.append(embedding_answer_llm.dot(embedding_answer_orig))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [02:23<00:00,  2.09it/s]


In [13]:
# 75th percentile of the cosine similarities collected in `evaluations_normalized`.
np.percentile(evaluations_normalized, 75)

0.8362348973751068

## Q4. ROUGE

In [14]:
!pip install rouge

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [15]:
# Show every row that belongs to document 5170565b.
df.loc[df["document"] == '5170565b']

Unnamed: 0,answer_llm,answer_orig,document,question,course
10,"Yes, all sessions are recorded, so if you miss...","Everything is recorded, so you won’t miss anyt...",5170565b,Are sessions recorded if I miss one?,machine-learning-zoomcamp
11,"Yes, you can ask your questions in advance if ...","Everything is recorded, so you won’t miss anyt...",5170565b,Can I ask questions in advance if I can't atte...,machine-learning-zoomcamp
12,"If you miss a session, don't worry! Everything...","Everything is recorded, so you won’t miss anyt...",5170565b,How will my questions be addressed if I miss a...,machine-learning-zoomcamp
13,"Yes, there is a way to catch up on a missed se...","Everything is recorded, so you won’t miss anyt...",5170565b,Is there a way to catch up on a missed session?,machine-learning-zoomcamp
14,"Yes, you can still interact with instructors a...","Everything is recorded, so you won’t miss anyt...",5170565b,Can I still interact with instructors after mi...,machine-learning-zoomcamp


In [34]:
# Row at position 10: the first row listed for document 5170565b in the filter above.
r = df.iloc[10]

In [35]:
# Display the selected row.
r

answer_llm      Yes, all sessions are recorded, so if you miss...
answer_orig     Everything is recorded, so you won’t miss anyt...
document                                                 5170565b
question                     Are sessions recorded if I miss one?
course                                  machine-learning-zoomcamp
rouge_scores                                               0.3549
rouge_1                                                  0.454545
rouge_2                                                  0.216216
rouge_l                                                  0.393939
Name: 10, dtype: object

In [36]:
# Imported here rather than in the first cell because the package is only
# installed by the pip cell above.
from rouge import Rouge
# Single scorer instance, reused by the later ROUGE cells.
rouge_scorer = Rouge()

# Score the LLM answer against the original answer of the selected row.
# The result is indexed with [0] to get the score mapping for this one pair
# (keys 'rouge-1', 'rouge-2', 'rouge-l' are read from it in the cells below).
scores = rouge_scorer.get_scores(r['answer_llm'], r['answer_orig'])[0]

In [37]:
# ROUGE-1 entry of the scores; it holds the 'r', 'p' and 'f' values.
scores["rouge-1"]

{'r': 0.45454545454545453, 'p': 0.45454545454545453, 'f': 0.45454544954545456}

## Q5. Average ROUGE score

In [40]:
def avergae_rougue_score(scores):
    """Return the mean of the ROUGE-1, ROUGE-2 and ROUGE-L F-scores.

    ``scores`` is a mapping with the keys ``'rouge-1'``, ``'rouge-2'`` and
    ``'rouge-l'``; each value is itself a mapping with an ``'f'`` entry.

    The misspelled function name is kept as-is because other cells call it.
    """
    f_rouge_1, f_rouge_2, f_rouge_l = (
        scores[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l')
    )
    return (f_rouge_1 + f_rouge_2 + f_rouge_l) / 3

In [41]:
# Average F-score for the single row scored above.
avergae_rougue_score(scores)

0.35490034990035496

## Q6. Average ROUGE score for all the data points

In [26]:
# Per-row ROUGE F-scores for the whole frame, plus their average per row.
rouge_scores_average = []
rouge_1_list = []
rouge_2_list = []
rouge_l_list = []
# Loop-local names are deliberately different from `r` and `scores`: those
# hold the single-row example of the earlier cells, and overwriting them here
# would change what those cells show when they are re-run after this loop.
# The text columns are iterated directly instead of building a row Series
# with df.iloc[i] on every step.
for llm_text, orig_text in tqdm(zip(df['answer_llm'], df['answer_orig']), total=len(df)):
    row_scores = rouge_scorer.get_scores(llm_text, orig_text)[0]
    rouge_1_list.append(row_scores['rouge-1']['f'])
    rouge_2_list.append(row_scores['rouge-2']['f'])
    rouge_l_list.append(row_scores['rouge-l']['f'])
    rouge_scores_average.append(avergae_rougue_score(row_scores))

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 300/300 [00:00<00:00, 336.91it/s]


In [42]:
# Sanity check: one averaged score per row is expected.
len(rouge_scores_average)

300

In [44]:
# Attach the per-row scores as new columns. The lists are assigned directly:
# wrapping them in pd.Series would align on index labels, which silently
# produces NaN when df's index is not 0..n-1 or when the lengths differ.
# A plain list is matched by position, and pandas raises if its length does
# not equal len(df).
df["rouge_scores"] = rouge_scores_average
df["rouge_1"] = rouge_1_list
df["rouge_2"] = rouge_2_list
df["rouge_l"] = rouge_l_list

In [46]:
# Inspect the last rows to confirm the new ROUGE columns are filled in.
df.tail()

Unnamed: 0,answer_llm,answer_orig,document,question,course,rouge_scores,rouge_1,rouge_2,rouge_l
295,An alternative way to load the data using the ...,Above users showed how to load the dataset dir...,8d209d6d,What is an alternative way to load the data us...,machine-learning-zoomcamp,0.60457,0.654545,0.540984,0.618182
296,You can directly download the dataset from Git...,Above users showed how to load the dataset dir...,8d209d6d,How can I directly download the dataset from G...,machine-learning-zoomcamp,0.535991,0.590164,0.460432,0.557377
297,You can fetch data for homework using the `req...,Above users showed how to load the dataset dir...,8d209d6d,Could you share a method to fetch data for hom...,machine-learning-zoomcamp,0.618851,0.654867,0.564516,0.637168
298,If the status code is 200 when downloading dat...,Above users showed how to load the dataset dir...,8d209d6d,What should I do if the status code is 200 whe...,machine-learning-zoomcamp,0.247252,0.304762,0.132231,0.304762
299,If the file download fails when using the requ...,Above users showed how to load the dataset dir...,8d209d6d,What does the code using the requests library ...,machine-learning-zoomcamp,0.118954,0.179487,0.023529,0.153846


In [45]:
# Summary statistics of the numeric columns (the ROUGE scores).
df.describe()

Unnamed: 0,rouge_scores,rouge_1,rouge_2,rouge_l
count,300.0,300.0,300.0,300.0
mean,0.313205,0.378844,0.206965,0.353807
std,0.158133,0.165977,0.15355,0.162965
min,0.0,0.0,0.0,0.0
25%,0.197358,0.261625,0.097809,0.228032
50%,0.29864,0.378762,0.178671,0.337792
75%,0.404169,0.479281,0.286181,0.451613
max,0.813043,0.85,0.73913,0.85
