## Homework 04-monitoring

In [25]:
from tqdm.auto  import tqdm

import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from rouge import Rouge

In [9]:
# Results CSV from the LLM Zoomcamp repo (columns include answer_llm / answer_orig).
github_url = 'https://raw.githubusercontent.com/DataTalksClub/llm-zoomcamp/main/04-monitoring/data/results-gpt4o-mini.csv'
url = github_url + '?raw=1'

# Load the file and keep only the first 300 rows.
df = pd.read_csv(url).head(300)

### Q1. Getting the embeddings model

In [10]:
# Sentence-embedding model used for every similarity computation below.
model_name = "multi-qa-mpnet-base-dot-v1"
model = SentenceTransformer(model_name_or_path=model_name)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/8.71k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [14]:
# Embed the LLM answer of the first row and show the first component of the vector.
answer_llm = df['answer_llm'].iloc[0]
model.encode(answer_llm)[0]


-0.42244673

### Q2. Computing the dot product

In [16]:
# Peek at the first row to recall the available columns.
df.iloc[:1]

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp


In [19]:
# Dot product between the embedding of each original answer and the embedding
# of the corresponding LLM answer, one score per row of `df`.
evaluations = []

# zip() has no len(), so tqdm cannot infer the number of iterations; passing
# `total` lets the bar show a percentage/ETA instead of a bare "0it" counter.
for answer_original, answer_llm in tqdm(
    zip(df["answer_orig"], df["answer_llm"]), total=len(df)
):
    # Keep the embeddings under their own names rather than rebinding the
    # loop's text variables to arrays.
    embedding_original = model.encode(answer_original)
    embedding_llm = model.encode(answer_llm)
    evaluations.append(embedding_original.dot(embedding_llm))

0it [00:00, ?it/s]

In [22]:
# 75th percentile of the scores currently held in `evaluations`, to 2 decimals.
p75 = np.percentile(evaluations, 75)
round(p75, 2)

31.67

### Q3. Computing the cosine

In [23]:
def normalize_vector(v):
    """
    Return the input vector scaled to unit Euclidean length.

    Parameters
    ----------
    v : array-like
        Vector to normalize. It is converted with ``np.asarray``, so plain
        lists and tuples are accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray
        ``v`` divided by its Euclidean norm. A vector whose norm is exactly
        zero has no direction and is returned as zeros instead of being
        divided by zero (which would produce NaNs and a RuntimeWarning).
    """
    v = np.asarray(v)
    norm = np.sqrt((v * v).sum())
    # Dividing by 1 leaves a zero vector as zeros while still producing a
    # floating-point result; a NaN norm is not equal to 0 and propagates as before.
    divisor = 1 if norm == 0 else norm
    return v / divisor

# Cosine similarity between each original answer and its LLM answer:
# the dot product of the two embeddings after normalizing each one.
evaluations = []

# zip() has no len(), so tqdm cannot infer the number of iterations; passing
# `total` lets the bar show a percentage/ETA instead of a bare "0it" counter.
for answer_original, answer_llm in tqdm(
    zip(df["answer_orig"], df["answer_llm"]), total=len(df)
):
    # Keep the embeddings under their own names rather than rebinding the
    # loop's text variables to arrays.
    embedding_original = normalize_vector(model.encode(answer_original))
    embedding_llm = normalize_vector(model.encode(answer_llm))
    evaluations.append(embedding_original.dot(embedding_llm))

0it [00:00, ?it/s]

In [24]:
# 75th percentile of the scores currently held in `evaluations`, to 2 decimals.
p75 = np.percentile(evaluations, 75)
round(p75, 2)

0.84

### Q4. Rouge

In [33]:
# Show the row whose index label is 10.
df.loc[df.index == 10]

Unnamed: 0,answer_llm,answer_orig,document,question,course
10,"Yes, all sessions are recorded, so if you miss...","Everything is recorded, so you won’t miss anyt...",5170565b,Are sessions recorded if I miss one?,machine-learning-zoomcamp


In [34]:
rouge_scorer = Rouge()

# Select the row with index label 10 once, then score its LLM answer
# (hypothesis) against the original answer (reference).
row_10 = df[df.index == 10]
scores = rouge_scorer.get_scores(
    row_10['answer_llm'].tolist(),
    row_10['answer_orig'].tolist(),
)[0]

In [42]:
# ROUGE-1 F-score from `scores`, rounded to 2 decimals.
rouge_1_f = scores['rouge-1']['f']
round(rouge_1_f, 2)

0.45

### Q5. Average rouge score

In [41]:
# Mean of the three F-scores (ROUGE-1, ROUGE-2, ROUGE-L) from `scores`.
f_scores = [scores[metric]['f'] for metric in ('rouge-1', 'rouge-2', 'rouge-l')]
round(sum(f_scores) / 3, 2)


0.35

### Q6. Average rouge score for all the data points

In [46]:
# ROUGE-1 / ROUGE-2 / ROUGE-L F-scores, plus their mean, for every row of `df`.
rouge_scores = []

# One scorer is enough; there is no need to rebuild it for every row.
rouge_scorer = Rouge()

# zip() has no len(), so tqdm cannot infer the number of iterations; passing
# `total` lets the bar show a percentage/ETA instead of a bare "0it" counter.
for answer_original, answer_llm in tqdm(
    zip(df["answer_orig"], df["answer_llm"]), total=len(df)
):
    # get_scores takes (hypothesis, reference): the LLM answer is the
    # hypothesis and the original answer is the reference.
    # A loop-local name is used so the notebook-level `scores` variable is
    # not overwritten with the last row's result.
    row_scores = rouge_scorer.get_scores(answer_llm, answer_original)[0]
    rouge_1 = row_scores['rouge-1']['f']
    rouge_2 = row_scores['rouge-2']['f']
    rouge_l = row_scores['rouge-l']['f']
    rouge_avg = (rouge_1 + rouge_2 + rouge_l) / 3
    rouge_scores.append((rouge_1, rouge_2, rouge_l, rouge_avg))

# Build the frame once from the collected tuples.
df_rouge_scores = pd.DataFrame(
    rouge_scores, columns=['rouge_1', 'rouge_2', 'rouge_l', 'rouge_avg']
)


0it [00:00, ?it/s]

In [49]:
# First five rows of the per-answer ROUGE scores.
df_rouge_scores.iloc[:5]

Unnamed: 0,rouge_1,rouge_2,rouge_l,rouge_avg
0,0.095238,0.028169,0.095238,0.072882
1,0.125,0.055556,0.09375,0.091435
2,0.415584,0.177778,0.363636,0.319
3,0.216216,0.047059,0.135135,0.132803
4,0.142076,0.033898,0.120219,0.098731


In [51]:
# Mean ROUGE-2 F-score over all rows, rounded to 2 decimals.
df_rouge_scores['rouge_2'].mean().round(2)

0.21