### Getting the data

In [2]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm

  from tqdm.autonotebook import tqdm, trange


In [3]:
# Read results-gpt4o-mini.csv from the DataTalksClub/llm-zoomcamp GitHub
# repository and keep only the first 300 rows for the rest of the notebook.
url = (
    'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/'
    '04-monitoring/data/results-gpt4o-mini.csv?raw=1'
)
df = pd.read_csv(url).head(300)
df.head()

Unnamed: 0,answer_llm,answer_orig,document,question,course
0,You can sign up for the course by visiting the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Where can I sign up for the course?,machine-learning-zoomcamp
1,You can sign up using the link provided in the...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Can you provide a link to sign up?,machine-learning-zoomcamp
2,"Yes, there is an FAQ for the Machine Learning ...",Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Is there an FAQ for this Machine Learning course?,machine-learning-zoomcamp
3,The context does not provide any specific info...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,Does this course have a GitHub repository for ...,machine-learning-zoomcamp
4,To structure your questions and answers for th...,Machine Learning Zoomcamp FAQ\nThe purpose of ...,0227b872,How can I structure my questions and answers f...,machine-learning-zoomcamp


### Q1. Getting the embeddings model

In [4]:
# Build the SentenceTransformer named 'multi-qa-mpnet-base-dot-v1'; the cells
# below call `embedding_model.encode(...)` on it to embed the answer texts.
embedding_model = SentenceTransformer('multi-qa-mpnet-base-dot-v1')

In [5]:
# Embed the LLM answer from the first row of `df` and show the first
# component of the resulting embedding.
answer_llm = df["answer_llm"].iloc[0]
embeddings = embedding_model.encode(answer_llm)
first_value = embeddings[0]
first_value

-0.42244655

### Q2. Computing the dot product

In [6]:
# Dot product between the embedding of each LLM answer and the embedding of
# the matching original answer; one score is appended per row of `df`.
evaluations = []
# df.iterrows() is a generator with no length, so tqdm is given the row count
# explicitly; without `total` it can only print a running counter instead of
# a progress bar with percentage and ETA.
for i, row in tqdm(df.iterrows(), total=len(df)):
    embeddings_answer_llm = embedding_model.encode(row.answer_llm)
    embeddings_answer_orig = embedding_model.encode(row.answer_orig)
    evaluations.append(embeddings_answer_llm.dot(embeddings_answer_orig))

300it [01:25,  3.52it/s]


In [7]:
# 75th percentile of the dot-product scores collected in `evaluations`.
np.percentile(evaluations, 75)

31.67430877685547

### Q3. Computing the cosine similarity

In [8]:
def vector_normalized(vector):
    """Return `vector` divided by its Euclidean (L2) norm.

    The norm is computed as the square root of the sum of the element-wise
    squares. No special handling is done for a zero norm: the division is
    performed as-is.
    """
    squared_length = (vector * vector).sum()
    return vector / np.sqrt(squared_length)

In [9]:
# Cosine similarity per row: scale both embeddings to unit length with
# `vector_normalized`, then take their dot product.
cosine_score = []
# df.iterrows() has no length, so pass the row count to tqdm explicitly to get
# a real progress bar (percentage / ETA) instead of a bare iteration counter.
for i, row in tqdm(df.iterrows(), total=len(df)):
    embeddings_answer_llm = embedding_model.encode(row.answer_llm)
    embeddings_answer_orig = embedding_model.encode(row.answer_orig)

    embeddings_answer_llm_normalized = vector_normalized(embeddings_answer_llm)
    embeddings_answer_orig_normalized = vector_normalized(embeddings_answer_orig)

    cosine_score.append(
        embeddings_answer_llm_normalized.dot(embeddings_answer_orig_normalized)
    )

300it [01:25,  3.52it/s]


In [10]:
# 75th percentile of the per-row cosine scores collected in `cosine_score`.
np.percentile(cosine_score, 75)

0.8362348973751068

### Q4. ROUGE

In [11]:
# NOTE(review): this cell depends on the third-party `rouge` module. The
# recorded run failed with ModuleNotFoundError, so the module must be
# installed into the kernel's environment before this and the following ROUGE
# cells can run (presumably `%pip install rouge` — confirm package name and
# pin a version).
from rouge import Rouge
rouge_scorer = Rouge()

ModuleNotFoundError: No module named 'rouge'

In [None]:
# Score the row at position 10: ROUGE between its `answer_llm` and
# `answer_orig` texts, then show the 'f' value of 'rouge-1' from the first
# result.
row_10 = df.iloc[10]
scores = rouge_scorer.get_scores(row_10["answer_llm"], row_10["answer_orig"])
scores[0]["rouge-1"]["f"]


### Q5. Average ROUGE score

In [None]:
# Mean of the 'rouge-1', 'rouge-2' and 'rouge-l' "f" values for the pair
# scored into `scores`. `scores` is a list (the other cells read
# `scores[0][...]`), so the per-metric dict is taken from its first element;
# indexing the list directly with a string key would raise a TypeError.
rouge_1 = scores[0]['rouge-1']['f']
rouge_2 = scores[0]['rouge-2']['f']
rouge_l = scores[0]['rouge-l']['f']
# Average the three values defined just above (the previous version referred
# to `*_f1` names that this cell never assigned).
average_rouge = (rouge_1 + rouge_2 + rouge_l) / 3
average_rouge

### Q6. Average ROUGE score for all the data points

In [None]:
# 'rouge-l' "f" value for every row of `df`, stored in a new `rouge_l_f1`
# column. Values are collected in a list and assigned in one step rather than
# writing the frame cell by cell inside the loop.
rouge_l_f1_values = []
# `total` lets tqdm draw a real progress bar; iterrows() itself has no length.
for i, row in tqdm(df.iterrows(), total=len(df)):
    # Use a loop-local name so the global `scores` produced by the row-10 cell
    # (and read by the average-score cell) is not overwritten here.
    row_scores = rouge_scorer.get_scores(row.answer_llm, row.answer_orig)
    rouge_l_f1_values.append(row_scores[0]["rouge-l"]["f"])
# iterrows() yields rows in frame order, so the list lines up positionally.
df["rouge_l_f1"] = rouge_l_f1_values

In [None]:
# Preview the first rows of `df`, which now carries the `rouge_l_f1` column.
df.head()

In [None]:
# Mean of the `rouge_l_f1` column over all rows of `df`.
df["rouge_l_f1"].mean()