In [23]:
import pandas as pd
import ast

In [24]:
# Load the results table; the cells below aggregate it by model and variant.
RESULTS_CSV = "./time_test_result.csv"
df = pd.read_csv(RESULTS_CSV)

In [25]:
# Mean answer-generation time for each model.
mean_gen_time_per_model = (
    df.groupby(by="model")["generation_time_s"]
    .mean()
)

In [26]:
# Mean time spent selecting the top-K contexts, for each variant.
mean_topk_time_per_variant = (
    df.groupby(by="variant")["top_k_context_selection_time_s(grover_or_classic)"]
    .mean()
)

In [27]:
# Mean time spent selecting the top-10 contexts, for each variant.
mean_top10_time_per_variant = (
    df.groupby(by="variant")["top10_contexts_selection_time_s"]
    .mean()
)

In [28]:
# Compare the top-K contexts picked by the Grover variants with the classic ones.
# Build a nested mapping:
#   {(sample_id, top_k): {"grover": [contexts], "classic": [contexts]}}
# NOTE: the key does not include the "model" column, so when several rows share
# the same sample_id / top_k / source, the row iterated last overwrites the rest.
context_map = {}
for _, row in df.iterrows():
    variant = row["variant"]

    # Derive K from the variant name; 0 marks a variant without a top-K marker.
    if "top3" in variant:
        top_k = 3
    elif "top1" in variant:
        top_k = 1
    else:
        top_k = 0
    if top_k == 0:
        continue

    # Any variant whose name lacks "grover" is treated as the classic selection.
    if "grover" in variant:
        source = "grover"
    else:
        source = "classic"
    key = (row["sample_id"], top_k)

    # contexts_used holds a Python literal stored as text; parse it before
    # touching the map so a malformed cell never leaves an empty entry behind.
    contexts = ast.literal_eval(row["contexts_used"])
    if key not in context_map:
        context_map[key] = {}
    context_map[key][source] = contexts


In [29]:
# Check how often the Grover and classic selections agree.
# Each entry of `matches` is ((sample_id, top_k), bool); contexts are compared
# as sets, so their order inside the list does not matter.
matches = []
for key, val in context_map.items():
    # Only keys that have both selections can be compared.
    if "grover" not in val or "classic" not in val:
        continue
    match = set(val["grover"]) == set(val["classic"])
    matches.append((key, match))
    if match:
        continue
    print(f"\nMismatch for {key}: Grover contexts: {val['grover']}, Classic contexts: {val['classic']}")

# Agreement percentage, rounded to two decimals (0.0 when nothing was comparable).
if matches:
    agreeing = sum(1 for _, m in matches if m)
    matching_percent = round(agreeing / len(matches) * 100, 2)
else:
    matching_percent = 0.0


Mismatch for (43, 1): Grover contexts: [], Classic contexts: ["All of Notre Dame's undergraduate students are a part of one of the five undergraduate colleges at the school or are in the First Year of Studies program. The First Year of Studies program was established in 1962 to guide incoming freshmen in their first year at the school before they have declared a major. Each student is given an academic advisor from the program who helps them to choose classes that give them exposure to any major in which they are interested. The program also includes a Learning Resource Center which provides time management, collaborative learning, and subject tutoring. This program has been recognized previously, by U.S. News & World Report, as outstanding."]


In [30]:
# Mean cosine similarity and word overlap for every model + variant combination.
metric_columns = ["cosine", "overlap"]
mean_metrics = (
    df.groupby(by=["model", "variant"])[metric_columns]
    .mean()
)

In [31]:
# Display the summary report: each section is a heading line followed by its value.
print("1. Średni czas generowania odpowiedzi dla każdego modelu:", mean_gen_time_per_model, sep="\n")
print("\n2. Średni czas wyboru top-K kontekstów dla każdego wariantu:", mean_topk_time_per_variant, sep="\n")
print("\n3. Średni czas wyboru top-10 kontekstów dla każdego wariantu:", mean_top10_time_per_variant, sep="\n")
print(f"\n4. Procent zgodnych kontekstów (Grover vs Classic): {matching_percent}%")
print("\n5. Średnie cosine similarity i word overlap:", mean_metrics, sep="\n")

1. Średni czas generowania odpowiedzi dla każdego modelu:
model
llama-3-8b      2.561786
mixtral-8x7b    1.184821
phi-3.5         2.651964
Name: generation_time_s, dtype: float64

2. Średni czas wyboru top-K kontekstów dla każdego wariantu:
variant
classic_top1    0.000000
classic_top3    0.000000
grover_top1     0.028929
grover_top3     0.030357
no_context      0.000000
Name: top_k_context_selection_time_s(grover_or_classic), dtype: float64

3. Średni czas wyboru top-10 kontekstów dla każdego wariantu:
variant
classic_top1    0.297321
classic_top3    0.297321
grover_top1     0.297321
grover_top3     0.297321
no_context      0.297321
Name: top10_contexts_selection_time_s, dtype: float64

4. Procent zgodnych kontekstów (Grover vs Classic): 99.11%

5. Średnie cosine similarity i word overlap:
                             cosine    overlap
model        variant                          
llama-3-8b   classic_top1  0.753046  38.770000
             classic_top3  0.800580  46.398393
          

In [34]:
# Save the per-model/variant metric means to a CSV file.
# reset_index() turns the (model, variant) group keys back into ordinary columns.
output_df = mean_metrics.reset_index()
output_df.to_csv("mean_metrics_test2.csv", index=False)