In [None]:
import requests
import json
import re
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, classification_report

    # Cautarea raspunsului corect
def extrage_raspuns_corect(text):
    r"""Extract the answer letter (a-f) from a reference solution or model reply.

    Two formats are recognised, in priority order:

    1. LaTeX ``\boxed{x}``.
    2. The phrase ``Răspuns [corect[:|-]] x``, where the letter may be wrapped
       in markdown or bracket decoration (``**b**``, ``(b)``, ``[b]``).

    The letter must be a standalone token. Without that requirement the
    optional ``corect`` part lets the regex backtrack and capture the "c" of
    "corect" itself, so a reply such as "Răspuns corect: **b**" would be read
    as "c".

    Args:
        text: Text to scan.

    Returns:
        The lowercase answer letter, or "?" when no answer is found or when
        ``text`` is not a string.
    """
    if not isinstance(text, str):
        return "?"

    match_latex = re.search(r"\\boxed\{\s*([a-fA-F])\s*\}", text)
    if match_latex:
        return match_latex.group(1).lower()

    # The trailing \b rejects letters that merely start a longer word
    # ("corect", "este", ...).
    match_std = re.search(
        r"[Rr]ăspuns\s+(?:corect\s*[:\-]?)?[\s*(\[]*([a-fA-F])\b", text
    )
    if match_std:
        return match_std.group(1).lower()

    return "?"

    # Incarcare shots
def incarca_shots(cale_fisier):
    """Load few-shot examples from a JSON Lines file.

    Args:
        cale_fisier: Path to a UTF-8 file holding one JSON document per line,
            or a falsy value (``None`` / ``""``) when no examples are wanted.

    Returns:
        The decoded JSON documents in file order; an empty list when no path
        is given.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if not cale_fisier:
        return []
    with open(cale_fisier, "r", encoding="utf-8") as f:
        # Blank or whitespace-only lines are skipped; json.loads would
        # otherwise raise on them.
        return [json.loads(line) for line in f if line.strip()]

    # Functia de evaluare a modelului
def _continut_mesaj(messages, roluri):
    """Return the content of the first message whose lowercased role is in ``roluri`` ("" if none)."""
    return next((m["content"] for m in messages if m["role"].lower() in roluri), "")


def evalueaza_modelul_fireworks(test_file_path, model_name, api_key, label="Set", output_csv=True, shots_file=None):
    """Evaluate a Fireworks chat model on a multiple-choice JSONL data set.

    Each sample's ``messages`` list supplies the question (role ``user``) and
    the reference solution (role ``assistant`` or ``chatbot``). The question,
    preceded by the optional few-shot examples and followed by a fixed
    answer-format instruction, is sent to the chat-completions endpoint. The
    answer letter is extracted from both the reference and the reply with
    ``extrage_raspuns_corect``.

    A request that fails for any reason is reported on stdout and recorded
    with output "Eroare" and prediction "?"; the evaluation then continues.

    Args:
        test_file_path: Path to the UTF-8 JSONL file with the samples.
        model_name: Model identifier sent in the request payload.
        api_key: API key used as the Bearer token.
        label: Name used in the progress bar, the printed report and the
            output CSV file name.
        output_csv: When true, per-sample results are written to
            ``rezultate_<label>.csv`` (label lowercased, spaces replaced by
            underscores) in the working directory.
        shots_file: Optional JSONL file with few-shot examples; ``None`` sends
            only the question.

    Returns:
        The accuracy computed by ``accuracy_score`` over all samples.
    """
    url = "https://api.fireworks.ai/inference/v1/chat/completions"
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    # (connect, read) timeouts in seconds. Without a timeout a stalled
    # connection would block the whole evaluation indefinitely.
    timeout_s = (10, 600)

    with open(test_file_path, "r", encoding="utf-8") as f:
        # Skip blank lines so a stray empty line does not abort the run.
        test_data = [json.loads(line) for line in f if line.strip()]

    shots_data = incarca_shots(shots_file)

    # The few-shot prefix is the same for every sample, so build it once.
    shot_messages = []
    for shot in shots_data:
        shot_messages.append({"role": "user", "content": _continut_mesaj(shot["messages"], {"user"})})
        shot_messages.append({"role": "assistant", "content": _continut_mesaj(shot["messages"], {"chatbot", "assistant"})})

    # Per-sample accumulators; they become the columns of the results frame.
    prompts, gold, predictions, outputs_raw = [], [], [], []

    for idx, sample in enumerate(tqdm(test_data, desc=f"Evaluare {label}")):
        user_msg = _continut_mesaj(sample["messages"], {"user"})
        assistant_msg = _continut_mesaj(sample["messages"], {"chatbot", "assistant"})
        gold.append(extrage_raspuns_corect(assistant_msg))

        task_prompt = {
            "role": "user",
            "content": user_msg.strip() + "\n\nInstrucțiuni: Alege litera răspunsului corect (a–f). Răspunde exact în formatul: Răspuns corect: x"
        }
        full_prompt = shot_messages + [task_prompt]

        # Show the first full prompt once so the exact input can be inspected.
        if idx == 0:
            print("\n=== Prompt complet trimis (exemplul 1) ===")
            for m in full_prompt:
                print(f"[{m['role'].upper()}] {m['content']}\n")
            print("=== Sfârșit prompt ===\n")

        prompts.append(user_msg)

        try:
            payload = {
                "model": model_name,
                "max_tokens": 4096,
                "temperature": 0,
                "top_p": 1,
                "top_k": 40,
                "presence_penalty": 0,
                "frequency_penalty": 0,
                "messages": full_prompt
            }

            response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=timeout_s)
            # Report HTTP errors (401, 429, 5xx) as such instead of as a
            # KeyError on the missing "choices" key.
            response.raise_for_status()
            response_json = response.json()
            reply = response_json["choices"][0]["message"]["content"].strip()
            predicted = extrage_raspuns_corect(reply)
        except Exception as e:
            # Best effort: one failed request must not abort the evaluation.
            print("Eroare:", e)
            reply = "Eroare"
            predicted = "?"

        predictions.append(predicted)
        outputs_raw.append(reply)

    # "?" marks an answer that could not be parsed. A "?" reference compared
    # with a "?" prediction counts as a match in the metrics below, so report
    # how many references are affected.
    gold_necunoscut = gold.count("?")
    if gold_necunoscut:
        print(f"\nAtenție: {gold_necunoscut} din {len(gold)} exemple nu au un răspuns de referință recunoscut ('?').")

    df = pd.DataFrame({
        "prompt": prompts,
        "raspuns_corect": gold,
        "pred": predictions,
        "output": outputs_raw
    })
    df["correct"] = (df["raspuns_corect"] == df["pred"]).map({True: "TRUE", False: "FALSE"})

    # Metrics and report
    acc = accuracy_score(df["raspuns_corect"], df["pred"])
    print(f"\nAcuratețe ({label}): {acc:.2%}")
    print("\n=== Raport ===")
    print(classification_report(df["raspuns_corect"], df["pred"], zero_division=0))

    if output_csv:
        csv_name = f"rezultate_{label.lower().replace(' ', '_')}.csv"
        # quoting=1 is csv.QUOTE_ALL: every field is quoted.
        df.to_csv(csv_name, index=False, encoding="utf-8-sig", quoting=1)
        print(f"Rezultatele au fost salvate în: {csv_name}")

    return acc

In [None]:
# Settings
from dotenv import load_dotenv
import os

# Read the API key from the local env file so it never appears in the notebook.
load_dotenv("Chei.env")
API_KEY = os.getenv("FIREWORKS_KEY")  # Fireworks API key (used as the Bearer token)
if not API_KEY:
    # Fail here, once, instead of sending "Bearer None" with every request.
    raise RuntimeError("FIREWORKS_KEY lipsește: verifică fișierul Chei.env sau variabilele de mediu.")
MODEL_NAME = "accounts/fireworks/models/llama-v3p1-70b-instruct"  # model id sent to the API (Llama, DeepSeek, ...)

In [None]:
# Evaluate on the test set.
# The function returns the accuracy (a float), not a DataFrame, so the result
# is stored under a name that says so. Per-sample results are written to
# rezultate_few-shot-test.csv.
acc_test = evalueaza_modelul_fireworks(
    test_file_path="fine_tune_test_full.jsonl",
    model_name=MODEL_NAME,
    api_key=API_KEY,
    label="Few-shot-Test",
    shots_file="few-shots.jsonl"
)
# label: determines the name of the output CSV file
# shots_file: None, or the JSONL file that holds the few-shot examples

In [None]:
# Evaluate on the validation set.
# This cell used to be a verbatim copy of the test cell (same input file, same
# label), so rezultate_few-shot-validation.csv, which later cells read, was
# never written. The label below produces exactly that file name.
# NOTE(review): the validation file name is assumed by analogy with the test
# file; confirm the real path before running.
acc_validare = evalueaza_modelul_fireworks(
    test_file_path="fine_tune_validation_full.jsonl",
    model_name=MODEL_NAME,
    api_key=API_KEY,
    label="Few-shot-Validation",
    shots_file="few-shots.jsonl"
)
# label: determines the name of the output CSV file
# shots_file: None, or the JSONL file that holds the few-shot examples

In [None]:
# Generare grafic
import matplotlib.pyplot as plt
import pandas as pd

# Per-answer totals versus correctly predicted counts for the test run.
df = pd.read_csv("rezultate_few-shot-test.csv")

# Recompute correctness from the raw columns rather than trusting the stored flag.
df["correct"] = df["raspuns_corect"].eq(df["pred"])

label_counts = df["raspuns_corect"].value_counts().sort_index()
correct_counts = df.loc[df["correct"], "raspuns_corect"].value_counts().sort_index()

# The second bar series is drawn over the first, on the same axes.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(label_counts.index, label_counts.values, alpha=0.5, label="Total")
ax.bar(correct_counts.index, correct_counts.values, alpha=0.8, label="Corecte")
ax.set_title("Performanță model - set Test")
ax.set_xlabel("Variantă de răspuns")
ax.set_ylabel("Număr")
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Generare grafic
import matplotlib.pyplot as plt
import pandas as pd

# Per-answer totals versus correctly predicted counts for the validation run.
df = pd.read_csv("rezultate_few-shot-validation.csv")

# Recompute correctness from the raw columns rather than trusting the stored flag.
df["correct"] = df["raspuns_corect"].eq(df["pred"])

label_counts = df["raspuns_corect"].value_counts().sort_index()
correct_counts = df.loc[df["correct"], "raspuns_corect"].value_counts().sort_index()

# The second bar series is drawn over the first, on the same axes.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(label_counts.index, label_counts.values, alpha=0.5, label="Total")
ax.bar(correct_counts.index, correct_counts.values, alpha=0.8, label="Corecte")
ax.set_title("Performanță model - set Validare")
ax.set_xlabel("Variantă de răspuns")
ax.set_ylabel("Număr")
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Calculez acuratetea pe dificultate pentru fiecare set
import pandas as pd

def calculeaza_acuratete_pe_dificultate(cale_csv, eticheta_set):
    """Compute the mean accuracy per difficulty level from a results CSV.

    The difficulty is the letter A, B or C captured from the ``prompt`` column
    by the pattern ``<digits>.<optional digits><letter>.``. Rows whose prompt
    does not match get no difficulty and are left out of the grouping.

    Args:
        cale_csv: Path to a CSV with at least a ``prompt`` column and either a
            ``correct`` column or both ``raspuns_corect`` and ``pred``.
        eticheta_set: Data set name used in the accuracy column header.

    Returns:
        A DataFrame with the columns ``Dificultate`` and
        ``Acuratețe <eticheta_set>``, one row per difficulty level found.
    """
    rezultate = pd.read_csv(cale_csv)
    rezultate["dificultate"] = rezultate["prompt"].str.extract(
        r"\d+\.(?:\d+)?([ABC])\.", expand=False
    )

    # Derive the correctness flag only when the file does not already have one.
    if "correct" not in rezultate.columns:
        rezultate["correct"] = rezultate["raspuns_corect"] == rezultate["pred"]

    medie_pe_nivel = rezultate.groupby("dificultate")["correct"].mean()
    return (
        medie_pe_nivel
        .rename_axis("Dificultate")
        .rename(f"Acuratețe {eticheta_set}")
        .reset_index()
    )

# Per-difficulty accuracy of each results file, joined side by side.
acuratete_test = calculeaza_acuratete_pe_dificultate(
    cale_csv="rezultate_few-shot-test.csv",
    eticheta_set="Test",
)
acuratete_val = calculeaza_acuratete_pe_dificultate(
    cale_csv="rezultate_few-shot-validation.csv",
    eticheta_set="Validare",
)

# The outer join keeps a difficulty level that appears in only one of the sets.
df_comparatie = acuratete_test.merge(acuratete_val, on="Dificultate", how="outer")
print(df_comparatie)

In [None]:
# Per-chapter accuracy for the test set
df_test = pd.read_csv("rezultate_few-shot-test.csv")
df_all = pd.read_csv("exercitii_extrase_structurat_rezolvari.csv")
# The exercise id is the first whitespace-delimited token of the prompt,
# without its trailing dot.
df_test["id"] = df_test["prompt"].str.extract(r"^(\S+)", expand=False).str.replace(r"\.$", "", regex=True)
# Compare ids as strings on both sides so the merge keys have the same dtype.
df_test["id"] = df_test["id"].astype(str)
df_all["id"] = df_all["id"].astype(str)
df_merged = df_test.merge(df_all[["id", "capitol"]], on="id", how="left")
if "correct" not in df_merged.columns:
    # The results CSV stores the reference answer in "raspuns_corect"; it has
    # no "true" column, so the previous fallback raised a KeyError.
    df_merged["correct"] = df_merged["raspuns_corect"] == df_merged["pred"]
accuracy_by_chapter = df_merged.groupby("capitol")["correct"].mean().reset_index()
accuracy_by_chapter.columns = ["Capitol", "Acuratețe"]
print(accuracy_by_chapter)

In [None]:
# Per-chapter accuracy for the validation set
df_validare = pd.read_csv("rezultate_few-shot-validation.csv")
df_all = pd.read_csv("exercitii_extrase_structurat_rezolvari.csv")
# The exercise id is the first whitespace-delimited token of the prompt,
# without its trailing dot.
df_validare["id"] = df_validare["prompt"].str.extract(r"^(\S+)", expand=False).str.replace(r"\.$", "", regex=True)
# Compare ids as strings on both sides so the merge keys have the same dtype.
df_validare["id"] = df_validare["id"].astype(str)
df_all["id"] = df_all["id"].astype(str)
df_merged = df_validare.merge(df_all[["id", "capitol"]], on="id", how="left")
if "correct" not in df_merged.columns:
    # The results CSV stores the reference answer in "raspuns_corect"; it has
    # no "true" column, so the previous fallback raised a KeyError.
    df_merged["correct"] = df_merged["raspuns_corect"] == df_merged["pred"]
accuracy_by_chapter = df_merged.groupby("capitol")["correct"].mean().reset_index()
accuracy_by_chapter.columns = ["Capitol", "Acuratețe"]
print(accuracy_by_chapter)

In [None]:
# Matrice de confuzie pentru test
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Confusion matrix over the six answer letters for the test run.
# NOTE(review): "?" (unparsed) answers are not among the labels, so such rows
# are presumably left out of the matrix; confirm against scikit-learn docs.
litere = list("abcdef")
df_test = pd.read_csv("rezultate_few-shot-test.csv")
cm = confusion_matrix(
    y_true=df_test["raspuns_corect"],
    y_pred=df_test["pred"],
    labels=litere,
)

# The figure is created before the seaborn theme is applied, as in the
# original cell; the heatmap then draws on that current figure.
plt.figure(figsize=(4, 3.5))
sns.set(style="white", font_scale=0.9)
ax = sns.heatmap(
    cm,
    xticklabels=litere,
    yticklabels=litere,
    cmap="Blues",
    cbar=True,
    annot=True,
    fmt='d',
    annot_kws={"size": 10, "weight": "bold", "color": "black"},
    linewidths=0.5,
    linecolor="black",
)
ax.set_title("Matrice de confuzie – set Test", fontsize=11, weight="bold")
ax.set_xlabel("Predicție", fontsize=10)
ax.set_ylabel("Adevărat", fontsize=10)
plt.tight_layout()
plt.savefig("matrice_confuzie_test.png", dpi=300)
plt.show()

In [None]:
# Matrice de confuzie pentru validare
# Confusion matrix over the six answer letters for the validation run.
# NOTE(review): "?" (unparsed) answers are not among the labels, so such rows
# are presumably left out of the matrix; confirm against scikit-learn docs.
litere = list("abcdef")
df_validare = pd.read_csv("rezultate_few-shot-validation.csv")
cm = confusion_matrix(
    y_true=df_validare["raspuns_corect"],
    y_pred=df_validare["pred"],
    labels=litere,
)

# The figure is created before the seaborn theme is applied, as in the
# original cell; the heatmap then draws on that current figure.
plt.figure(figsize=(4, 3.5))
sns.set(style="white", font_scale=0.9)
ax = sns.heatmap(
    cm,
    xticklabels=litere,
    yticklabels=litere,
    cmap="Blues",
    cbar=True,
    annot=True,
    fmt='d',
    annot_kws={"size": 10, "weight": "bold", "color": "black"},
    linewidths=0.5,
    linecolor="black",
)
ax.set_title("Matrice de confuzie – set Validare", fontsize=11, weight="bold")
ax.set_xlabel("Predicție", fontsize=10)
ax.set_ylabel("Adevărat", fontsize=10)
plt.tight_layout()
plt.savefig("matrice_confuzie_validare.png", dpi=300)
plt.show()