In [None]:
# Step up one directory so the relative "data/..." paths used later resolve
# (presumably the notebook sits one level below the project root — confirm).
# NOTE: a relative %cd is not idempotent; re-running this cell moves up again.
%cd ..

In [None]:
import os
import json
from tqdm import tqdm

In [None]:
# Load the saved prediction dump. The context manager closes the file handle
# deterministically instead of leaving it open until garbage collection, and
# the explicit encoding keeps decoding independent of the platform locale.
with open("data/predictions/final_predictions_base.json", encoding="utf-8") as predictions_file:
    submissions = json.load(predictions_file)

# Number of top-level entries in the loaded object.
len(submissions)

In [None]:
# If the stored predictions are nested (a list of lists), collapse both fields
# into flat lists. Only the first "predictions" entry is inspected; "expected"
# is assumed to share the same nesting.
if isinstance(submissions["predictions"][0], list):
    for field in ("predictions", "expected"):
        submissions[field] = [label for batch in submissions[field] for label in batch]

In [None]:
# Show which top-level fields the loaded object carries.
submissions.keys()

In [None]:
# Pull out the two label sequences that are compared against each other below.
predictions, expected = submissions["predictions"], submissions["expected"]

In [None]:
# Peek at the first five predicted labels.
predictions[:5]

In [None]:
# Peek at the first five reference labels, for comparison with the predictions.
expected[:5]

In [None]:
from sklearn.metrics import (
	accuracy_score,
	classification_report,
	f1_score,
	precision_score,
	recall_score,
)

# Options shared by the averaged metrics: support-weighted averaging across
# classes, and a score of 0 wherever a metric would otherwise be undefined.
weighted_opts = {"average": "weighted", "zero_division": 0}

global_accuracy = accuracy_score(expected, predictions)
global_precision = precision_score(expected, predictions, **weighted_opts)
global_recall = recall_score(expected, predictions, **weighted_opts)
global_f1 = f1_score(expected, predictions, **weighted_opts)

# Per-label breakdown, returned as a nested dict rather than a formatted string.
report = classification_report(expected, predictions, zero_division=0, output_dict=True)

In [None]:
# Headline scores over the whole evaluation set, keyed "<metric>/avg".
results = dict(
	[
		("accuracy/avg", global_accuracy),
		("precision/avg", global_precision),
		("recall/avg", global_recall),
		("f1/avg", global_f1),
	]
)

results

In [None]:
# Spot-check a single label's entry in the report; raises KeyError if
# "fra_Latn" is not one of the report's keys.
report["fra_Latn"]

In [None]:
from collections import defaultdict

# Bucket the per-label report entries by the part of the label that follows its
# last underscore (e.g. "Latn" for "fra_Latn"). Result shape:
# {suffix: {full_label: metrics_dict}}.
clean_results = defaultdict(dict)

for label, label_metrics in report.items():
	# Keys without an underscore are skipped (presumably the report's
	# aggregate rows such as the overall averages — confirm).
	if "_" in label:
		# rpartition never fails on labels containing several underscores
		# (tuple-unpacking a plain split would raise ValueError), and indexing
		# the result avoids rebinding `_`, which IPython uses for the last
		# cell output.
		group = label.rpartition("_")[2]
		clean_results[group][label] = label_metrics

clean_results.keys()

In [None]:
# Collapse each group's per-label metrics into one summary row: supports are
# summed, while precision / recall / f1 are plain (unweighted) means over the
# labels in the group.
scores = {}

for group, per_label in clean_results.items():
	n_labels = len(per_label)
	# Running totals, accumulated label by label in dict order.
	totals = {"precision": 0, "recall": 0, "f1-score": 0, "support": 0}
	for metrics in per_label.values():
		for field in totals:
			totals[field] += metrics[field]
	scores[group] = {
		"support": totals["support"],
		"precision": totals["precision"] / n_labels,
		"recall": totals["recall"] / n_labels,
		"f1": totals["f1-score"] / n_labels,
		"size": n_labels,
	}

In [None]:
import pandas as pd

# Transpose so that each key of `scores` becomes a row and each summary field
# a column.
df = pd.DataFrame(scores).T

# Preview up to five rows. Capping n avoids the ValueError that sample(5)
# raises on a frame with fewer than five rows, and the fixed seed makes the
# preview reproducible across runs.
df.sample(n=min(5, len(df)), random_state=0)

In [None]:
# Persist the table as CSV, writing the row labels as the first column.
df.to_csv("data/predictions/final_predictions_base.csv", index=True)

In [None]:
# Write the same table in Markdown form alongside the CSV.
# NOTE(review): DataFrame.to_markdown relies on the optional `tabulate`
# package — confirm it is installed in this environment.
df.to_markdown("data/predictions/final_predictions_base.md", index=True)