In [None]:
# Switch the kernel's working directory to the parent folder; presumably this
# makes the relative "data/..." paths used later resolve (confirm against the repo layout).
# NOTE: this is not idempotent - re-running the cell moves up one more level each time.
%cd ..

In [None]:
import os
import json
from tqdm import tqdm

In [None]:
# Load the saved predictions. The context manager guarantees the file handle
# is closed, and the explicit encoding avoids depending on the platform default.
with open("data/predictions/final_predictions_base.json", encoding="utf-8") as predictions_file:
    submissions = json.load(predictions_file)

# `submissions` is indexed by string keys below, i.e. it is a dict, so this
# shows the number of top-level keys rather than the number of predictions.
len(submissions)

In [None]:
# Flatten nested lists ([[a, b], [c]] -> [a, b, c]) so each entry is one label.
# Each key is inspected on its own, so a flat list is never split into
# characters just because the other list was nested, and an empty list is
# left untouched instead of raising IndexError on `[0]`.
for key in ("predictions", "expected"):
    values = submissions[key]
    if len(values) > 0 and isinstance(values[0], list):
        submissions[key] = [item for sublist in values for item in sublist]

In [None]:
# Inspect which top-level keys the loaded predictions file provides.
submissions.keys()

In [None]:
import pycountry

CODE_SEPARATOR = "_"

# ISO 639-3 codes of the individual languages grouped under the Arabic
# macrolanguage ("ara"). None of them carries an ISO 639-1 code of its own,
# so they are collapsed to "ar". "ajp" has been retired from ISO 639-3 but is
# kept because older label sets still emit it.
ARABIC_VARIETY_CODES = frozenset({
	"aao", "abh", "abv", "acm", "acq", "acw", "acx", "acy", "adf", "aeb",
	"aec", "afb", "ajp", "apc", "apd", "arb", "arq", "ars", "ary", "arz",
	"auz", "avl", "ayh", "ayl", "ayn", "ayp", "pga", "shu", "ssh",
})


def to_iso639_1(code: str) -> str:
	"""Convert a predicted language label to an ISO 639-1 code where possible.

	The label is split on ``CODE_SEPARATOR`` and only the first part (the
	language code) is looked up; anything after it (e.g. a script tag) is
	ignored for the lookup.

	Args:
		code: Raw language code from model prediction, e.g. ``"fra_Latn"``.

	Returns:
		* ``"ar"`` when the language part is a variety of Arabic, whatever the
		  script tag says. The decision is made on the language code, not on
		  the presence of ``"Arab"`` in the label, so other languages written
		  in Arabic script are no longer reported as Arabic.
		* The two-letter ``alpha_2`` code when pycountry knows one.
		* The bare language part when pycountry has no entry for it (this also
		  passes through labels that are already two-letter codes).
		* The unchanged ``code`` when the language exists but has no two-letter
		  code, so unconverted labels stay recognisable downstream.
	"""
	# Extract base language code before any dialect/script indicators
	base_code = code.split(CODE_SEPARATOR)[0]

	# Decide "Arabic" up front so the result does not depend on which failure
	# path pycountry happens to take (missing entry vs. missing alpha_2).
	if base_code in ARABIC_VARIETY_CODES:
		return "ar"

	try:
		lang = pycountry.languages.get(alpha_3=base_code)
	except KeyError:
		# Kept from the original handler: a lookup that raises falls back to
		# the raw label.
		return code

	if not lang:
		return base_code

	# Entries without a two-letter code raise AttributeError on `.alpha_2`;
	# fall back to the raw label in that case.
	return getattr(lang, "alpha_2", code)

In [None]:
# Expected labels are used exactly as stored; only the predicted labels are
# passed through the normaliser.
expected = submissions["expected"]
predictions = list(map(to_iso639_1, submissions["predictions"]))

In [None]:
# Distinct raw labels emitted by the model (before normalisation).
unique_languages = {*submissions["predictions"]}
unique_languages

In [None]:
# Spot-check the normaliser on every distinct raw label. The result follows the
# set's iteration order, so read it against the same iteration of `unique_languages`.
[to_iso639_1(i) for i in unique_languages]

In [None]:
# Peek at the first five normalised predictions.
predictions[:5]

In [None]:
# First five expected labels, for side-by-side comparison with the predictions.
expected[:5]

In [None]:
from sklearn.metrics import (
	accuracy_score,
	precision_score,
	recall_score,
	f1_score,
	classification_report,
)

# Options shared by the three averaged scores: support-weighted averaging, and
# 0 as the value used when a score would divide by zero.
weighted_options = {"average": "weighted", "zero_division": 0}

global_accuracy = accuracy_score(expected, predictions)
global_precision, global_recall, global_f1 = (
	metric(expected, predictions, **weighted_options)
	for metric in (precision_score, recall_score, f1_score)
)

# Per-label breakdown as a nested dict (label -> metric -> value).
report = classification_report(expected, predictions, output_dict=True, zero_division=0)

In [None]:
# Pair each summary name with its value; order matches the original mapping.
metric_names = ("accuracy/avg", "precision/avg", "recall/avg", "f1/avg")
metric_values = (global_accuracy, global_precision, global_recall, global_f1)
results = dict(zip(metric_names, metric_values))

results

In [None]:
# Per-class metrics for the "fr" label; raises KeyError if "fr" is not a key of the report.
report["fr"]

In [None]:
# Alias only: `scores` and `report` refer to the same dict (no copy is made).
scores = report

In [None]:
import pandas as pd

# The report's keys become columns first; transposing gives one row per
# report entry and one column per metric.
df = pd.DataFrame(scores).transpose()

# Random five-row preview (unseeded, so it differs between runs).
df.sample(n=5)

In [None]:
# Persist the per-label report next to the predictions file; the index (the
# report's row labels) is written as the first column.
df.to_csv("data/predictions/final_predictions_base.csv", index=True)

In [None]:
# Same table as a Markdown file. NOTE: pandas' `to_markdown` relies on the
# optional `tabulate` package being installed.
df.to_markdown("data/predictions/final_predictions_base.md", index=True)

In [None]:
# Rebinds `submissions` from the loaded dict to a DataFrame (one column per key).
# The cells below rely on the DataFrame form, so the name is reused on purpose;
# be aware that earlier cells see a DataFrame if they are re-run after this one.
submissions = pd.DataFrame(submissions)

submissions.sample(5)

In [None]:
# Replace the raw predicted labels with their normalised form, element by element.
normalized_predictions = submissions["predictions"].map(to_iso639_1)
submissions["predictions"] = normalized_predictions

submissions.sample(n=5)

In [None]:
# Rows where the prediction disagrees with the expected label.
is_mismatch = submissions["predictions"].ne(submissions["expected"])
submissions.loc[is_mismatch]

In [None]:
# Mismatching rows whose predicted label is longer than two characters,
# i.e. labels that did not end up as a two-letter code.
is_mismatch = submissions["predictions"].ne(submissions["expected"])
is_longer_than_two = submissions["predictions"].map(len).gt(2)
submissions.loc[is_mismatch & is_longer_than_two]

In [None]:
# Mismatching rows whose predicted label is longer than three characters.
is_mismatch = submissions["predictions"].ne(submissions["expected"])
is_longer_than_three = submissions["predictions"].map(len).gt(3)
raws = submissions.loc[is_mismatch & is_longer_than_three]
print(len(raws))

# Collapse repeated (prediction, expected, ...) rows, then report the count again.
raws = raws.drop_duplicates()

print(len(raws))

In [None]:
# Show the de-duplicated mismatch rows held in `raws` (the whole frame is displayed).
raws