In [1]:
import os
import json
from tqdm import tqdm

In [2]:
# Load the saved evaluation payload. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open until garbage
# collection; JSON is read as UTF-8 regardless of the platform's locale.
with open("data/predictions/final_predictions_base.json", encoding="utf-8") as predictions_file:
	submissions = json.load(predictions_file)

# Number of top-level entries in the payload.
len(submissions)

2

In [3]:
# Inspect the top-level keys of the loaded payload.
submissions.keys()

dict_keys(['predictions', 'expected'])

In [4]:
# Pull the predicted and the expected label lists out of the payload.
predictions, expected = submissions["predictions"], submissions["expected"]

In [5]:
# Peek at the first five predicted labels.
predictions[:5]

['ces_Latn', 'mlt_Latn', 'ayr_Latn', 'kmb_Latn', 'uig_Arab']

In [6]:
# Peek at the first five expected labels for comparison with the predictions.
expected[:5]

['ces_Latn', 'mlt_Latn', 'ayr_Latn', 'kmb_Latn', 'uig_Arab']

In [7]:
from sklearn.metrics import (
	accuracy_score,
	precision_score,
	recall_score,
	f1_score,
	classification_report,
)

# Overall accuracy across every label.
global_accuracy = accuracy_score(expected, predictions)

# Precision, recall and F1 use identical settings, so they are computed by
# running each scorer with the same arguments: average="weighted" for the
# cross-label average and zero_division=0 so undefined ratios score 0.
global_precision, global_recall, global_f1 = (
	metric(expected, predictions, average="weighted", zero_division=0)
	for metric in (precision_score, recall_score, f1_score)
)

# Per-label breakdown returned as a nested dict (output_dict=True) rather
# than a formatted string, so later cells can index it by label.
report = classification_report(expected, predictions, output_dict=True, zero_division=0)

In [8]:
# Collect the global metrics under "<metric>/avg" keys.
results = dict(
	zip(
		("accuracy/avg", "precision/avg", "recall/avg", "f1/avg"),
		(global_accuracy, global_precision, global_recall, global_f1),
	)
)

results

{'accuracy/avg': 0.9594876819079662,
 'precision/avg': 0.9596566609837892,
 'recall/avg': 0.9594876819079662,
 'f1/avg': 0.9594363447462714}

In [9]:
# Spot-check the per-label metrics for the "fra_Latn" label.
report["fra_Latn"]

{'precision': 0.9710373212062702,
 'recall': 0.9817878243000256,
 'f1-score': 0.9763829814410341,
 'support': 77860.0}

In [10]:
from collections import defaultdict

# Group the per-label report entries by the suffix after the underscore
# (e.g. "fra_Latn" is filed under "Latn"). Report keys without an underscore
# are skipped.
clean_results = defaultdict(dict)

for label, metrics in report.items():
	if "_" not in label:
		continue
	# Split on the LAST underscore only: a label containing more than one
	# underscore still yields its suffix instead of raising ValueError on
	# tuple unpacking. Indexing the result also avoids rebinding `_`, which
	# IPython uses for the previous cell output.
	script = label.rsplit("_", 1)[-1]
	clean_results[script][label] = metrics

clean_results.keys()

dict_keys(['Arab', 'Latn', 'Ethi', 'Beng', 'Deva', 'Cyrl', 'Tibt', 'Grek', 'Gujr', 'Hebr', 'Armn', 'Jpan', 'Knda', 'Geor', 'Khmr', 'Hang', 'Laoo', 'Mlym', 'Mymr', 'Orya', 'Guru', 'Olck', 'Sinh', 'Taml', 'Tfng', 'Telu', 'Thai', 'Hant', 'Hans'])

In [11]:
# Summarise each group: total support, unweighted mean of the per-label
# precision / recall / F1, and the number of labels in the group.
scores = {}

for script, per_label in clean_results.items():
	n_labels = len(per_label)
	# Running totals, keyed by the field names used in the report entries.
	totals = {"precision": 0, "recall": 0, "f1-score": 0, "support": 0}
	for metrics in per_label.values():
		for field in totals:
			totals[field] += metrics[field]
	scores[script] = {
		"support": totals["support"],
		"precision": totals["precision"] / n_labels,
		"recall": totals["recall"] / n_labels,
		"f1": totals["f1-score"] / n_labels,
		"size": n_labels,
	}

In [12]:
import pandas as pd

# Build the frame with one column per group, then flip it so each group is a
# row and each summary field a column. Counts and float metrics share a
# column before the flip, which is presumably why `size` displays as 1.0.
df = pd.DataFrame(scores).transpose()

# Show five randomly chosen rows (unseeded, so the selection varies per run).
df.sample(5)

Unnamed: 0,support,precision,recall,f1,size
Khmr,39708.0,0.999975,0.999673,0.999824,1.0
Orya,44976.0,0.9994,0.999666,0.999533,1.0
Deva,423895.0,0.929529,0.910336,0.919058,10.0
Laoo,29389.0,0.999864,0.999796,0.99983,1.0
Hang,108509.0,0.999788,0.999742,0.999765,1.0


In [13]:
# Persist the per-group summary next to the predictions file; index=True
# writes the row labels (the group names) as the first CSV column.
df.to_csv("data/predictions/final_predictions_base.csv", index=True)