In [1]:
import pandas as pd
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Display name of each model/ensemble -> path of its predictions file.
# Insertion order matters: it determines the order of the per-model
# columns in the summary table built further down.
files = dict(
    [
        ("Camel-CA", "/content/Camel_ca_predictions_CANER_Corpus.txt"),
        ("Camel-MSA", "/content/Camel_msa_predictions_CANER_Corpus.txt"),
        ("Camel-Mix", "/content/Camel_mix_predictions_CANER_Corpus.txt"),
        ("Arbert", "/content/Arbert_predictions_CANER_Corpus.txt"),
        ("Ensemble-Majority", "/content/Compined_Predictions_Majority_CANER.txt"),
        ("Ensemble-Weighted", "/content/Compined_Predictions_Weighted_CANER.txt"),
    ]
)


In [3]:
import pandas as pd


def load_predictions(file_path):
    """Load one predictions file into a DataFrame.

    The file is read as tab-separated text and only the "Token",
    "Gold Tag" and "Predicted Tag" columns are kept.

    Parameters
    ----------
    file_path : str or path-like
        Location of the tab-separated predictions file.

    Returns
    -------
    pandas.DataFrame
        The three columns as strings, with every row that has an empty
        field removed.
    """
    df = pd.read_csv(
        file_path,
        sep="\t",
        usecols=["Token", "Gold Tag", "Predicted Tag"],
        # Keep tokens/tags as text so gold and predicted tags compare as strings.
        dtype=str,
        # By default pandas turns literal strings such as "NA", "null" or
        # "nan" into NaN, which dropna() would then silently delete. Only a
        # genuinely empty field should count as missing here.
        keep_default_na=False,
        na_values=[""],
    )
    return df.dropna()

# Aggregate per-tag statistics over every model's predictions file.
all_tags = set()      # every gold tag seen in any file
tag_counts = {}       # tag -> gold occurrences in the first file that contains it
accuracies = {model: {} for model in files.keys()}  # model -> {tag: accuracy}

for model, file in files.items():
    df = load_predictions(file)

    # Count gold occurrences and exact gold==predicted matches once per file
    # instead of re-filtering the whole frame for every tag.
    gold_totals = df["Gold Tag"].value_counts()
    correct_totals = df.loc[
        df["Gold Tag"] == df["Predicted Tag"], "Gold Tag"
    ].value_counts()

    for tag, total in gold_totals.items():
        all_tags.add(tag)
        # The reported count comes from the first file in which the tag appears.
        tag_counts.setdefault(tag, total)
        # Per-tag accuracy: share of tokens with this gold tag that were
        # predicted with exactly the same tag.
        accuracies[model][tag] = correct_totals.get(tag, 0) / total

# A tag may be absent from some files. Give every model an entry for every
# tag (0 when the tag never occurs in that model's file) so later lookups by
# (model, tag) cannot raise KeyError.
for per_tag in accuracies.values():
    for tag in all_tags:
        per_tag.setdefault(tag, 0)

# Build the final summary table: one row per tag (sorted), holding the tag,
# its count, and each model's accuracy rounded to two decimals.
model_names = list(files.keys())
rows = [
    [tag, tag_counts[tag]]
    + [round(accuracies[name][tag], 2) for name in model_names]
    for tag in sorted(all_tags)
]

columns = ["Tag", "Count"] + model_names
final_df = pd.DataFrame(rows, columns=columns)

# In Colab, display(final_df) shows the same table as a rich HTML widget.
# Here the table is printed as Markdown text, without the index column.
print(final_df.to_markdown(index=False))


| Tag     |   Count |   Camel-CA |   Camel-MSA |   Camel-Mix |   Arbert |   Ensemble-Majority |   Ensemble-Weighted |
|:--------|--------:|-----------:|------------:|------------:|---------:|--------------------:|--------------------:|
| Allah   |     765 |       0.99 |        0.99 |        0.99 |     0.99 |                0.99 |                0.99 |
| Clan    |     103 |       0.94 |        0.93 |        0.91 |     0.95 |                0.93 |                0.94 |
| Date    |      56 |       0.93 |        0.96 |        0.93 |     0.96 |                0.96 |                0.93 |
| Loc     |     127 |       0.94 |        0.94 |        0.93 |     0.94 |                0.93 |                0.94 |
| NatOb   |      61 |       0.9  |        0.87 |        0.92 |     0.79 |                0.89 |                0.89 |
| O       |   19431 |       1    |        1    |        1    |     1    |                1    |                1    |
| Pers    |    3754 |       0.99 |        0.99 |        