In [1]:
import pandas as pd
import numpy as np

# Read the GPT-4o merged multilingual CSV into `df`.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("GPT4o_Merged_Multilingual.csv")

def compute_entropy(series):
    """Return the Shannon entropy, in bits, of the values in ``series``.

    The empirical distribution is taken from ``value_counts(normalize=True)``;
    every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)``. ``value_counts`` drops missing values by default, so
    NaNs do not form a category of their own.
    """
    probabilities = series.value_counts(normalize=True)
    weighted_logs = probabilities * np.log2(probabilities)
    return -np.sum(weighted_logs)

# Entropy of each GPT-4o language column:
# Persian (FA), Italian (IT) and English (EN), evaluated in that order.
entropy_fa, entropy_it, entropy_en = (
    compute_entropy(df[column])
    for column in ("GPT4o_FA", "GPT4o_IT", "GPT4o_EN")
)

# Report one line per language (EN, FA, IT), three decimals each.
print(
    f"Entropy (EN): {entropy_en:.3f}",
    f"Entropy (FA): {entropy_fa:.3f}",
    f"Entropy (IT): {entropy_it:.3f}",
    sep="\n",
)


Entropy (EN): 1.896
Entropy (FA): 1.926
Entropy (IT): 1.971


In [3]:
import pandas as pd
import numpy as np

# Read the LLaMA 3.1 merged multilingual CSV into `df`.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("LLaMA3.1_70_Merged_Multilingual.csv")

def compute_entropy(series):
    """Return the Shannon entropy, in bits, of the values in ``series``.

    The empirical distribution is taken from ``value_counts(normalize=True)``;
    every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)``. ``value_counts`` drops missing values by default, so
    NaNs do not form a category of their own.
    """
    probabilities = series.value_counts(normalize=True)
    weighted_logs = probabilities * np.log2(probabilities)
    return -np.sum(weighted_logs)

# Entropy of each LLaMA 3.1 language column:
# Persian (FA), Italian (IT) and English (EN), evaluated in that order.
entropy_fa, entropy_it, entropy_en = (
    compute_entropy(df[column])
    for column in ("LLaMA3.1_FA", "LLaMA3.1_IT", "LLaMA3.1_EN")
)

# Report one line per language (EN, FA, IT), three decimals each.
print(
    f"Entropy (EN): {entropy_en:.3f}",
    f"Entropy (FA): {entropy_fa:.3f}",
    f"Entropy (IT): {entropy_it:.3f}",
    sep="\n",
)

Entropy (EN): 1.988
Entropy (FA): 1.972
Entropy (IT): 1.879


In [4]:
import pandas as pd
import numpy as np

# Read the LLaMA 3.2 merged multilingual CSV into `df`.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("LLaMA3.2_Merged_Multilingual.csv")

def compute_entropy(series):
    """Return the Shannon entropy, in bits, of the values in ``series``.

    The empirical distribution is taken from ``value_counts(normalize=True)``;
    every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)``. ``value_counts`` drops missing values by default, so
    NaNs do not form a category of their own.
    """
    probabilities = series.value_counts(normalize=True)
    weighted_logs = probabilities * np.log2(probabilities)
    return -np.sum(weighted_logs)

# Entropy of each LLaMA 3.2 language column:
# Persian (FA), Italian (IT) and English (EN), evaluated in that order.
entropy_fa, entropy_it, entropy_en = (
    compute_entropy(df[column])
    for column in ("LLaMA_FA", "LLaMA_IT", "LLaMA_EN")
)

# Report one line per language (EN, FA, IT), three decimals each.
print(
    f"Entropy (EN): {entropy_en:.3f}",
    f"Entropy (FA): {entropy_fa:.3f}",
    f"Entropy (IT): {entropy_it:.3f}",
    sep="\n",
)

Entropy (EN): 1.840
Entropy (FA): 1.809
Entropy (IT): 1.904


In [5]:
import pandas as pd
import numpy as np

# Read the Qwen2.5 merged multilingual CSV into `df`.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("Qwen2.5_Merged_Multilingual.csv")

def compute_entropy(series):
    """Return the Shannon entropy, in bits, of the values in ``series``.

    The empirical distribution is taken from ``value_counts(normalize=True)``;
    every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)``. ``value_counts`` drops missing values by default, so
    NaNs do not form a category of their own.
    """
    probabilities = series.value_counts(normalize=True)
    weighted_logs = probabilities * np.log2(probabilities)
    return -np.sum(weighted_logs)

# Entropy of each Qwen2.5 language column:
# Persian (FA), Italian (IT) and English (EN), evaluated in that order.
entropy_fa, entropy_it, entropy_en = (
    compute_entropy(df[column])
    for column in ("Qwen2.5_FA", "Qwen2.5_IT", "Qwen2.5_EN")
)

# Report one line per language (EN, FA, IT), three decimals each.
print(
    f"Entropy (EN): {entropy_en:.3f}",
    f"Entropy (FA): {entropy_fa:.3f}",
    f"Entropy (IT): {entropy_it:.3f}",
    sep="\n",
)

Entropy (EN): 1.929
Entropy (FA): 1.743
Entropy (IT): 1.929


In [7]:
import pandas as pd
import numpy as np

# Read the XLM-R merged multilingual CSV into `df`.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("XLM-R_Merged_Multilingual.csv")

def compute_entropy(series):
    """Return the Shannon entropy, in bits, of the values in ``series``.

    The empirical distribution is taken from ``value_counts(normalize=True)``;
    every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)``. ``value_counts`` drops missing values by default, so
    NaNs do not form a category of their own.
    """
    probabilities = series.value_counts(normalize=True)
    weighted_logs = probabilities * np.log2(probabilities)
    return -np.sum(weighted_logs)

# Entropy of each XLM-R language column:
# Persian (FA), Italian (IT) and English (EN), evaluated in that order.
entropy_fa, entropy_it, entropy_en = (
    compute_entropy(df[column])
    for column in ("XLMR_FA", "XLMR_IT", "XLMR_EN")
)

# Report one line per language (EN, FA, IT), three decimals each.
print(
    f"Entropy (EN): {entropy_en:.3f}",
    f"Entropy (FA): {entropy_fa:.3f}",
    f"Entropy (IT): {entropy_it:.3f}",
    sep="\n",
)

Entropy (EN): 1.680
Entropy (FA): 1.731
Entropy (IT): 1.955


In [9]:
import pandas as pd
import numpy as np

# Read the mBERT merged multilingual CSV into `df`.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv("mBERT_Merged_Multilingual.csv")

def compute_entropy(series):
    """Return the Shannon entropy, in bits, of the values in ``series``.

    The empirical distribution is taken from ``value_counts(normalize=True)``;
    every distinct value with relative frequency ``p`` contributes
    ``-p * log2(p)``. ``value_counts`` drops missing values by default, so
    NaNs do not form a category of their own.
    """
    probabilities = series.value_counts(normalize=True)
    weighted_logs = probabilities * np.log2(probabilities)
    return -np.sum(weighted_logs)

# Entropy of each mBERT language column:
# Persian (FA), Italian (IT) and English (EN), evaluated in that order.
entropy_fa, entropy_it, entropy_en = (
    compute_entropy(df[column])
    for column in ("mBERT_FA", "mBERT_IT", "mBERT_EN")
)

# Report one line per language (EN, FA, IT), three decimals each.
print(
    f"Entropy (EN): {entropy_en:.3f}",
    f"Entropy (FA): {entropy_fa:.3f}",
    f"Entropy (IT): {entropy_it:.3f}",
    sep="\n",
)

Entropy (EN): 1.420
Entropy (FA): 1.980
Entropy (IT): 1.928


In [4]:
import pandas as pd
import numpy as np

def compute_entropy(series):
    """Return the Shannon entropy, in bits, of the values in ``series``.

    Parameters
    ----------
    series : pandas.Series
        Observations whose empirical distribution is measured. Missing values
        are ignored because ``value_counts`` drops them by default.

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the distinct values that actually occur;
        ``0.0`` for a constant or empty Series.
    """
    probs = series.value_counts(normalize=True)
    # value_counts only reports a zero frequency for unused categories of a
    # categorical Series. Drop those terms (0 * log 0 is taken as 0) instead of
    # adding an epsilon inside the log: the epsilon biased every term and made
    # a constant Series come out slightly negative.
    probs = probs[probs > 0]
    # Subtract from 0.0 rather than negate so a zero entropy is +0.0, not -0.0.
    return 0.0 - np.sum(probs * np.log2(probs))

def bootstrap_entropy_CI(series, n_bootstrap=1000, confidence=0.95, random_state=None):
    """Bootstrap a percentile confidence interval for the entropy of ``series``.

    The Series is resampled with replacement (same length as the original)
    ``n_bootstrap`` times and ``compute_entropy`` is evaluated on each resample.

    Parameters
    ----------
    series : pandas.Series
        Observations to resample.
    n_bootstrap : int, default 1000
        Number of bootstrap resamples; must be at least 1.
    confidence : float, default 0.95
        Two-sided confidence level, strictly between 0 and 1.
    random_state : int, array-like, numpy.random.RandomState, numpy.random.Generator or None
        Seed or generator for the resampling. ``None`` (the default) keeps the
        previous behaviour of drawing from NumPy's global random state.

    Returns
    -------
    tuple
        ``(mean, lower, upper)``: the mean of the bootstrapped entropies and
        the lower/upper percentile bounds. Note that ``mean`` is the bootstrap
        mean, not the entropy of the original sample.

    Raises
    ------
    ValueError
        If ``n_bootstrap`` is smaller than 1 or ``confidence`` is outside (0, 1).
    """
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")

    # Build ONE generator up front. Passing the same integer seed to every
    # sample() call would return the identical resample each iteration.
    if random_state is None or isinstance(
        random_state, (np.random.RandomState, np.random.Generator)
    ):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    bootstrapped = []
    for _ in range(n_bootstrap):
        sample = series.sample(frac=1.0, replace=True, random_state=rng)
        bootstrapped.append(compute_entropy(sample))

    lower = np.percentile(bootstrapped, (1 - confidence) / 2 * 100)
    upper = np.percentile(bootstrapped, (1 + confidence) / 2 * 100)
    mean = np.mean(bootstrapped)
    return mean, lower, upper

# Map each model's display name to (merged CSV path, column prefix).
# The per-language columns read below are named "<prefix>_<LANG>".
models = {
    "GPT-4o":     ("GPT4o_Merged_Multilingual.csv", "GPT4o"),
    "LLaMA 3.1":  ("LLaMA3.1_70_Merged_Multilingual.csv", "LLaMA3.1"),
    "LLaMA 3.2":  ("LLaMA3.2_Merged_Multilingual.csv", "LLaMA"),
    "Qwen2.5":    ("Qwen2.5_Merged_Multilingual.csv", "Qwen2.5"),
    "mBERT":      ("mBERT_Merged_Multilingual.csv", "mBERT"),
    "XLM-R":      ("XLM-R_Merged_Multilingual.csv", "XLMR"),
}

# Seed NumPy's global RNG before resampling: Series.sample draws from it when
# no random_state is supplied, so this makes the bootstrap table reproducible
# from one run to the next.
SEED = 42
np.random.seed(SEED)

# One record per (model, language); the DataFrame is built once at the end.
results = []

for model_name, (filepath, prefix) in models.items():
    df = pd.read_csv(filepath)
    for lang in ["EN", "FA", "IT"]:
        col = f"{prefix}_{lang}"
        mean, low, high = bootstrap_entropy_CI(df[col])
        results.append({
            "Model": model_name,
            "Lang": lang,
            "Entropy Mean": round(mean, 3),
            "CI Lower": round(low, 3),
            "CI Upper": round(high, 3)
        })

# Convert results to DataFrame
entropy_df = pd.DataFrame(results)
print(entropy_df)



        Model Lang  Entropy Mean  CI Lower  CI Upper
0      GPT-4o   EN         1.857     1.691     1.968
1      GPT-4o   FA         1.886     1.733     1.984
2      GPT-4o   IT         1.932     1.803     1.995
3   LLaMA 3.1   EN         1.952     1.866     1.997
4   LLaMA 3.1   FA         1.936     1.803     1.994
5   LLaMA 3.1   IT         1.843     1.672     1.955
6   LLaMA 3.2   EN         1.800     1.578     1.945
7   LLaMA 3.2   FA         1.769     1.584     1.905
8   LLaMA 3.2   IT         1.867     1.709     1.975
9     Qwen2.5   EN         1.895     1.738     1.988
10    Qwen2.5   FA         1.707     1.497     1.885
11    Qwen2.5   IT         1.892     1.739     1.985
12      mBERT   EN         1.378     1.136     1.595
13      mBERT   FA         1.943     1.831     1.995
14      mBERT   IT         1.891     1.730     1.984
15      XLM-R   EN         1.642     1.385     1.825
16      XLM-R   FA         1.694     1.430     1.896
17      XLM-R   IT         1.922     1.805    