## Evaluation of models whose generations contain English

In [13]:
import pandas as pd
from tqdm import tqdm
import fasttext
from huggingface_hub import hf_hub_download

# Download the GlotLID model file ("model.bin" from the "cis-lmu/glotlid" repo on the
# Hugging Face Hub) and load it with fastText. `model` is the module-level classifier
# that is_english() calls.
model_path = hf_hub_download(repo_id="cis-lmu/glotlid", filename="model.bin")
model = fasttext.load_model(model_path)

# Function that returns True if the text is detected as English
def is_english(text, threshold=0.8):
    """Return True when the language-ID model labels ``text`` as English.

    Args:
        text: Value to classify. Anything that is not a non-blank ``str``
            (``None``, NaN, numbers, list-likes, ...) is reported as not English.
        threshold: Minimum probability the top prediction must reach to count.

    Returns:
        bool: True if the top predicted label, once its ``__label__`` prefix is
        removed, starts with ``"eng"`` and its probability is at least
        ``threshold``. False otherwise, including when the prediction raises
        (the error is printed and the text is treated as not English).
    """
    # Checking the type first also covers None/NaN (neither is a str) and avoids
    # calling pd.isna on list-like values, whose array result cannot be used in
    # a boolean test and would raise before reaching the try block.
    if not isinstance(text, str):
        return False

    # Newlines are replaced by spaces because model.predict errored on them.
    text = text.replace("\n", " ").strip()
    if not text:
        return False

    try:
        label, prob = model.predict(text)
        lang_code = label[0].replace("__label__", "")
        # bool(...) so callers always get a plain Python bool, never numpy.bool_.
        return bool(lang_code.startswith("eng") and prob[0] >= threshold)
    except Exception as e:
        # Deliberate best-effort: one failing row must not abort a whole run.
        print(f"Error processing text: {text[:50]}... -> {e}")
        return False

# Compute, for each model, the percentage of outputs detected as English
def analyze_english_percentage(setting, models=None, column_name="output"):
    """Rank models by the percentage of their outputs detected as English.

    For every model, reads
    ``../annotated_texts/FR/{setting}/annotated-coverletter_{setting}_fr_{model}.csv``,
    applies ``is_english`` to each value of ``column_name`` and computes the
    share of rows flagged as English. The resulting table is printed and
    returned.

    Args:
        setting: Name of the setting; used both as sub-directory and inside
            the CSV file name (the notebook calls this with "neutral" and
            "gendered").
        models: Iterable of model names to evaluate. Defaults to the fixed
            list of models below.
        column_name: CSV column holding the text to classify.

    Returns:
        pandas.DataFrame: columns ``Rank`` (int, 1 = highest percentage),
        ``Model`` and ``English_percentage`` (string formatted with two
        decimals), ordered by ``Rank``. Ties on the rounded percentage are
        ranked in the order the models were given.
    """
    if models is None:
        models = [
            "bloom-560m", "bloom-3b", "gpt2-fr", "xglm-2", "bloom-7b",
            "vigogne-2-7b", "croissantbase", "croissant-it",
            "llama-3.2-3b-it", "llama-3.2-3b",
            "gemma-2-2b", "gemma-2-2b-it",
            "mistral-7b-instruct-v0.3", "mistral-7b-v0.3",
        ]

    results = []

    for modele in models:
        print(f"Model : {modele}")
        csv_file = f"../annotated_texts/FR/{setting}/annotated-coverletter_{setting}_fr_{modele}.csv"
        df = pd.read_csv(csv_file)

        # Re-register progress_apply so the progress bar is labelled per model.
        tqdm.pandas(desc=f"Analyzing sentences for : {modele}")
        df["is_english"] = df[column_name].progress_apply(is_english)

        total_count = len(df)
        english_count = int(df["is_english"].sum())
        # An empty CSV counts as 0 % instead of failing on a 0/0 division.
        percentage_english = (english_count / total_count) * 100 if total_count else 0.0

        results.append({
            "Model": modele,
            "English_percentage": round(percentage_english, 2),
        })

    # Explicit columns keep the frame well-formed even when no model was given.
    results_df = pd.DataFrame(results, columns=["Model", "English_percentage"])
    ranks = results_df["English_percentage"].rank(ascending=False, method="first").astype(int)
    results_df.insert(0, "Rank", ranks)
    # Ranks are unique, so sorting on them is deterministic and always agrees
    # with the Rank column, whereas a non-stable sort on tied percentages
    # could order tied rows differently.
    results_df = results_df.sort_values(by="Rank").reset_index(drop=True)
    results_df["English_percentage"] = results_df["English_percentage"].map("{:.2f}".format)
    print(results_df)

    # To persist the table, uncomment:
    # output_file = f"./results_percentage_english_{setting}.csv"
    # results_df.to_csv(output_file, index=False)
    # print(f"Results saved to {output_file}")

    return results_df


In [14]:
# English-output percentage per model for the "neutral" setting
res_neutral = analyze_english_percentage("neutral")

Model : bloom-560m


Analyzing sentences for : bloom-560m: 100%|██████████| 4992/4992 [00:03<00:00, 1436.50it/s]


Model : bloom-3b


Analyzing sentences for : bloom-3b: 100%|██████████| 4992/4992 [00:04<00:00, 1208.31it/s]


Model : gpt2-fr


Analyzing sentences for : gpt2-fr: 100%|██████████| 5006/5006 [00:04<00:00, 1131.96it/s]


Model : xglm-2


Analyzing sentences for : xglm-2: 100%|██████████| 4968/4968 [00:02<00:00, 1713.24it/s]


Model : bloom-7b


Analyzing sentences for : bloom-7b: 100%|██████████| 4998/4998 [00:04<00:00, 1218.03it/s]


Model : vigogne-2-7b


Analyzing sentences for : vigogne-2-7b: 100%|██████████| 4992/4992 [00:03<00:00, 1327.52it/s]


Model : croissantbase


Analyzing sentences for : croissantbase: 100%|██████████| 4968/4968 [00:03<00:00, 1266.16it/s]


Model : croissant-it


Analyzing sentences for : croissant-it: 100%|██████████| 4968/4968 [00:04<00:00, 1073.22it/s]


Model : llama-3.2-3b-it


Analyzing sentences for : llama-3.2-3b-it: 100%|██████████| 4968/4968 [00:04<00:00, 1180.95it/s]


Model : llama-3.2-3b


Analyzing sentences for : llama-3.2-3b: 100%|██████████| 4968/4968 [00:04<00:00, 1159.88it/s]


Model : gemma-2-2b


Analyzing sentences for : gemma-2-2b: 100%|██████████| 4968/4968 [00:04<00:00, 1226.89it/s]


Model : gemma-2-2b-it


Analyzing sentences for : gemma-2-2b-it: 100%|██████████| 4968/4968 [00:04<00:00, 1101.43it/s]


Model : mistral-7b-instruct-v0.3


Analyzing sentences for : mistral-7b-instruct-v0.3: 100%|██████████| 4992/4992 [00:03<00:00, 1313.36it/s]


Model : mistral-7b-v0.3


Analyzing sentences for : mistral-7b-v0.3: 100%|██████████| 4992/4992 [00:03<00:00, 1353.02it/s]

    Rank                     Model English_percentage
0      1           mistral-7b-v0.3              40.16
1      2  mistral-7b-instruct-v0.3               3.65
2      3             gemma-2-2b-it               1.11
3      4              croissant-it               0.14
4      5           llama-3.2-3b-it               0.08
5      6              llama-3.2-3b               0.08
6      7              vigogne-2-7b               0.06
7      8             croissantbase               0.06
8      9                   gpt2-fr               0.04
9     10                  bloom-7b               0.02
10    11                gemma-2-2b               0.02
11    12                bloom-560m               0.00
12    13                  bloom-3b               0.00
13    14                    xglm-2               0.00





In [15]:
# English-output percentage per model for the "gendered" setting
res_gendered= analyze_english_percentage("gendered")

Model : bloom-560m


Analyzing sentences for : bloom-560m: 100%|██████████| 4968/4968 [00:03<00:00, 1330.63it/s]


Model : bloom-3b


Analyzing sentences for : bloom-3b: 100%|██████████| 4968/4968 [00:03<00:00, 1276.29it/s]


Model : gpt2-fr


Analyzing sentences for : gpt2-fr: 100%|██████████| 4968/4968 [00:04<00:00, 1163.65it/s]


Model : xglm-2


Analyzing sentences for : xglm-2: 100%|██████████| 4968/4968 [00:02<00:00, 1903.90it/s]


Model : bloom-7b


Analyzing sentences for : bloom-7b: 100%|██████████| 4968/4968 [00:03<00:00, 1300.88it/s]


Model : vigogne-2-7b


Analyzing sentences for : vigogne-2-7b: 100%|██████████| 4968/4968 [00:03<00:00, 1477.41it/s]


Model : croissantbase


Analyzing sentences for : croissantbase: 100%|██████████| 4968/4968 [00:03<00:00, 1414.06it/s]


Model : croissant-it


Analyzing sentences for : croissant-it: 100%|██████████| 4968/4968 [00:04<00:00, 1155.08it/s]


Model : llama-3.2-3b-it


Analyzing sentences for : llama-3.2-3b-it: 100%|██████████| 4968/4968 [00:04<00:00, 1233.35it/s]


Model : llama-3.2-3b


Analyzing sentences for : llama-3.2-3b: 100%|██████████| 4968/4968 [00:04<00:00, 1174.83it/s]


Model : gemma-2-2b


Analyzing sentences for : gemma-2-2b: 100%|██████████| 4968/4968 [00:03<00:00, 1337.52it/s]


Model : gemma-2-2b-it


Analyzing sentences for : gemma-2-2b-it: 100%|██████████| 4968/4968 [00:04<00:00, 1144.61it/s]


Model : mistral-7b-instruct-v0.3


Analyzing sentences for : mistral-7b-instruct-v0.3: 100%|██████████| 4968/4968 [00:03<00:00, 1310.11it/s]


Model : mistral-7b-v0.3


Analyzing sentences for : mistral-7b-v0.3: 100%|██████████| 4968/4968 [00:03<00:00, 1384.26it/s]

    Rank                     Model English_percentage
0      1           mistral-7b-v0.3              48.71
1      2  mistral-7b-instruct-v0.3               8.98
2      3             gemma-2-2b-it               8.64
3      4              vigogne-2-7b               0.34
4      5             croissantbase               0.14
5      6                gemma-2-2b               0.12
6      7                   gpt2-fr               0.04
7      8           llama-3.2-3b-it               0.04
8      9              llama-3.2-3b               0.04
9     10                bloom-560m               0.00
10    11                  bloom-3b               0.00
11    12                    xglm-2               0.00
12    13                  bloom-7b               0.00
13    14              croissant-it               0.00





In [16]:
#res_gendered.to_latex(index=False)

In [17]:
#res_neutral.to_latex(index=False)