<a href="https://colab.research.google.com/github/Hind-create/Projet-de-stage-Automatisation-des-AO/blob/main/PROJECTAO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
from google.colab import files

# 📌 Ouvre une fenêtre pour sélectionner un fichier à téléverser
uploaded = files.upload()

# 📌 Affiche les fichiers téléversés
for filename in uploaded.keys():
    print(f"✅ Fichier téléversé : {filename}")


Saving resultats_classification_ao5.xlsx to resultats_classification_ao5.xlsx
✅ Fichier téléversé : resultats_classification_ao5.xlsx


In [12]:
# Installation des dépendances (à exécuter une seule fois)
!pip install transformers datasets evaluate --quiet
!pip install --upgrade pandas openpyxl --quiet

import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import DataCollatorWithPadding, TrainingArguments, Trainer
import evaluate
from torch.nn.functional import softmax

# Paramètres
data_path = "munisys_cleaned.xlsx"
test_file_path = "resultats_classification_ao5.xlsx"
text_column_name = "Objet"
label_column_name = "BU"
model_name = "camembert-base"
test_size = 0.2
num_labels = 5
confidence_threshold = 0.50

# Chargement et préparation des données
df = pd.read_excel(data_path)
df = df[[text_column_name, label_column_name]].dropna()
df[label_column_name] = df[label_column_name].astype(str).str.strip().str.lower()

label2id = {
    "endpoint solutions": 0,
    "cybersecurity": 1,
    "infrastructure software": 2,
    "cloud and data center solutions": 3,
    "non pertinent": 4
}
id2label = {v: k for k, v in label2id.items()}

df = df[df[label_column_name].isin(label2id.keys())]
df["label"] = df[label_column_name].map(label2id)

df_train, df_test = train_test_split(df, test_size=test_size, stratify=df["label"], random_state=42)
train_dataset = Dataset.from_pandas(df_train)
test_dataset = Dataset.from_pandas(df_test)

# Tokenisation
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    tokens = tokenizer(
        examples[text_column_name],
        truncation=True,
        padding="max_length",
        max_length=128
    )
    tokens["labels"] = examples["label"]
    return tokens

tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_test = test_dataset.map(tokenize_function, batched=True)

# Modèle
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Entraînement
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return metric.compute(predictions=preds, references=labels)

training_args = TrainingArguments(
    output_dir="./results_camembert",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_strategy="epoch",
    save_strategy="epoch",
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

trainer.train()

# Évaluation
pred_output = trainer.predict(tokenized_test)
logits = pred_output.predictions
preds = np.argmax(logits, axis=1)
labels = pred_output.label_ids

print("\n Rapport de classification sur le jeu test :")
target_names = [id2label[i] for i in range(num_labels)]
print(classification_report(labels, preds, labels=list(range(num_labels)), target_names=target_names))


# Partie classification + commerciaux
df_to_classify = pd.read_excel(test_file_path)

if text_column_name not in df_to_classify.columns or "Organisme" not in df_to_classify.columns:
    raise ValueError(f"Les colonnes 'Objet' ou 'Organisme' sont manquantes dans le fichier {test_file_path}")

texts_to_classify = df_to_classify[text_column_name].dropna().astype(str).tolist()
organismes = df_to_classify["Organisme"].astype(str).tolist()


organisme_to_commercial = {
    # Ministères et administrations centrales
    "ministère de la santé": "Youssef El Fassi",
    "ministère de l'intérieur": "Sara Benali",
    "ministère de la justice": "Karima El Ayachi",
    "ministère des finances": "Rachid Hakam",
    "ministère de l'éducation": "Yassir Bennis",
    "ministère de l'agriculture": "Meryem Hajji",
    "ministère du tourisme": "Naoufal Khattabi",
    "ministère de l'équipement": "Khadija Skalli",
    "ministère de la culture": "Nadia Chraibi",
    "ministère de l'industrie": "Mehdi Rahmouni",
    "ministère de la transition énergétique": "Asmae Belkadi",
    "ministère des habous": "Nadia Chraibi",
    "ministère de la jeunesse": "Wafae Bennis",
    "ministère des affaires étrangères": "Maha El Mernissi",

    # Secteur bancaire et financier
    "banque populaire": "Omar Bouzid",
    "bmce": "Imane Lahlou",
    "attijariwafa": "Hicham Dali",
    "caisse de dépôt": "Hind Aouad",
    "barid al maghrib": "Brahim Joulali",
    "bank al maghrib": "Mouna Kadiri",
    "al barid bank": "Brahim Joulali",

    # Télécommunications
    "maroc telecom": "Othmane Filali",
    "inwi": "Soukaina Hamidi",
    "orange": "Reda Lakrimi",

    # Santé et protection sociale
    "cnops": "Younes Aklil",
    "cnss": "Kenza El Yazidi",
    "hopital": "Leila El Mansouri",
    "centre hospitalier": "Mounir Idrissi",
    "chp": "Mounir Idrissi",
    "chr": "Mounir Idrissi",
    "chu": "Leila El Mansouri",
    "ino": "Leila El Mansouri",
    "institut pasteur": "Leila El Mansouri",

    # Éducation et recherche
    "université": "Nadia Tazi",
    "ensa": "Youssef Boughanmi",
    "est": "Hind Aouad",
    "école": "Salma Daoudi",
    "faculté": "Nadia Tazi",
    "aref": "Ali Fadili",
    "académie": "Ali Fadili",
    "ofppt": "Nadia Tazi",

    # Collectivités territoriales
    "direction régionale": "Ayoub Kabbaj",
    "province": "Hamza Boulahfa",
    "préfecture": "Sanaa El Alaoui",
    "région": "Ali Fadili",
    "commune": "Lamiaa Azouzi",
    "wilaya": "Sanaa El Alaoui",
    "pachalik": "Hamza Boulahfa",
    "caïdat": "Hamza Boulahfa",

    # Entreprises publiques
    "onee": "Samir Taleb",
    "oncf": "Rachid Sbai",
    "onda": "Mehdi Rahmouni",
    "anp": "Mehdi Rahmouni",
    "marsa maroc": "Mehdi Rahmouni",
    "tanger med": "Mehdi Rahmouni",
    "ormva": "Ayoub Kabbaj",
    "onssa": "Najib Lahlou",
    "ompic": "Nabil Khattabi",
    "ancfcc": "Soufiane Rahali",
    "radeef": "Imane Bakkali",
    "radem": "Younes Mzali",

    # Sécurité et défense
    "dgssi": "Karim Nassiri",
    "dgsn": "Hassan Souhail",
    "gendarmerie": "Saïd Kabbouri",
    "sécurité": "Yasmine Maachou",
    "protection civile": "Soufiane Rahali",
    "forces auxiliaires": "Wafae Bennis",
    "armée": "Soufiane Rahali",
    "far": "Soufiane Rahali",

    # Justice
    "tribunal": "Karima El Ayachi",
    "cour": "Azzeddine El Ghali",
    "circonscription judiciaire": "Azzeddine El Ghali",

    # Autres organismes
    "fondation": "Hajar Mouline",
    "rak": "Amine El Idrissi",
    "agence nationale du soutien sociale": "Samira Maouni",
    "agence marocaine du médicament": "Nabil Khattabi",
    "haut commissariat": "Abdelhak Chami",
    "cndh": "Maha El Mernissi",
    "cgem": "Mohamed Oubihi",
    "inspection": "Wafae Bennis",
    "audit": "Tarik Ghoulam",
    "douane": "Aziz Jebari",
    "poste": "Brahim Joulali",
    "tgr": "Hicham Badaoui",
    "agence urbaine": "Omar Kharbouch",
    "mutuelle": "Kenza El Yazidi",
    "cmr": "Kenza El Yazidi",
    "cdg": "Hind Aouad",
    "al omrane": "Khadija Skalli",
    "sorec": "Azzeddine El Ghali",
    "inrh": "Nadia Tazi",
    "insea": "Abdelhak Chami",

    # Nouveaux ajouts pour couvrir plus de cas
    "laboratoire": "Leila El Mansouri",
    "centre de santé": "Leila El Mansouri",
    "polyclinique": "Leila El Mansouri",
    "direction provinciale": "Hamza Boulahfa",
    "délégation": "Hamza Boulahfa",
    "agence": "Omar Kharbouch",
    "office": "Najib Lahlou",
    "établissement": "Nadia Tazi",
    "institut": "Nadia Tazi",
    "école nationale": "Salma Daoudi",
    "lycée": "Ali Fadili",
    "collège": "Ali Fadili",
    "académie régionale": "Ali Fadili",
    "direction générale": "Ayoub Kabbaj",
    "service": "Ayoub Kabbaj",
    "division": "Ayoub Kabbaj",
    "secteur": "Ayoub Kabbaj",
    "unité": "Ayoub Kabbaj",
    "projet": "Mehdi Rahmouni",
    "système d'information": "Hind Aouad",
    "solution informatique": "Hind Aouad",
    "plateforme": "Hind Aouad",
    "réseau": "Amine El Idrissi",
    "télécommunication": "Othmane Filali",
    "technologie": "Othmane Filali",
    "digital": "Othmane Filali",
    "données": "Abdelhak Chami",
    "statistique": "Abdelhak Chami",
    "plan": "Abdelhak Chami",
    "développement": "Mehdi Rahmouni",
    "investissement": "Mehdi Rahmouni",
    "port": "Mehdi Rahmouni",
    "aéroport": "Mehdi Rahmouni",
    "transport": "Rachid Sbai",
    "infrastructure": "Khadija Skalli",
    "bâtiment": "Khadija Skalli",
    "construction": "Khadija Skalli",
    "énergie": "Samir Taleb",
    "électricité": "Samir Taleb",
    "eau": "Samir Taleb",
    "environnement": "Maha El Mernissi",
    "développement durable": "Maha El Mernissi",
    "agriculture": "Meryem Hajji",
    "pêche": "Meryem Hajji",
    "forêt": "Meryem Hajji",
    "sécurité": "Yasmine Maachou",
    "sûreté": "Yasmine Maachou",
    "protection": "Yasmine Maachou",
    "santé": "Leila El Mansouri",
    "médical": "Leila El Mansouri",
    "hôpital": "Leila El Mansouri",
    "clinique": "Leila El Mansouri",
    "radiologie": "Leila El Mansouri",
    "laboratoire": "Leila El Mansouri",
    "éducation": "Ali Fadili",
    "formation": "Ali Fadili",
    "enseignement": "Ali Fadili",
    "recherche": "Nadia Tazi",
    "science": "Nadia Tazi",
    "innovation": "Nadia Tazi",
    "culture": "Nadia Chraibi",
    "art": "Nadia Chraibi",
    "patrimoine": "Nadia Chraibi",
    "communication": "Nadia Chraibi",
    "média": "Nadia Chraibi",
    "presse": "Nadia Chraibi",
    "finance": "Rachid Hakam",
    "économie": "Rachid Hakam",
    "budget": "Rachid Hakam",
    "fiscalité": "Rachid Hakam",
    "douane": "Aziz Jebari",
    "impôt": "Rachid Hakam",
    "banque": "Omar Bouzid",
    "assurance": "Omar Bouzid",
    "crédit": "Omar Bouzid",
    "justice": "Karima El Ayachi",
    "tribunal": "Karima El Ayachi",
    "cour": "Azzeddine El Ghali",
    "juridique": "Karima El Ayachi",
    "droit": "Karima El Ayachi",
    "sécurité sociale": "Kenza El Yazidi",
    "retraite": "Kenza El Yazidi",
    "prévoyance": "Kenza El Yazidi",
    "social": "Samira Maouni",
    "solidarité": "Samira Maouni",
    "famille": "Samira Maouni",
    "femme": "Samira Maouni",
    "jeunesse": "Wafae Bennis",
    "sport": "Wafae Bennis",
    "loisir": "Wafae Bennis",
    "tourisme": "Naoufal Khattabi",
    "hôtellerie": "Naoufal Khattabi",
    "restauration": "Naoufal Khattabi",
    "industrie": "Mehdi Rahmouni",
    "production": "Mehdi Rahmouni",
    "manufacture": "Mehdi Rahmouni",
    "commerce": "Nabil Khattabi",
    "vente": "Nabil Khattabi",
    "marketing": "Nabil Khattabi",
    "service": "Ayoub Kabbaj",
    "administration": "Ayoub Kabbaj",
    "public": "Ayoub Kabbaj",
    "gouvernement": "Ayoub Kabbaj",
    "état": "Ayoub Kabbaj",
    "collectivité": "Ali Fadili",
    "territoire": "Ali Fadili",
    "local": "Ali Fadili",
    "régional": "Ali Fadili",
    "urbain": "Lamiaa Azouzi",
    "ville": "Lamiaa Azouzi",
    "municipal": "Lamiaa Azouzi",
    "communal": "Lamiaa Azouzi",
    "rural": "Hamza Boulahfa",
    "campagne": "Hamza Boulahfa",
    "agricole": "Meryem Hajji",
    "ferme": "Meryem Hajji",
    "élevage": "Meryem Hajji",
    "pêche": "Meryem Hajji",
    "aquaculture": "Meryem Hajji",
    "forêt": "Meryem Hajji",
    "nature": "Meryem Hajji",
    "environnement": "Maha El Mernissi",
    "écologie": "Maha El Mernissi",
    "climat": "Maha El Mernissi",
    "développement durable": "Maha El Mernissi",
    "énergie": "Samir Taleb",
    "électricité": "Samir Taleb",
    "renouvelable": "Samir Taleb",
    "pétrole": "Samir Taleb",
    "gaz": "Samir Taleb",
    "mine": "Samir Taleb",
    "ressource": "Samir Taleb",
    "eau": "Samir Taleb",
    "hydraulique": "Samir Taleb",
    "assainissement": "Samir Taleb",
    "distribution": "Samir Taleb",
    "traitement": "Samir Taleb",
    "transport": "Rachid Sbai",
    "logistique": "Rachid Sbai",
    "mobilité": "Rachid Sbai",
    "infrastructure": "Khadija Skalli",
    "construction": "Khadija Skalli",
    "bâtiment": "Khadija Skalli",
    "travaux": "Khadija Skalli",
    "ingénierie": "Khadija Skalli",
    "architecture": "Khadija Skalli",
    "urbanisme": "Khadija Skalli",
    "aménagement": "Khadija Skalli",
    "technologie": "Othmane Filali",
    "digital": "Othmane Filali",
    "informatique": "Hind Aouad",
    "système d'information": "Hind Aouad",
    "logiciel": "Hind Aouad",
    "donnée": "Abdelhak Chami",
    "statistique": "Abdelhak Chami",
    "analyse": "Abdelhak Chami",
    "recherche": "Nadia Tazi",
    "science": "Nadia Tazi",
    "innovation": "Nadia Tazi",
    "développement": "Mehdi Rahmouni",
    "projet": "Mehdi Rahmouni",
    "investissement": "Mehdi Rahmouni",
    "financement": "Mehdi Rahmouni",
    "partenariat": "Mehdi Rahmouni",
    "coopération": "Mehdi Rahmouni",
    "international": "Maha El Mernissi",
    "relation": "Maha El Mernissi",
    "diplomatie": "Maha El Mernissi",
    "commerce international": "Maha El Mernissi",
    "export": "Maha El Mernissi",
    "import": "Maha El Mernissi",
    "douane": "Aziz Jebari",
    "fiscalité": "Rachid Hakam",
    "impôt": "Rachid Hakam",
    "taxe": "Rachid Hakam",
    "comptabilité": "Rachid Hakam",
    "audit": "Tarik Ghoulam",
    "contrôle": "Tarik Ghoulam",
    "inspection": "Wafae Bennis",
    "évaluation": "Wafae Bennis",
    "qualité": "Wafae Bennis",
    "certification": "Wafae Bennis",
    "norme": "Wafae Bennis",
    "sécurité": "Yasmine Maachou",
    "sûreté": "Yasmine Maachou",
    "protection": "Yasmine Maachou",
    "défense": "Soufiane Rahali",
    "armée": "Soufiane Rahali",
    "militaire": "Soufiane Rahali",
    "gendarmerie": "Saïd Kabbouri",
    "police": "Yasmine Maachou",
    "sécurité civile": "Yasmine Maachou",
    "protection civile": "Yasmine Maachou",
    "urgence": "Yasmine Maachou",
    "santé": "Leila El Mansouri",
    "médical": "Leila El Mansouri",
    "hôpital": "Leila El Mansouri",
    "clinique": "Leila El Mansouri",
    "pharmacie": "Nabil Khattabi",
    "médicament": "Nabil Khattabi",
    "laboratoire": "Leila El Mansouri",
    "analyse": "Leila El Mansouri",
    "diagnostic": "Leila El Mansouri",
    "radiologie": "Leila El Mansouri",
    "imagerie": "Leila El Mansouri",
    "chirurgie": "Leila El Mansouri",
    "médecine": "Leila El Mansouri",
    "paramédical": "Leila El Mansouri",
    "infirmier": "Leila El Mansouri",
    "éducation": "Ali Fadili",
    "formation": "Ali Fadili",
    "enseignement": "Ali Fadili",
    "école": "Salma Daoudi",
    "université": "Nadia Tazi",
    "recherche": "Nadia Tazi",
    "science": "Nadia Tazi",
    "innovation": "Nadia Tazi",
    "culture": "Nadia Chraibi",
    "art": "Nadia Chraibi",
    "patrimoine": "Nadia Chraibi",
    "musée": "Nadia Chraibi",
    "bibliothèque": "Nadia Chraibi",
    "archive": "Nadia Chraibi",
    "monument": "Nadia Chraibi",
    "histoire": "Nadia Chraibi",
    "archéologie": "Nadia Chraibi",
    "communication": "Nadia Chraibi",
    "média": "Nadia Chraibi",
    "presse": "Nadia Chraibi",
    "journal": "Nadia Chraibi",
    "radio": "Nadia Chraibi",
    "télévision": "Nadia Chraibi",
    "cinéma": "Nadia Chraibi",
    "théâtre": "Nadia Chraibi",
    "spectacle": "Nadia Chraibi",
    "événement": "Nadia Chraibi",
    "festival": "Nadia Chraibi",
    "sport": "Wafae Bennis",
    "loisir": "Wafae Bennis",
    "jeunesse": "Wafae Bennis",
    "association": "Wafae Bennis",
    "fondation": "Hajar Mouline",
    "ong": "Hajar Mouline",
    "humanitaire": "Hajar Mouline",
    "social": "Samira Maouni",
    "solidarité": "Samira Maouni",
    "famille": "Samira Maouni",
    "femme": "Samira Maouni",
    "enfant": "Samira Maouni",
    "personne âgée": "Samira Maouni",
    "handicap": "Samira Maouni",
    "insertion": "Samira Maouni",
    "développement social": "Samira Maouni",
    "action sociale": "Samira Maouni",
    "aide": "Samira Maouni",
    "subvention": "Samira Maouni",
    "financement": "Mehdi Rahmouni",
    "investissement": "Mehdi Rahmouni",
    "projet": "Mehdi Rahmouni",
    "développement": "Mehdi Rahmouni",
    "coopération": "Mehdi Rahmouni",
    "partenariat": "Mehdi Rahmouni",
    "international": "Maha El Mernissi",
    "relation": "Maha El Mernissi",
    "diplomatie": "Maha El Mernissi",
    "commerce international": "Maha El Mernissi",
    "export": "Maha El Mernissi",
    "import": "Maha El Mernissi",
    "douane": "Aziz Jebari",
    "fiscalité": "Rachid Hakam",
    "impôt": "Rachid Hakam",
    "taxe": "Rachid Hakam",
    "comptabilité": "Rachid Hakam",
    "audit": "Tarik Ghoulam",
    "contrôle": "Tarik Ghoulam",
    "inspection": "Wafae Bennis",
    "évaluation": "Wafae Bennis",
    "qualité": "Wafae Bennis",
    "certification": "Wafae Bennis",
    "norme": "Wafae Bennis",
    "sécurité": "Yasmine Maachou",
    "sûreté": "Yasmine Maachou",
    "protection": "Yasmine Maachou",
    "défense": "Soufiane Rahali",
    "armée": "Soufiane Rahali",
    "militaire": "Soufiane Rahali",
    "gendarmerie": "Saïd Kabbouri",
    "police": "Yasmine Maachou",
    "sécurité civile": "Yasmine Maachou",
    "protection civile": "Yasmine Maachou",
    "urgence": "Yasmine Maachou",
    "santé": "Leila El Mansouri",
    "médical": "Leila El Mansouri",
    "hôpital": "Leila El Mansouri",
    "clinique": "Leila El Mansouri",
    "pharmacie": "Nabil Khattabi",
    "médicament": "Nabil Khattabi",
    "laboratoire": "Leila El Mansouri",
    "analyse": "Leila El Mansouri",
    "diagnostic": "Leila El Mansouri",
    "radiologie": "Leila El Mansouri",
    "imagerie": "Leila El Mansouri",
    "chirurgie": "Leila El Mansouri",
    "médecine": "Leila El Mansouri",
    "paramédical": "Leila El Mansouri",
    "infirmier": "Leila El Mansouri",
    "éducation": "Ali Fadili",
    "formation": "Ali Fadili",
    "enseignement": "Ali Fadili",
    "école": "Salma Daoudi",
    "université": "Nadia Tazi",
    "recherche": "Nadia Tazi",
    "science": "Nadia Tazi",
    "innovation": "Nadia Tazi",
    "culture": "Nadia Chraibi",
    "art": "Nadia Chraibi",
    "patrimoine": "Nadia Chraibi",
    "musée": "Nadia Chraibi",
    "bibliothèque": "Nadia Chraibi",
    "archive": "Nadia Chraibi",
    "monument": "Nadia Chraibi",
    "histoire": "Nadia Chraibi",
    "archéologie": "Nadia Chraibi",
    "communication": "Nadia Chraibi",
    "média": "Nadia Chraibi",
    "presse": "Nadia Chraibi",
    "journal": "Nadia Chraibi",
    "radio": "Nadia Chraibi",
    "télévision": "Nadia Chraibi",
    "cinéma": "Nadia Chraibi",
    "théâtre": "Nadia Chraibi",
    "spectacle": "Nadia Chraibi",
    "événement": "Nadia Chraibi",
    "festival": "Nadia Chraibi",
    "sport": "Wafae Bennis",
    "loisir": "Wafae Bennis",
    "jeunesse": "Wafae Bennis",
    "association": "Wafae Bennis",
    "fondation": "Hajar Mouline",
    "ong": "Hajar Mouline",
    "humanitaire": "Hajar Mouline",
    "social": "Samira Maouni",
    "solidarité": "Samira Maouni",
    "famille": "Samira Maouni",
    "femme": "Samira Maouni",
    "enfant": "Samira Maouni",
    "personne âgée": "Samira Maouni",
    "handicap": "Samira Maouni",
    "insertion": "Samira Maouni",
    "développement social": "Samira Maouni",
    "action sociale": "Samira Maouni",
    "aide": "Samira Maouni",
    "subvention": "Samira Maouni"
}

# Fonction d'affectation améliorée
def map_commercial_from_organisme(organisme):
    org = organisme.lower()

    # Vérification des correspondances exactes en premier
    for keyword, commercial in organisme_to_commercial.items():
        if keyword in org:
            return commercial

    # Vérification des mots-clés plus généraux si aucune correspondance exacte
    words = org.split()
    for word in words:
        for keyword, commercial in organisme_to_commercial.items():
            if keyword.split()[0] == word:  # On vérifie seulement le premier mot du keyword
                return commercial

    # Si aucun mot-clé ne correspond, on essaie de trouver un commercial par domaine
    domain_keywords = {
        "santé": "Leila El Mansouri",
        "éducation": "Ali Fadili",
        "formation": "Ali Fadili",
        "université": "Nadia Tazi",
        "école": "Salma Daoudi",
        "hôpital": "Leila El Mansouri",
        "centre hospitalier": "Mounir Idrissi",
        "ministère": "Ayoub Kabbaj",
        "direction": "Ayoub Kabbaj",
        "province": "Hamza Boulahfa",
        "préfecture": "Sanaa El Alaoui",
        "commune": "Lamiaa Azouzi",
        "région": "Ali Fadili",
        "agence": "Omar Kharbouch",
        "office": "Najib Lahlou",
        "institut": "Nadia Tazi",
        "laboratoire": "Leila El Mansouri",
        "projet": "Mehdi Rahmouni",
        "développement": "Mehdi Rahmouni",
        "technologie": "Othmane Filali",
        "informatique": "Hind Aouad",
        "système": "Hind Aouad",
        "donnée": "Abdelhak Chami",
        "statistique": "Abdelhak Chami",
        "finance": "Rachid Hakam",
        "économie": "Rachid Hakam",
        "banque": "Omar Bouzid",
        "assurance": "Omar Bouzid",
        "justice": "Karima El Ayachi",
        "tribunal": "Karima El Ayachi",
        "cour": "Azzeddine El Ghali",
        "sécurité": "Yasmine Maachou",
        "protection": "Yasmine Maachou",
        "défense": "Soufiane Rahali",
        "armée": "Soufiane Rahali",
        "gendarmerie": "Saïd Kabbouri",
        "police": "Yasmine Maachou",
        "culture": "Nadia Chraibi",
        "art": "Nadia Chraibi",
        "patrimoine": "Nadia Chraibi",
        "communication": "Nadia Chraibi",
        "média": "Nadia Chraibi",
        "presse": "Nadia Chraibi",
        "sport": "Wafae Bennis",
        "jeunesse": "Wafae Bennis",
        "social": "Samira Maouni",
        "solidarité": "Samira Maouni",
        "famille": "Samira Maouni",
        "femme": "Samira Maouni",
        "enfant": "Samira Maouni",
        "personne âgée": "Samira Maouni",
        "handicap": "Samira Maouni",
        "insertion": "Samira Maouni",
        "tourisme": "Naoufal Khattabi",
        "hôtellerie": "Naoufal Khattabi",
        "restauration": "Naoufal Khattabi",
        "industrie": "Mehdi Rahmouni",
        "production": "Mehdi Rahmouni",
        "manufacture": "Mehdi Rahmouni",
        "commerce": "Nabil Khattabi",
        "vente": "Nabil Khattabi",
        "marketing": "Nabil Khattabi",
        "service": "Ayoub Kabbaj",
        "administration": "Ayoub Kabbaj",
        "public": "Ayoub Kabbaj",
        "gouvernement": "Ayoub Kabbaj",
        "état": "Ayoub Kabbaj",
        "collectivité": "Ali Fadili",
        "territoire": "Ali Fadili",
        "local": "Ali Fadili",
        "régional": "Ali Fadili",
        "urbain": "Lamiaa Azouzi",
        "ville": "Lamiaa Azouzi",
        "municipal": "Lamiaa Azouzi",
        "communal": "Lamiaa Azouzi",
        "rural": "Hamza Boulahfa",
        "campagne": "Hamza Boulahfa",
        "agricole": "Meryem Hajji",
        "ferme": "Meryem Hajji",
        "élevage": "Meryem Hajji",
        "pêche": "Meryem Hajji",
        "aquaculture": "Meryem Hajji",
        "forêt": "Meryem Hajji",
        "nature": "Meryem Hajji",
        "environnement": "Maha El Mernissi",
        "écologie": "Maha El Mernissi",
        "climat": "Maha El Mernissi",
        "développement durable": "Maha El Mernissi",
        "énergie": "Samir Taleb",
        "électricité": "Samir Taleb",
        "renouvelable": "Samir Taleb",
        "pétrole": "Samir Taleb",
        "gaz": "Samir Taleb",
        "mine": "Samir Taleb",
        "ressource": "Samir Taleb",
        "eau": "Samir Taleb",
        "hydraulique": "Samir Taleb",
        "assainissement": "Samir Taleb",
        "distribution": "Samir Taleb",
        "traitement": "Samir Taleb",
        "transport": "Rachid Sbai",
        "logistique": "Rachid Sbai",
        "mobilité": "Rachid Sbai",
        "infrastructure": "Khadija Skalli",
        "construction": "Khadija Skalli",
        "bâtiment": "Khadija Skalli",
        "travaux": "Khadija Skalli",
        "ingénierie": "Khadija Skalli",
        "architecture": "Khadija Skalli",
        "urbanisme": "Khadija Skalli",
        "aménagement": "Khadija Skalli",
        "technologie": "Othmane Filali",
        "digital": "Othmane Filali",
        "informatique": "Hind Aouad",
        "système d'information": "Hind Aouad",
        "logiciel": "Hind Aouad",
        "donnée": "Abdelhak Chami",
        "statistique": "Abdelhak Chami",
        "analyse": "Abdelhak Chami",
        "recherche": "Nadia Tazi",
        "science": "Nadia Tazi",
        "innovation": "Nadia Tazi",
        "développement": "Mehdi Rahmouni",
        "projet": "Mehdi Rahmouni",
        "investissement": "Mehdi Rahmouni",
        "financement": "Mehdi Rahmouni",
        "partenariat": "Mehdi Rahmouni",
        "coopération": "Mehdi Rahmouni",
        "international": "Maha El Mernissi",
        "relation": "Maha El Mernissi",
        "diplomatie": "Maha El Mernissi",
        "commerce international": "Maha El Mernissi",
        "export": "Maha El Mernissi",
        "import": "Maha El Mernissi",
        "douane": "Aziz Jebari",
        "fiscalité": "Rachid Hakam",
        "impôt": "Rachid Hakam",
        "taxe": "Rachid Hakam",
        "comptabilité": "Rachid Hakam",
        "audit": "Tarik Ghoulam",
        "contrôle": "Tarik Ghoulam",
        "inspection": "Wafae Bennis",
        "évaluation": "Wafae Bennis",
        "qualité": "Wafae Bennis",
        "certification": "Wafae Bennis",
        "norme": "Wafae Bennis",
        "sécurité": "Yasmine Maachou",
        "sûreté": "Yasmine Maachou",
        "protection": "Yasmine Maachou",
        "défense": "Soufiane Rahali",
        "armée": "Soufiane Rahali",
        "militaire": "Soufiane Rahali",
        "gendarmerie": "Saïd Kabbouri",
        "police": "Yasmine Maachou",
        "sécurité civile": "Yasmine Maachou",
        "protection civile": "Yasmine Maachou",
        "urgence": "Yasmine Maachou",
        "santé": "Leila El Mansouri",
        "médical": "Leila El Mansouri",
        "hôpital": "Leila El Mansouri",
        "clinique": "Leila El Mansouri",
        "pharmacie": "Nabil Khattabi",
        "médicament": "Nabil Khattabi",
        "laboratoire": "Leila El Mansouri",
        "analyse": "Leila El Mansouri",
        "diagnostic": "Leila El Mansouri",
        "radiologie": "Leila El Mansouri",
        "imagerie": "Leila El Mansouri",
        "chirurgie": "Leila El Mansouri",
        "médecine": "Leila El Mansouri",
        "paramédical": "Leila El Mansouri",
        "infirmier": "Leila El Mansouri",
        "éducation": "Ali Fadili",
        "formatiown": "Ali Fadili",
        "enseignement": "Ali Fadili",
        "école": "Salma Daoudi",
        "université": "Nadia Tazi",
        "recherche": "Nadia Tazi",
        "science": "Nadia Tazi",
        "innovation": "Nadia Tazi"
 }
# Fonction d'affectation
def map_commercial_from_organisme(organisme):
    org = organisme.lower()
    for keyword, commercial in organisme_to_commercial.items():
        if keyword in org:
            return commercial
    return "Commercial inconnu"

# Prédictions
inputs = tokenizer(texts_to_classify, padding=True, truncation=True, return_tensors="pt", max_length=128)
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    probs = softmax(logits, dim=1)
    confidences, predicted_ids = torch.max(probs, dim=1)
    predictions = predicted_ids.cpu().numpy()
    confidences = confidences.cpu().numpy()

# Résultats finaux
results = []
inconnus = []

print("\n Résultats de classification des AO avec commerciaux :\n")
for text, orga, pred_id, conf in zip(texts_to_classify, organismes, predictions, confidences):
    label_pred = id2label[pred_id]
    if conf < confidence_threshold or label_pred == "non pertinent":
        final_label = "non pertinent"
    else:
        final_label = label_pred
    commercial = map_commercial_from_organisme(orga)

    print(f"- {orga} => BU : {final_label}, Commercial : {commercial} ({conf*100:.2f}%)")
    results.append({
        "Objet": text,
        "Organisme": orga,
        "BU prédite": final_label,
        "Confiance (%)": round(conf * 100, 2),
        "Commercial associé": commercial
    })

    if commercial == "Commercial inconnu":
        inconnus.append({
            "Organisme": orga,
            "Objet": text
        })

# Sauvegarde des résultats
df_results = pd.DataFrame(results)
df_results.to_excel("resultats_classification_aoCOmm11.xlsx", index=False)



print("\n✅ Fichiers enregistrés :")
print("- resultats_classification_aoCOmm1.xlsx")



Map:   0%|          | 0/1551 [00:00<?, ? examples/s]

Map:   0%|          | 0/388 [00:00<?, ? examples/s]

Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at camembert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
  return forward_call(*args, **kwargs)


Step,Training Loss
194,0.9435
388,0.1511
582,0.0522
776,0.0352
970,0.0248


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)



📊 Rapport de classification sur le jeu test :
                                 precision    recall  f1-score   support

             endpoint solutions       1.00      1.00      1.00        97
                  cybersecurity       1.00      1.00      1.00        98
        infrastructure software       1.00      1.00      1.00        97
cloud and data center solutions       1.00      1.00      1.00        96
                  non pertinent       0.00      0.00      0.00         0

                       accuracy                           1.00       388
                      macro avg       0.80      0.80      0.80       388
                   weighted avg       1.00      1.00      1.00       388



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return forward_call(*args, **kwargs)



📌 Résultats de classification des AO avec commerciaux :

- Fondation Hassan II / / fondation de la mosquee hassan ii => BU : endpoint solutions, Commercial : Mehdi Rahmouni (82.94%)
- ONCF => BU : endpoint solutions, Commercial : Rachid Sbai (97.75%)
- rak  => BU : endpoint solutions, Commercial : Amine El Idrissi (84.97%)
- Universites et cites universitaires / Université mohammed V de rabat la présidence => BU : endpoint solutions, Commercial : Nadia Tazi (65.76%)
- Region de tanger-tetouan - al hoceima /Commune urbaine de TANGER => BU : endpoint solutions, Commercial : Lamiaa Azouzi (92.94%)
- Region de Beni mellal - khenifra / Province Fquih Ben Salah => BU : endpoint solutions, Commercial : Hamza Boulahfa (95.26%)
- Office national de securite sanitaires des produits alimentaires(onssa)  => BU : infrastructure software, Commercial : Najib Lahlou (76.78%)
- ministere de la transition energetique et du developpement durable département de la transition energetique  => BU : non pert