In [1]:
!pip install torch

import torch
import random
import numpy as np

# Seed NumPy, the stdlib `random` module and PyTorch with the same fixed
# value so that their random draws are repeatable from one run to the next.
np.random.seed(42)
random.seed(42)
torch.manual_seed(42)

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import os
from torch.nn.functional import softmax
from pathlib import Path
import nlpaug.augmenter.word as naw

# Emotion label names. pred_e pairs them positionally with the model's output
# scores, so the order of this list must not be changed.
labels = [
    "admiration", "amusement", "anger", "annoyance",
    "approval", "caring", "confusion", "curiosity",
    "desire", "disappointment", "disapproval", "disgust",
    "embarrassment", "excitement", "fear", "gratitude",
    "grief", "joy", "love", "nervousness",
    "optimism", "pride", "realization", "relief",
    "remorse", "sadness", "surprise", "neutral",
]

# Model setup: load the pretrained tokenizer and the sequence-classification
# model from the same checkpoint name, once, at module level.
model_n = "bhadresh-savani/bert-base-go-emotion"
tokenizer = AutoTokenizer.from_pretrained(model_n)
model = AutoModelForSequenceClassification.from_pretrained(model_n)

# Emotion prediction helper
def pred_e(text):
    """Score ``text`` against the emotion labels.

    The text is tokenized (PyTorch tensors, truncation and padding enabled)
    and run through ``model`` with gradient tracking disabled. The logits are
    converted to probabilities with a softmax over the last dimension, and
    size-1 dimensions are squeezed away before conversion to a Python list.

    Returns:
        dict: entries of ``labels`` paired positionally with the scores.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits
    # The logits were produced under no_grad, so no graph is built here either.
    scores = softmax(logits, dim=-1).squeeze().tolist()
    return {name: score for name, score in zip(labels, scores)}

# Word-level synonym augmenter that uses WordNet as its synonym source.
# Created once at module level and reused by augmenter_texte.
augmenter = naw.SynonymAug(aug_src='wordnet')

# Generate augmented versions of a text
def augmenter_texte(text, n=2):
    """Return ``n`` synonym-augmented variants of ``text`` as plain strings.

    Each variant comes from a separate ``augmenter.augment(text)`` call.
    nlpaug releases differ in what ``augment`` returns for a single string
    (a ``str`` in older releases, a list of ``str`` in newer ones), so every
    result is normalised to a ``str`` here. Without this, callers receive
    nested lists, and slicing or ``len`` on a "text" acts on the list rather
    than on the characters.

    Augmentation is best-effort: if it fails, the original text is returned
    ``n`` times and the error is printed rather than silently discarded.

    Args:
        text: the text to augment.
        n: number of variants to produce.

    Returns:
        list: ``n`` strings (an empty list when ``n`` is not positive).
    """
    # Nothing to augment; also avoids the empty list the augmenter may return.
    if not text:
        return [text] * n

    try:
        variants = []
        for _ in range(n):
            result = augmenter.augment(text)
            if isinstance(result, str):
                variants.append(result)
            elif result:
                # List-returning releases: one input string -> one-element list.
                variants.append(result[0])
            else:
                # Empty result: keep the original so the output length stays n.
                variants.append(text)
    except Exception as exc:
        # Not a bare except, so KeyboardInterrupt/SystemExit still propagate.
        print(f"Augmentation impossible ({exc!r}), texte original conservé")
        return [text] * n
    return variants

# Run the whole pipeline and save the results
def main():
    """Score every cleaned text file and its augmented variants, then write a CSV.

    Reads all ``*.txt`` files found recursively under ``~/PROJECT/data/clean``.
    For each non-empty file, the original text and two augmented variants are
    scored with ``pred_e``. One row per scored text is written to
    ``~/PROJECT/figures/Augmentationdedonnees/emotions.csv`` with these columns:

    - ``source``: ``"Original"`` or ``"augmented_<idx>"``
    - ``text``: the text, cut to 200 characters plus ``"..."`` when longer
    - one column per entry of ``labels`` holding its score
    - ``label``: the name of the highest-scoring label column

    Nothing is written when no row was produced.
    """
    folder_clean = Path.home() / "PROJECT" / "data" / "clean"
    output_csv = Path.home() / "PROJECT" / "figures" / "Augmentationdedonnees" / "emotions.csv"
    output_csv.parent.mkdir(parents=True, exist_ok=True)

    # Accumulates one dict per scored text
    results = []
    # rglob yields files in filesystem order; sorting keeps the processing
    # order, and therefore the sequence of random augmentations, stable.
    clean_files = sorted(folder_clean.rglob("*.txt"))
    print("Fichiers Trouvés")

    # Walk through the files
    for clean_file in clean_files:
        text_clean = clean_file.read_text(encoding="utf-8").strip()

        # An empty file has nothing to score or augment.
        if not text_clean:
            continue

        # Original text followed by 2 augmented variants
        textes_aug = [text_clean] + augmenter_texte(text_clean, n=2)

        for idx, text_variant in enumerate(textes_aug):
            emo_scores = pred_e(text_variant)

            # Keep the CSV readable: store at most 200 characters of the text.
            if len(text_variant) > 200:
                text_display = text_variant[:200] + "..."
            else:
                text_display = text_variant

            # Index 0 is always the untouched original
            source = "Original" if idx == 0 else f"augmented_{idx}"

            # Row layout: metadata first, then one score per label
            results.append({"source": source, "text": text_display, **emo_scores})

    if not results:
        print("Aucun fichier")
        return

    # DataFrame with source, text and one column per emotion
    df = pd.DataFrame(results, columns=["source", "text"] + labels)

    # Dominant emotion = label column with the highest score in each row
    df["label"] = df[labels].idxmax(axis=1)

    # Save the DataFrame
    df.to_csv(output_csv, index=False)
    print("Csv Good")

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()



  from .autonotebook import tqdm as notebook_tqdm
W0519 14:04:33.613000 28516 torch\distributed\elastic\multiprocessing\redirects.py:29] NOTE: Redirects are currently not supported in Windows or MacOs.


Fichiers Trouvés


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\AyaKu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\AyaKu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\AyaKu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\AyaKu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\AyaKu/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Dow

Csv Good
