In [None]:
import pandas as pd
import re
import spacy
import joblib
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Charger spaCy pour le traitement du texte
nlp = spacy.load('fr_core_news_sm')

#  Charger le modèle de classification des émotions
def nettoyer_texte(texte):
    texte = re.sub(r"[^a-zA-ZàâçéèêëîïôûùüÿñæœÀÂÇÉÈÊËÎÏÔÛÙÜŸÑÆŒ\s]", "", texte)
    texte = texte.lower()  # Convertir en minuscules
    texte = re.sub(r"\s+", " ", texte)  # Supprimer les espaces multiples
    doc = nlp(texte)
    
    # Garder les lemmes
    lemmes = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct]
    return " ".join(lemmes)


df = pd.read_csv('dataset_emotions.csv')

df['Texte'] = df['Texte'].apply(nettoyer_texte)
df = df.dropna()


X_train, X_test, y_train, y_test = train_test_split(df['Texte'], df['Emotion'], test_size=0.2, random_state=42)

vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)
print(df.isnull().sum())

svm_model = SVC(kernel='linear')
svm_model.fit(X_train_vec, y_train)

y_pred = svm_model.predict(X_test_vec)
print(classification_report(y_test, y_pred))

Texte      0
Emotion    0
dtype: int64
              precision    recall  f1-score   support

      Colère       0.86      0.50      0.63        24
        Joie       0.56      1.00      0.71        10
      Neutre       0.73      0.86      0.79        22
        Peur       0.70      0.50      0.58        14
   Tristesse       0.71      0.79      0.75        19

    accuracy                           0.71        89
   macro avg       0.71      0.73      0.69        89
weighted avg       0.74      0.71      0.70        89



In [5]:
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(SVC(probability=True), param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(X_train_vec, y_train)
print("Meilleurs hyperparamètres:", grid_search.best_params_)

best_svm_model = grid_search.best_estimator_

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=   0.0s
[CV] END ...................C=0.1, gamma=auto, kernel=linear; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=auto, kernel=rbf; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale

In [6]:
joblib.dump(best_svm_model, 'svm_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']