# Importation des packages

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import google.generativeai as genai
import time

# Importation des données

Ajoutez un raccourci de ce dossier à votre google drive :

https://drive.google.com/drive/folders/1mx-CAzT10YKrmxHfYDP_1Oef7PVGUr7s?usp=sharing

In [5]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [6]:
data = pd.read_csv('/content/drive/MyDrive/data_classification_commentaires_toxiques/train.csv')
data.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


# Etude du jeu de données

Nous commencons par réduire le nombre de donnée afin de réduire le temps de calcul

In [8]:
# Reduce the dataset size to 30 samples
reduced_data = data.sample(n=30, random_state=42)

Dans un premier temps nous allons faire en sorte de simplement détecter si un commentaire est toxique

In [9]:
# Select relevant columns
toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Create the 'is_toxic' column
reduced_data['is_toxic'] = reduced_data[toxicity_cols].any(axis=1).astype(int)

# Drop the toxicity columns
reduced_data.drop(columns=toxicity_cols, inplace=True)

In [10]:
print(reduced_data.shape)
reduced_data.head()

(30, 3)


Unnamed: 0,id,comment_text,is_toxic
119105,7ca72b5b9c688e9e,"Geez, are you forgetful! We've already discus...",0
131631,c03f72fd8f8bf54f,Carioca RFA \n\nThanks for your support on my ...,0
125326,9e5b8e8fc1ff2e84,"""\n\n Birthday \n\nNo worries, It's what I do ...",0
111256,5332799e706665a6,Pseudoscience category? \n\nI'm assuming that ...,0
83590,dfa7d8f0b4366680,"(and if such phrase exists, it would be provid...",0


Nous allons dans un premier temps tenter d'utiliser un LLM afin de résoudre ce problème

In [11]:
genai.configure(api_key="AIzaSyDPPot2LGc7hIOrMUxTYHmpWoYsBHMJKgs")

In [12]:
def is_toxic(text):
    """
    Fonction pour classifier la toxicité d'une phrase en utilisant un LLM via l'API OpenAI.
    Entrée :
        text (str) : Phrase dont on veut analyser la toxicité
    Sortie :
        sentiment (str) : '1, si la phrase est toxique, '0' sinon.
    """

    # 2. Préparation de la requête à l'API
    prompt = f"""
    Is the following comment toxic?

    "{text}"

    Answer with either "1" for True or "0" for False.
    """

    # 3. Faite attendre le programme pendant 2 secondes
    time.sleep(4)

    try:
        # 4. Initialisez le modèle à utiliser
        model = genai.GenerativeModel('gemini-1.5-flash')

        # 5. Envoie le prompt au modèle
        response = model.generate_content(prompt)

        # 6. Extraction de la réponse du modèle
        toxic = int(response.text)

        print("Function debug : ", toxic)

        return toxic

    except Exception as e:
        return f"Erreur : {str(e)}"

In [13]:
toxic_comment = "I love you"
is_toxic(toxic_comment)

Function debug :  0


0

In [14]:
toxic_comment = "i hate you"
is_toxic(toxic_comment)

Function debug :  1


1

In [15]:
# Utilisation sur le dataset
reduced_data['is_toxic_llm'] = reduced_data['comment_text'].apply(is_toxic)

Function debug :  1
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  1
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  1
Function debug :  0
Function debug :  1
Function debug :  1
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  1
Function debug :  0
Function debug :  1
Function debug :  1
Function debug :  0
Function debug :  0
Function debug :  1
Function debug :  1
Function debug :  0
Function debug :  0
Function debug :  1


In [7]:
# Utiliser une matrice de confusion pour voir la qualité du model
# ou F1 score
from sklearn.metrics import confusion_matrix, f1_score

# Calculer la matrice de confusion
cm = confusion_matrix(reduced_data['is_toxic'], reduced_data['is_toxic_llm'])

# Calculer le score F1
f1 = f1_score(reduced_data['is_toxic'], reduced_data['is_toxic_llm'])

print("Matrice de confusion :")
print(cm)
print("Score F1 :")
print(f1)

NameError: name 'reduced_data' is not defined

Nous allons maintenant refaire la même chose mais avec la classification des différents types de toxicités

In [None]:
reduced_data = data.sample(n=30, random_state=42)
reduced_data.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
119105,7ca72b5b9c688e9e,"Geez, are you forgetful! We've already discus...",0,0,0,0,0,0
131631,c03f72fd8f8bf54f,Carioca RFA \n\nThanks for your support on my ...,0,0,0,0,0,0
125326,9e5b8e8fc1ff2e84,"""\n\n Birthday \n\nNo worries, It's what I do ...",0,0,0,0,0,0
111256,5332799e706665a6,Pseudoscience category? \n\nI'm assuming that ...,0,0,0,0,0,0
83590,dfa7d8f0b4366680,"(and if such phrase exists, it would be provid...",0,0,0,0,0,0


In [None]:
def classify_toxic(text):
    """
    Fonction pour classifier la toxicité d'une phrase en utilisant un LLM via l'API OpenAI.
    Entrée :
        text (str) : Phrase dont on veut analyser la toxicité
    Sortie :
        sentiment (str) : '1, si la phrase est toxic, '2' si la phrase est severe_toxic, '3' si la phrase est obscene, '4' si la phrase est une insult, '5' si la phrase est un identity_hate, '0' sinon.
    """

    # 2. Préparation de la requête à l'API
    prompt = f"""
    Classify the toxicity of the following comment:

    "{text}"

    Answer with one of the following options:
    '1' for toxic
    '2' for severe_toxic
    '3' for obscene
    '4' for threat
    '5' for insult
    '6' for identity_hate
    '0' for not toxic
    """

    # 3. Faite attendre le programme pendant 2 secondes
    time.sleep(4)

    try:
        # 4. Initialisez le modèle à utiliser
        model = genai.GenerativeModel('gemini-1.5-flash')

        # 5. Envoie le prompt au modèle
        response = model.generate_content(prompt)

        # 6. Extraction de la réponse du modèle
        toxic = int(response.text)

        print("Function debug : ", toxic)

        return toxic

    except Exception as e:
        return f"Erreur : {str(e)}"

In [None]:
toxic_comment = "I love you"
classify_toxic(toxic_comment)

Function debug :  0


0

In [None]:
toxic_comment = "i hate you"
classify_toxic(toxic_comment)

Function debug :  5


5

In [None]:
# Utilisation sur le dataset
reduced_data['is_toxic_llm'] = reduced_data['comment_text'].apply(classify_toxic)

Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  5
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  1
Function debug :  0


ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 254.41ms


Function debug :  2
Function debug :  0
Function debug :  0
Function debug :  0
Function debug :  5
Function debug :  2
Function debug :  0
Function debug :  0
Function debug :  1


In [None]:
def encode_true_label(row):
    """Encodes the true label into a numerical format matching classify_toxic output."""
    if row['toxic'] == 1:
        return 1
    elif row['severe_toxic'] == 1:
        return 2
    elif row['obscene'] == 1:
        return 3
    elif row['threat'] == 1:
        return 4
    elif row['insult'] == 1:
        return 5
    elif row['identity_hate'] == 1:
        return 6
    else:
        return 0

# Create a new column with encoded true labels
reduced_data['true_label_encoded'] = reduced_data.apply(encode_true_label, axis=1)

# Calculate accuracy
accuracy = (reduced_data['true_label_encoded'] == reduced_data['is_toxic_llm']).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 80.00%


Nous allons donc passer à une méthode plus efficasse utilisant de l'embedding et des RNN

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# 10% du jeu de donnée
reduced_data = data.sample(frac=0.1)

# Select relevant columns
toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

# Create the 'is_toxic' column
reduced_data['is_toxic'] = reduced_data[toxicity_cols].any(axis=1).astype(int)

# Drop the toxicity columns
reduced_data.drop(columns=toxicity_cols, inplace=True)

# 1. Tokenize the text
tokenizer = Tokenizer(num_words=10000) # Adjust num_words as needed
tokenizer.fit_on_texts(reduced_data['comment_text'])
sequences = tokenizer.texts_to_sequences(reduced_data['comment_text'])

# 2. Pad sequences to equal length
max_length = 100 # Adjust max_length as needed
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# 3. Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, reduced_data['is_toxic'], test_size=0.2, random_state=42
)

In [None]:
model = Sequential()
model.add(Embedding(10000, 128, input_length=max_length)) # Adjust embedding dimensions as needed
model.add(LSTM(64)) # Adjust LSTM units as needed
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



In [None]:
model.fit(X_train, y_train, epochs=10, batch_size=32) # Adjust epochs and batch_size as needed

Epoch 1/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 88ms/step - accuracy: 0.8951 - loss: 0.3306
Epoch 2/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 84ms/step - accuracy: 0.9566 - loss: 0.1246
Epoch 3/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 83ms/step - accuracy: 0.9734 - loss: 0.0788
Epoch 4/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 84ms/step - accuracy: 0.9888 - loss: 0.0350
Epoch 5/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 84ms/step - accuracy: 0.9935 - loss: 0.0197
Epoch 6/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 87ms/step - accuracy: 0.9956 - loss: 0.0140
Epoch 7/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 82ms/step - accuracy: 0.9976 - loss: 0.0088
Epoch 8/10
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 86ms/step - accuracy: 0.9958 - loss: 0.0141
Epoch 9/10
[1m399/399[

<keras.src.callbacks.history.History at 0x79f2ad0203d0>

In [None]:
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: {}'.format(accuracy))

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.9350 - loss: 0.4394
Accuracy: 0.9338972568511963


Maintenant nous allons les classicier en fonction des catégories

In [None]:
# 10% du jeu de donnée
reduced_data = data.sample(frac=1)

# 1. Tokenize the text (same as before)
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(reduced_data['comment_text'])
sequences = tokenizer.texts_to_sequences(reduced_data['comment_text'])

# 2. Pad sequences (same as before)
max_length = 100
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# 3. One-hot encode labels
toxicity_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
y = pd.get_dummies(reduced_data[toxicity_cols]).values  # One-hot encoding

# 4. Split data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, y, test_size=0.2, random_state=42
)

In [None]:
model = Sequential()
model.add(Embedding(10000, 128, input_length=max_length))
model.add(LSTM(64))
model.add(Dense(6, activation='softmax'))  # 6 output units for 6 categories

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])



In [None]:
model.fit(X_train, y_train, epochs=10, batch_size=32)

Epoch 1/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 7ms/step - accuracy: 0.0197 - loss: 0.3693
Epoch 2/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 7ms/step - accuracy: 0.0603 - loss: 0.3807
Epoch 3/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 8ms/step - accuracy: 0.0681 - loss: 0.3655
Epoch 4/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 7ms/step - accuracy: 0.0673 - loss: 0.3630
Epoch 5/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 7ms/step - accuracy: 0.0721 - loss: 0.3582
Epoch 6/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 7ms/step - accuracy: 0.0745 - loss: 0.3548
Epoch 7/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 8ms/step - accuracy: 0.0954 - loss: 0.3544
Epoch 8/10
[1m3990/3990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 8ms/step - accuracy: 0.1090 - loss: 0.3429
Epoch 9/10
[1m3

<keras.src.callbacks.history.History at 0x7bf7412163d0>

In [None]:
_, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: {}'.format(accuracy))

[1m998/998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.9636 - loss: 0.3353
Accuracy: 0.9614601135253906


In [None]:
# Matrice de confusion
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true_classes, y_pred_classes)
print(cm)

[1m998/998[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step
[[30671    13   814    14   205     7]
 [    0     0     0     0     0     0]
 [   87     0    12     0     2     1]
 [    7     0     0     0     0     0]
 [   52     1    18     1     2     0]
 [    7     0     0     0     1     0]]


In [None]:
def toxicity_classifier_pipeline(text):
    """
    Pipeline pour classifier la toxicité d'une phrase.
    Entrée :
        text (str) : Phrase brute à classifier.
    Sortie :
        prediction (array) : Probabilités prédites pour chaque catégorie de toxicité.
    """
    # a. Tokeniser le texte
    sequence = tokenizer.texts_to_sequences([text])

    # b. Remplir la séquence pour correspondre à la longueur maximale
    padded_sequence = pad_sequences(sequence, maxlen=100)  # Ajuster maxlen si nécessaire

    # c. Faire une prédiction en utilisant le modèle chargé
    prediction = model.predict(padded_sequence)

    return prediction

In [None]:
raw_text = "Ceci est un commentaire toxique."
prediction = toxicity_classifier_pipeline(raw_text)

# 5. Obtenir la catégorie de toxicité, incluant "non toxique"
categories = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate', 'non toxique']

# Trouver l'index de la catégorie avec la probabilité maximale
predicted_category_index = prediction.argmax()

# Si la probabilité maximale est inférieure à un seuil, on considère le commentaire comme "non toxique"
threshold = 0.5  # Vous pouvez ajuster ce seuil
if prediction[0][predicted_category_index] < threshold:
    predicted_category_index = len(categories) - 1  # L'index de "non toxique"

predicted_category = categories[predicted_category_index]

print(f"Le commentaire est classé comme : {predicted_category}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Le commentaire est classé comme : non toxique


# Préparation des données

In [None]:
# Your Code

# Entraînement du modèle baseline

In [None]:
# Your Code

# Itération de la modélisation

In [None]:
# Your Code

In [None]:
model.save('toxicity_classifier_model_all_data.keras')

In [None]:
from tensorflow import keras
loaded_model = keras.models.load_model('toxicity_classifier_model_all_data.keras')

  saveable.load_own_variables(weights_store.get(inner_path))
