# Installation et Chargement des Bibliothèques

In [1]:
# Importation des Bibliothèques
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report
import re
from bs4 import BeautifulSoup
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [3]:
df = pd.read_csv('/content/IMDB_Dataset.csv', sep=';{6}', engine='python')
print("Colonne disponible:", df.columns.tolist())
df.head()

Colonne disponible: ['review,sentiment', 'Unnamed: 1']


Unnamed: 0,"review,sentiment",Unnamed: 1
0,One of the other reviewers has mentioned that ...,
1,"""A wonderful little production. <br /><br />Th...",
2,"""I thought this was a wonderful way to spend t...",
3,Basically there's a family where a little boy ...,
4,"""Petter Mattei's """"Love in the Time of Money""""...",


# Pretriatement des données

In [4]:
def preprocess_reviews(data_path):
    """
    Prétraite l'ensemble des données relatives aux critiques de films en améliorant la gestion et le formatage du texte.

    Paramètres :
    data_path (str) : Chemin d'accès au fichier de données

    Retourne :
    pandas.DataFrame : DataFrame correctement structuré et nettoyé
    """
    # Lecture du fichier csv
    df = pd.read_csv(data_path, sep=';{6}', engine='python')

    # Leparation des deux colonne car elle sont jointes
    split_cols = df['review,sentiment'].str.split(',(?=[^,]*$)', expand=True)

    # Creation de la dataframe
    clean_df = pd.DataFrame({
        'review': split_cols[0],
        'sentiment': split_cols[1]
    })

    def clean_text(text):
        try:
            # Suppresion des balises HTML
            text = BeautifulSoup(str(text), "html.parser").get_text()

            # Convertir en lowercase
            text = text.lower()

            # Suppression de certains caractères speciaux
            text = re.sub(r'[^a-z\s.,!?]', ' ', text)

            # Normaliser les espaces blancs
            text = re.sub(r'\s+', ' ', text)

            # Suppression des stopwords
            stop_words = set(stopwords.words('english'))
            words = text.split()
            filtered_words = [word for word in words if word.lower() not in stop_words]

            # Concatenation des mots apres suppresion des stopwords
            text = ' '.join(filtered_words).strip()
            return re.sub(r'\s+', ' ', text)

        except Exception as e:
            print(f"Error processing text: {e}")
            return ""

    # Application de la fonction et numerisation des sentiments
    clean_df['review_clean'] = clean_df['review'].apply(clean_text)
    clean_df['sentiment_num'] = clean_df['sentiment'].apply(
        lambda x: 1 if 'positive' in str(x).lower() else 0
    )

    # S'assurer que l'ordre des colonnes est correct et résoudre les problèmes de formatage restants
    clean_df = clean_df[['review', 'sentiment', 'review_clean', 'sentiment_num']]
    clean_df = clean_df.replace(r'^\s*$', None, regex=True).fillna('')

    return clean_df

In [5]:
df = preprocess_reviews('/content/IMDB_Dataset.csv')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

  text = BeautifulSoup(str(text), "html.parser").get_text()


In [11]:
df.head()

Unnamed: 0,review,sentiment,review_clean,sentiment_num
0,"One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to the fact that it goes where other shows wouldn't dare. Forget pretty pictures painted for mainstream audiences, forget charm, forget romance...OZ doesn't mess around. The first episode I ever saw struck me as so nasty it was surreal, I couldn't say I was ready for it, but as I watched more, I developed a taste for Oz, and got accustomed to the high levels of graphic violence. Not just violence, but injustice (crooked guards who'll be sold out for a nickel, inmates who'll kill on order and get away with it, well mannered, middle class inmates being turned into prison bitches due to their lack of street skills or prison experience) Watching Oz, you may become comfortable with what is uncomfortable viewing....thats if you can get in touch with your darker side.",positive,"one reviewers mentioned watching oz episode hooked. right, exactly happened me.the first thing struck oz brutality unflinching scenes violence, set right word go. trust me, show faint hearted timid. show pulls punches regards drugs, sex violence. hardcore, classic use word.it called oz nickname given oswald maximum security state penitentary. focuses mainly emerald city, experimental section prison cells glass fronts face inwards, privacy high agenda. em city home many..aryans, muslims, gangstas, latinos, christians, italians, irish more....so scuffles, death stares, dodgy dealings shady agreements never far away.i would say main appeal show due fact goes shows dare. forget pretty pictures painted mainstream audiences, forget charm, forget romance...oz mess around. first episode ever saw struck nasty surreal, say ready it, watched more, developed taste oz, got accustomed high levels graphic violence. violence, injustice crooked guards sold nickel, inmates kill order get away it, well mannered, middle class inmates turned prison bitches due lack street skills prison experience watching oz, may become comfortable uncomfortable viewing....thats get touch darker side.",1
1,"""A wonderful little production. <br /><br />The filming technique is very unassuming- very old-time-BBC fashion and gives a comforting, and sometimes discomforting, sense of realism to the entire piece. <br /><br />The actors are extremely well chosen- Michael Sheen not only """"has got all the polari"""" but he has all the voices down pat too! You can truly see the seamless editing guided by the references to Williams' diary entries, not only is it well worth the watching but it is a terrificly written and performed piece. A masterful production about one of the great master's of comedy and his life. <br /><br />The realism really comes home with the little things: the fantasy of the guard which, rather than use the traditional 'dream' techniques remains solid then disappears. It plays on our knowledge and our senses, particularly with the scenes concerning Orton and Halliwell and the sets (particularly of their flat with Halliwell's murals decorating every surface) are terribly well done.","positive""","wonderful little production. filming technique unassuming old time bbc fashion gives comforting, sometimes discomforting, sense realism entire piece. actors extremely well chosen michael sheen got polari voices pat too! truly see seamless editing guided references williams diary entries, well worth watching terrificly written performed piece. masterful production one great master comedy life. realism really comes home little things fantasy guard which, rather use traditional dream techniques remains solid disappears. plays knowledge senses, particularly scenes concerning orton halliwell sets particularly flat halliwell murals decorating every surface terribly well done.",1
2,"""I thought this was a wonderful way to spend time on a too hot summer weekend, sitting in the air conditioned theater and watching a light-hearted comedy. The plot is simplistic, but the dialogue is witty and the characters are likable (even the well bread suspected serial killer). While some may be disappointed when they realize this is not Match Point 2: Risk Addiction, I thought it was proof that Woody Allen is still fully in control of the style many of us have grown to love.<br /><br />This was the most I'd laughed at one of Woody's comedies in years (dare I say a decade?). While I've never been impressed with Scarlet Johanson, in this she managed to tone down her """"sexy"""" image and jumped right into a average, but spirited young woman.<br /><br />This may not be the crown jewel of his career, but it was wittier than """"Devil Wears Prada"""" and more interesting than """"Superman"""" a great comedy to go see with friends.","positive""","thought wonderful way spend time hot summer weekend, sitting air conditioned theater watching light hearted comedy. plot simplistic, dialogue witty characters likable even well bread suspected serial killer . may disappointed realize match point risk addiction, thought proof woody allen still fully control style many us grown love.this laughed one woody comedies years dare say decade? . never impressed scarlet johanson, managed tone sexy image jumped right average, spirited young woman.this may crown jewel career, wittier devil wears prada interesting superman great comedy go see friends.",1
3,"Basically there's a family where a little boy (Jake) thinks there's a zombie in his closet & his parents are fighting all the time.<br /><br />This movie is slower than a soap opera... and suddenly, Jake decides to become Rambo and kill the zombie.<br /><br />OK, first of all when you're going to make a film you must Decide if its a thriller or a drama! As a drama the movie is watchable. Parents are divorcing & arguing like in real life. And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots.<br /><br />3 out of 10 just for the well playing parents & descent dialogs. As for the shots with Jake: just ignore them.",negative,"basically family little boy jake thinks zombie closet parents fighting time.this movie slower soap opera... suddenly, jake decides become rambo kill zombie.ok, first going make film must decide thriller drama! drama movie watchable. parents divorcing arguing like real life. jake closet totally ruins film! expected see boogeyman similar movie, instead watched drama meaningless thriller spots. well playing parents descent dialogs. shots jake ignore them.",0
4,"""Petter Mattei's """"Love in the Time of Money"""" is a visually stunning film to watch. Mr. Mattei offers us a vivid portrait about human relations. This is a movie that seems to be telling us what money, power and success do to people in the different situations we encounter. <br /><br />This being a variation on the Arthur Schnitzler's play about the same theme, the director transfers the action to the present time New York where all these different characters meet and connect. Each one is connected in one way, or another to the next person, but no one seems to know the previous point of contact. Stylishly, the film has a sophisticated luxurious look. We are taken to see how these people live and the world they live in their own habitat.<br /><br />The only thing one gets out of all these souls in the picture is the different stages of loneliness each one inhabits. A big city is not exactly the best place in which human relations find sincere fulfillment, as one discerns is the case with most of the people we encounter.<br /><br />The acting is good under Mr. Mattei's direction. Steve Buscemi, Rosario Dawson, Carol Kane, Michael Imperioli, Adrian Grenier, and the rest of the talented cast, make these characters come alive.<br /><br />We wish Mr. Mattei good luck and await anxiously for his next work.","positive""","petter mattei love time money visually stunning film watch. mr. mattei offers us vivid portrait human relations. movie seems telling us money, power success people different situations encounter. variation arthur schnitzler play theme, director transfers action present time new york different characters meet connect. one connected one way, another next person, one seems know previous point contact. stylishly, film sophisticated luxurious look. taken see people live world live habitat.the thing one gets souls picture different stages loneliness one inhabits. big city exactly best place human relations find sincere fulfillment, one discerns case people encounter.the acting good mr. mattei direction. steve buscemi, rosario dawson, carol kane, michael imperioli, adrian grenier, rest talented cast, make characters come alive.we wish mr. mattei good luck await anxiously next work.",1


In [12]:
# Nettoyage des Textes
df['review_clean'] = df['review_clean'].str.lower()
df['review_clean'] = df['review_clean'].str.replace(r'http\S+|www\S+', '', regex=True)  # Supprimer les URL
df['review_clean'] = df['review_clean'].str.replace(r'[^a-zA-Z\s]', '', regex=True)  # Garder uniquement les lettres

In [13]:
df["review_clean"].head()

Unnamed: 0,review_clean
0,one reviewers mentioned watching oz episode hooked right exactly happened methe first thing struck oz brutality unflinching scenes violence set right word go trust me show faint hearted timid show pulls punches regards drugs sex violence hardcore classic use wordit called oz nickname given oswald maximum security state penitentary focuses mainly emerald city experimental section prison cells glass fronts face inwards privacy high agenda em city home manyaryans muslims gangstas latinos christians italians irish moreso scuffles death stares dodgy dealings shady agreements never far awayi would say main appeal show due fact goes shows dare forget pretty pictures painted mainstream audiences forget charm forget romanceoz mess around first episode ever saw struck nasty surreal say ready it watched more developed taste oz got accustomed high levels graphic violence violence injustice crooked guards sold nickel inmates kill order get away it well mannered middle class inmates turned prison bitches due lack street skills prison experience watching oz may become comfortable uncomfortable viewingthats get touch darker side
1,wonderful little production filming technique unassuming old time bbc fashion gives comforting sometimes discomforting sense realism entire piece actors extremely well chosen michael sheen got polari voices pat too truly see seamless editing guided references williams diary entries well worth watching terrificly written performed piece masterful production one great master comedy life realism really comes home little things fantasy guard which rather use traditional dream techniques remains solid disappears plays knowledge senses particularly scenes concerning orton halliwell sets particularly flat halliwell murals decorating every surface terribly well done
2,thought wonderful way spend time hot summer weekend sitting air conditioned theater watching light hearted comedy plot simplistic dialogue witty characters likable even well bread suspected serial killer may disappointed realize match point risk addiction thought proof woody allen still fully control style many us grown lovethis laughed one woody comedies years dare say decade never impressed scarlet johanson managed tone sexy image jumped right average spirited young womanthis may crown jewel career wittier devil wears prada interesting superman great comedy go see friends
3,basically family little boy jake thinks zombie closet parents fighting timethis movie slower soap opera suddenly jake decides become rambo kill zombieok first going make film must decide thriller drama drama movie watchable parents divorcing arguing like real life jake closet totally ruins film expected see boogeyman similar movie instead watched drama meaningless thriller spots well playing parents descent dialogs shots jake ignore them
4,petter mattei love time money visually stunning film watch mr mattei offers us vivid portrait human relations movie seems telling us money power success people different situations encounter variation arthur schnitzler play theme director transfers action present time new york different characters meet connect one connected one way another next person one seems know previous point contact stylishly film sophisticated luxurious look taken see people live world live habitatthe thing one gets souls picture different stages loneliness one inhabits big city exactly best place human relations find sincere fulfillment one discerns case people encounterthe acting good mr mattei direction steve buscemi rosario dawson carol kane michael imperioli adrian grenier rest talented cast make characters come alivewe wish mr mattei good luck await anxiously next work


In [14]:
df = df[["review_clean", "sentiment_num"]]
df.head()

Unnamed: 0,review_clean,sentiment_num
0,one reviewers mentioned watching oz episode hooked right exactly happened methe first thing struck oz brutality unflinching scenes violence set right word go trust me show faint hearted timid show pulls punches regards drugs sex violence hardcore classic use wordit called oz nickname given oswald maximum security state penitentary focuses mainly emerald city experimental section prison cells glass fronts face inwards privacy high agenda em city home manyaryans muslims gangstas latinos christians italians irish moreso scuffles death stares dodgy dealings shady agreements never far awayi would say main appeal show due fact goes shows dare forget pretty pictures painted mainstream audiences forget charm forget romanceoz mess around first episode ever saw struck nasty surreal say ready it watched more developed taste oz got accustomed high levels graphic violence violence injustice crooked guards sold nickel inmates kill order get away it well mannered middle class inmates turned prison bitches due lack street skills prison experience watching oz may become comfortable uncomfortable viewingthats get touch darker side,1
1,wonderful little production filming technique unassuming old time bbc fashion gives comforting sometimes discomforting sense realism entire piece actors extremely well chosen michael sheen got polari voices pat too truly see seamless editing guided references williams diary entries well worth watching terrificly written performed piece masterful production one great master comedy life realism really comes home little things fantasy guard which rather use traditional dream techniques remains solid disappears plays knowledge senses particularly scenes concerning orton halliwell sets particularly flat halliwell murals decorating every surface terribly well done,1
2,thought wonderful way spend time hot summer weekend sitting air conditioned theater watching light hearted comedy plot simplistic dialogue witty characters likable even well bread suspected serial killer may disappointed realize match point risk addiction thought proof woody allen still fully control style many us grown lovethis laughed one woody comedies years dare say decade never impressed scarlet johanson managed tone sexy image jumped right average spirited young womanthis may crown jewel career wittier devil wears prada interesting superman great comedy go see friends,1
3,basically family little boy jake thinks zombie closet parents fighting timethis movie slower soap opera suddenly jake decides become rambo kill zombieok first going make film must decide thriller drama drama movie watchable parents divorcing arguing like real life jake closet totally ruins film expected see boogeyman similar movie instead watched drama meaningless thriller spots well playing parents descent dialogs shots jake ignore them,0
4,petter mattei love time money visually stunning film watch mr mattei offers us vivid portrait human relations movie seems telling us money power success people different situations encounter variation arthur schnitzler play theme director transfers action present time new york different characters meet connect one connected one way another next person one seems know previous point contact stylishly film sophisticated luxurious look taken see people live world live habitatthe thing one gets souls picture different stages loneliness one inhabits big city exactly best place human relations find sincere fulfillment one discerns case people encounterthe acting good mr mattei direction steve buscemi rosario dawson carol kane michael imperioli adrian grenier rest talented cast make characters come alivewe wish mr mattei good luck await anxiously next work,1


# Tokenisation et Padding

In [15]:
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df['review_clean'])
X = tokenizer.texts_to_sequences(df['review_clean'])
X = pad_sequences(X, padding='post', maxlen=100)

# Séparation des Données et construction du Modèle



In [17]:
# Séparation des Données
y = df['sentiment_num'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
# Construction du Modèle
model = Sequential()
model.add(Embedding(input_dim=10000, output_dim=128, input_length=100))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))



In [19]:
# Compilation du Modèle
model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# Entraînement du Modèle et Évaluation du Modèle

In [20]:
# Entraînement du Modèle
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 263ms/step - accuracy: 0.6267 - loss: 0.6396 - val_accuracy: 0.8048 - val_loss: 0.4745
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 259ms/step - accuracy: 0.8096 - loss: 0.4598 - val_accuracy: 0.8257 - val_loss: 0.4506
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 246ms/step - accuracy: 0.8473 - loss: 0.3839 - val_accuracy: 0.8489 - val_loss: 0.3642
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 249ms/step - accuracy: 0.8912 - loss: 0.2874 - val_accuracy: 0.8696 - val_loss: 0.3159
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 252ms/step - accuracy: 0.9168 - loss: 0.2285 - val_accuracy: 0.8717 - val_loss: 0.3192


<keras.src.callbacks.history.History at 0x79f2c2704850>

In [21]:
# Évaluation du Modèle
y_pred = (model.predict(X_test) > 0.5).astype("int32")
print(classification_report(y_test, y_pred))

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 72ms/step
              precision    recall  f1-score   support

           0       0.87      0.87      0.87      4961
           1       0.87      0.87      0.87      5039

    accuracy                           0.87     10000
   macro avg       0.87      0.87      0.87     10000
weighted avg       0.87      0.87      0.87     10000



# Prédiction de sentiment pour de nouveaux avis

In [22]:
# Prédiction sur de Nouveaux avis
def predict_sentiment(text):
    text = text.lower()
    text = re.sub(r'http\S+|www\S+', '', text)  # Supprimer les URL
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Garder uniquement les lettres
    text_seq = tokenizer.texts_to_sequences([text])
    text_pad = pad_sequences(text_seq, padding='post', maxlen=100)
    sentiment = model.predict(text_pad)
    return 'Positif' if sentiment > 0.5 else 'Négatif'

In [26]:
print(predict_sentiment("I love this movie so much !"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Positif


In [27]:
print(predict_sentiment("I hate this movie so much !"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Négatif
