<a href="https://colab.research.google.com/github/MahdiFaourr/MahdiFaourr/blob/main/shopper_sentiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opendatasets
!pip install gradio==3.14.0

In [None]:
od.download("https://www.kaggle.com/datasets/nelgiriyewithana/shoppersentiments")

In [4]:
import pandas as pd
df = pd.read_csv("/content/shoppersentiments/TeePublic_review.csv", encoding='latin1')

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [5]:
data=df[['review','review-label']]

In [None]:
data.head()

In [6]:
data=data.dropna()

In [None]:
data.shape

In [None]:
data['review-label'].value_counts()

In [None]:
# Import necessary libraries and functions
import pandas as pd
import opendatasets as od
import numpy as np
import re
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.metrics import Precision
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense,Embedding,Bidirectional,GRU,Dropout,BatchNormalization
from sklearn.model_selection import train_test_split
import pickle
import gradio as gr
from keras.optimizers import Adam

In [19]:
# Define necessary objects
precision_metric = Precision()
stemmer=PorterStemmer()
English_stopwords=stopwords.words('english')

In [20]:
# Define a function that process texts
def text_cleaner(text):
  text=text.lower()# Convert to lower cases
  text_with_no_punctuations = re.sub(r'[^a-zA-Z0-9]', ' ', text) # Remove non alphabatic symbols
  tokens=word_tokenize(text_with_no_punctuations) # tokeize words
  stemmed_text = [stemmer.stem(word) for word in tokens] # Apply stemming
  text = ' '.join(stemmed_text)
  text_with_no_stopwords=[word for word in text.split() if word not in English_stopwords]# remove english stopwords
  final_cleaned_text=' '.join(text_with_no_stopwords)
  return final_cleaned_text

In [10]:
# Define a function that returns the length of a text
def count_words(text):
  return len(text.split())

In [11]:
data['cleaned_review']=data['review'].apply(text_cleaner)

In [12]:
data['cleaned_review_length']=data['cleaned_review'].apply(count_words)

In [None]:
data['cleaned_review_length'].describe()

In [13]:
Tok=Tokenizer()
Tok.fit_on_texts(data['cleaned_review'])
vocab_size=len(Tok.word_index)+1
print(vocab_size)

38028


In [14]:
y=to_categorical(data['review-label'])
x=Tok.texts_to_sequences(data['cleaned_review'])
x_padded=pad_sequences(x,maxlen=20,padding='post',truncating='post')

In [15]:
# Split the data into training and testing parts
x_train,x_test,y_train,y_test=train_test_split(x_padded,y,test_size=0.2,random_state=42)

In [17]:
# Save tokenizer to a file
with open("shoppersentiments_tokenizer.pickle", "wb") as f:
    pickle.dump(Tok, f)

In [20]:
model=Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=110, input_length=20))
model.add(Bidirectional(LSTM(200,return_sequences=False)))
model.add(Dense(256,activation="relu"))
model.add(BatchNormalization())
model.add(Dense(45,activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(6,activation='softmax'))


In [None]:
# Define a checkpoint callback to save the best
adam=Adam(learning_rate=0.0001)
# Compile and train the model on training dataset over 8 epochs
model.compile(optimizer=adam,loss='categorical_crossentropy',metrics=['acc',precision_metric])
history=model.fit(x_train,y_train,validation_split=0.2,epochs=8,batch_size=64)

In [22]:
# Evaluate the model on testing dataset
model.evaluate(x_test,y_test)



[0.5728018879890442, 0.8012722134590149, 0.8586050271987915]

In [None]:
# Save the model as HDF5 file
model.save("shopper_sentiment_model.h5")

In [12]:
# Create a function for demo
def review_rating(text):
  # clean the input text
    cleaned_text = text_cleaner(text)
  # convert the cleaned text to a sequence of integers
    text_array = Tok.texts_to_sequences([cleaned_text])
  # pad the sequence
    padded_array = pad_sequences(text_array, maxlen=20, padding='post', truncating='post')
  # use the model created to generate predictions
    prediction = model.predict(padded_array)

    # Find the predicted class
    predicted_class = np.argmax(prediction)
    return predicted_class

In [24]:
# Example usage 2
review_rating("Very bad and cold tee")



1

In [25]:
# Example usage 2
review_rating("These guys offer the best customer service in the city!")



5

In [None]:
# Create Gradio interface
iface = gr.Interface(
    fn=review_rating,
    inputs=gr.inputs.Textbox(lines=5, label="Enter your text here"),
    outputs=gr.outputs.Label(num_top_classes=6),
    title="Sentiment Analysis",
    description="Enter a text and the model will predict the level of satisfication."
)
# Launch the interface
iface.launch(debug='True')