### Import the Dependencies

In [32]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import joblib

### Import the Dataset - Customer Feedback Dataset

In [2]:
# Load the raw feedback file; header=None keeps the file's first line as data row 0
df = pd.read_csv(filepath_or_buffer="sentiment-analysis.csv", header=None)

In [3]:
# Preview the first five rows of the raw frame
df.head(n=5)

Unnamed: 0,0
0,"Text, Sentiment, Source, Date/Time, User ID, L..."
1,"""I love this product!"", Positive, Twitter, 202..."
2,"""The service was terrible."", Negative, Yelp Re..."
3,"""This movie is amazing!"", Positive, IMDb, 2023..."
4,"""I'm so disappointed with their customer suppo..."


In [4]:
# Show column dtypes and non-null counts to spot missing rows before cleaning
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       97 non-null     object
dtypes: object(1)
memory usage: 920.0+ bytes


### Data Preprocessing

In [5]:
# Discard every row that contains at least one missing value
df = df.dropna(axis=0, how='any')

In [6]:
# Keep only the column labelled 0 (still as a one-column DataFrame)
df = df.loc[:, [0]]
df.head(n=5)

Unnamed: 0,0
0,"Text, Sentiment, Source, Date/Time, User ID, L..."
1,"""I love this product!"", Positive, Twitter, 202..."
2,"""The service was terrible."", Negative, Yelp Re..."
3,"""This movie is amazing!"", Positive, IMDb, 2023..."
4,"""I'm so disappointed with their customer suppo..."


In [7]:
# Rename the single column (label 0) to 'comments'.
# The result is reassigned instead of using inplace=True, so the cell does not
# mutate a frame that earlier cells have already displayed.
df = df.rename(columns={0: 'comments'})
df.head(5)

Unnamed: 0,comments
0,"Text, Sentiment, Source, Date/Time, User ID, L..."
1,"""I love this product!"", Positive, Twitter, 202..."
2,"""The service was terrible."", Negative, Yelp Re..."
3,"""This movie is amazing!"", Positive, IMDb, 2023..."
4,"""I'm so disappointed with their customer suppo..."


In [8]:
# Split each raw comment line into its seven fields.
# rsplit with n=6 cuts on the LAST six commas only, so a comma inside the review
# text stays in 'review' instead of producing extra columns (which would make the
# column assignment below fail with a length mismatch).
field_names = ['review', 'sentiment', 'source', 'date', 'user', 'location', 'confidence']
fields = df['comments'].str.rsplit(',', n=6, expand=True)
# The raw line separates fields with ", " - trim the leftover surrounding whitespace
# so labels read 'Positive' rather than ' Positive'.
df[field_names] = fields.apply(lambda column: column.str.strip())
df.head(5)

Unnamed: 0,comments,review,sentiment,source,date,user,location,confidence
0,"Text, Sentiment, Source, Date/Time, User ID, L...",Text,Sentiment,Source,Date/Time,User ID,Location,Confidence Score
1,"""I love this product!"", Positive, Twitter, 202...","""I love this product!""",Positive,Twitter,2023-06-15 09:23:14,@user123,New York,0.85
2,"""The service was terrible."", Negative, Yelp Re...","""The service was terrible.""",Negative,Yelp Reviews,2023-06-15 11:45:32,user456,Los Angeles,0.65
3,"""This movie is amazing!"", Positive, IMDb, 2023...","""This movie is amazing!""",Positive,IMDb,2023-06-15 14:10:22,moviefan789,London,0.92
4,"""I'm so disappointed with their customer suppo...","""I'm so disappointed with their customer suppo...",Negative,Online Forum,2023-06-15 17:35:11,forumuser1,Toronto,0.78


In [9]:
# Row label 0 holds the file's header line (Text, Sentiment, ...), not a review
df = df.drop(index=0)
df.head(n=5)

Unnamed: 0,comments,review,sentiment,source,date,user,location,confidence
1,"""I love this product!"", Positive, Twitter, 202...","""I love this product!""",Positive,Twitter,2023-06-15 09:23:14,@user123,New York,0.85
2,"""The service was terrible."", Negative, Yelp Re...","""The service was terrible.""",Negative,Yelp Reviews,2023-06-15 11:45:32,user456,Los Angeles,0.65
3,"""This movie is amazing!"", Positive, IMDb, 2023...","""This movie is amazing!""",Positive,IMDb,2023-06-15 14:10:22,moviefan789,London,0.92
4,"""I'm so disappointed with their customer suppo...","""I'm so disappointed with their customer suppo...",Negative,Online Forum,2023-06-15 17:35:11,forumuser1,Toronto,0.78
5,"""Just had the best meal of my life!"", Positive...","""Just had the best meal of my life!""",Positive,TripAdvisor,2023-06-16 08:50:59,foodie22,Paris,0.88


In [10]:
# The unsplit 'comments' column is no longer needed once the fields exist
df = df.drop(columns=['comments'])
df.head(n=5)

Unnamed: 0,review,sentiment,source,date,user,location,confidence
1,"""I love this product!""",Positive,Twitter,2023-06-15 09:23:14,@user123,New York,0.85
2,"""The service was terrible.""",Negative,Yelp Reviews,2023-06-15 11:45:32,user456,Los Angeles,0.65
3,"""This movie is amazing!""",Positive,IMDb,2023-06-15 14:10:22,moviefan789,London,0.92
4,"""I'm so disappointed with their customer suppo...",Negative,Online Forum,2023-06-15 17:35:11,forumuser1,Toronto,0.78
5,"""Just had the best meal of my life!""",Positive,TripAdvisor,2023-06-16 08:50:59,foodie22,Paris,0.88


### Text Preprocessing

In [11]:
# Vocabulary size: used as the tokenizer's num_words and the embedding's input size
max_words = 10_000

In [12]:
# Define the maximum length of a review, in tokens.
# Used as `maxlen` for pad_sequences and as `input_length` of the Embedding layer.
max_length = 100

In [13]:
# Build the tokenizer (vocabulary limited via num_words=max_words)
# and fit its word index on the review column
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts=df['review'])

In [14]:
# Map every review to its list of integer word indices
sequences = tokenizer.texts_to_sequences(texts=df['review'])

In [15]:
# Bring every sequence to exactly max_length entries.
# padding/truncating are spelled out with their default value ('pre').
data = pad_sequences(
    sequences=sequences,
    maxlen=max_length,
    padding='pre',
    truncating='pre',
)

### Model Building

In [16]:
# Define the target variable: one-hot encode the sentiment labels.
# dtype is set explicitly because recent pandas versions return boolean dummy
# columns by default; the Keras loss should receive a numeric one-hot matrix.
target = pd.get_dummies(df['sentiment'], dtype='float32').values

In [19]:
# Assemble the network layer by layer: embedding -> LSTM -> 2-unit softmax
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(input_dim=max_words, output_dim=64, input_length=max_length)
)
model.add(tf.keras.layers.LSTM(units=128))
model.add(tf.keras.layers.Dense(units=2, activation='softmax'))

In [20]:
# Compile the model.
# The network ends in a 2-unit softmax and is trained on one-hot target rows, so the
# matching loss is categorical cross-entropy. binary_crossentropy is meant for
# independent 0/1 outputs and only coincides with it in the two-class case.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [21]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the model
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 100, 64)           640000    
                                                                 
 lstm_4 (LSTM)               (None, 128)               98816     
                                                                 
 dense_2 (Dense)             (None, 2)                 258       
                                                                 
Total params: 739074 (2.82 MB)
Trainable params: 739074 (2.82 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [22]:
# Fit the network on the padded reviews and their one-hot labels
model.fit(x=data, y=target, batch_size=32, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x1fa628f0c10>

In [23]:
# Evaluate the model.
# NOTE: `data` and `target` are the same arrays the model was fitted on, so this
# is a training-set score, not a held-out estimate.
scores = model.evaluate(x=data, y=target)
print("Accuracy: ", scores[1])

Accuracy:  1.0


In [24]:
# Function to make predictions on new data
def prediction(new_data):
    """Print the sentiment predicted for the first sequence in ``new_data``.

    ``new_data`` is handed to ``pad_sequences`` with ``maxlen=max_length`` and the
    padded batch is scored by the module-level ``model``. Only the first row of
    the batch is reported; the function prints the label and returns nothing.
    """
    padded_batch = pad_sequences(new_data, maxlen=max_length)
    class_scores = model.predict(padded_batch)

    # Index of the highest-scoring output for every row of the batch
    winning_classes = np.argmax(class_scores, axis=1)

    # Output index 0 is reported as 'negative', index 1 as 'positive'
    label_by_index = ('negative', 'positive')
    first_label = label_by_index[winning_classes[0]]

    print('Predicted sentiment:', first_label)

In [25]:
# Tokenize one new review (a one-element batch) and classify it
review1 = tokenizer.texts_to_sequences(['This is a terrible product'])
prediction(review1)

Predicted sentiment: negative


In [26]:
# Tokenize one new review (a one-element batch) and classify it
review2 = tokenizer.texts_to_sequences(['A great product/service'])
prediction(review2)

Predicted sentiment: positive


In [27]:
# Tokenize one new review (a one-element batch) and classify it
review3 = tokenizer.texts_to_sequences(['We loved the food and how the staff treats customers. I recommend this place.'])
prediction(review3)

Predicted sentiment: positive


In [29]:
# Tokenize one new review (a one-element batch) and classify it
review4 = tokenizer.texts_to_sequences(["I'm upset and would not recommend purchasing from them."])
prediction(review4)

Predicted sentiment: negative


In [30]:
# Tokenize one new review (a one-element batch) and classify it
review5 = tokenizer.texts_to_sequences(["The service was awful! They ignored our requests and seemed annoyed when we asked to change what they initially brought us."])
prediction(review5)

Predicted sentiment: negative


### Save the model

In [34]:
# Persist the trained network to an HDF5 file next to the notebook
model.save(filepath='sentiment_analysis_model.h5')

  saving_api.save_model(


In [35]:
# Persist the word -> index mapping. Only `word_index` is written, not the
# Tokenizer object itself.
joblib.dump(value=tokenizer.word_index, filename='sentiment_analysis_tokenizer.pkl')

['sentiment_analysis_tokenizer.pkl']