In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.optimizers import Adam

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv('/content/drive/My Drive/cleaned_total_reviews.csv')
df.head()

Unnamed: 0,review_id,Bank,review,ratings,thumbs_Up_Count,review_App_Version,review_time,manual_labels,ratings_label,tokens
0,101,Barclays,not smoothest easiest apps navigate . but choi...,2,28,2.89.0,2024-02-27 12:49:00,Neutral,negative,"['not', 'smoothest', 'easiest', 'apps', 'navig..."
1,102,Barclays,app has good features eg let 's track spend ca...,3,102,2.88.2,2024-02-10 09:35:00,Neutral,neutral,"['app', 'has', 'good', 'features', 'eg', 'let'..."
2,103,Barclays,easy app use . easy navigate fast making trans...,5,84,2.89.0,2024-02-20 11:33:00,Positive,positive,"['easy', 'app', 'use', '.', 'easy', 'navigate'..."
3,104,Barclays,pointless app . i can see much i owe and pay o...,1,0,2.89.0,2024-02-28 14:06:00,Negative,negative,"['pointless', 'app', '.', 'i', 'can', 'see', '..."
4,105,Barclays,update . issue was resolved quickly and i 'm r...,5,2,2.89.0,2024-02-27 16:41:00,Positive,positive,"['update', '.', 'issue', 'was', 'resolved', 'q..."


In [None]:
# Label mapping
label_map = {'Negative': 0, 'Neutral': 1, 'Positive': 2}
df['encoded_labels'] = df['manual_labels'].map(label_map)

In [None]:
df_neg = df[df.encoded_labels == label_map['Negative']]
df_neu = df[df.encoded_labels == label_map['Neutral']]
df_pos = df[df.encoded_labels == label_map['Positive']]

In [None]:
df_pos_resampled = resample(df_pos, replace=True, n_samples=len(df_neg), random_state=123)
df_neu_resampled = resample(df_neu, replace=True, n_samples=len(df_neg), random_state=123)


In [None]:
df_resampled = pd.concat([df_neg, df_pos_resampled, df_neu_resampled])

In [None]:
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(df_resampled['review'])

In [None]:
sequences = tokenizer.texts_to_sequences(df_resampled['review'])
data = pad_sequences(sequences, maxlen=200)

In [None]:
labels = np.array(df_resampled['encoded_labels'])

In [None]:
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=0)

In [None]:
model = Sequential()
model.add(Embedding(10000, 100, input_length=200))
model.add(Conv1D(128, 5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(3, activation='softmax'))

In [None]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(train_data, train_labels, epochs=10, validation_split=0.1, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
eval_loss, eval_accuracy = model.evaluate(test_data, test_labels)
print(f"Test Loss: {eval_loss}, Test Accuracy: {eval_accuracy}")

Test Loss: 0.1830495297908783, Test Accuracy: 0.9329445958137512


In [None]:
# Predict the test dataset
predictions = model.predict(test_data)
predicted_classes = np.argmax(predictions, axis=1)



In [None]:
from sklearn.metrics import classification_report

In [None]:
# Print classification report
print(classification_report(test_labels, predicted_classes, target_names=['Negative', 'Neutral', 'Positive']))

              precision    recall  f1-score   support

    Negative       0.93      0.91      0.92       235
     Neutral       0.90      0.94      0.92       237
    Positive       0.98      0.95      0.96       214

    accuracy                           0.93       686
   macro avg       0.93      0.93      0.93       686
weighted avg       0.93      0.93      0.93       686

