In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.tokenize import word_tokenize
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Embedding, Flatten, Dropout
from tensorflow.keras.models import load_model
from sklearn.utils import resample
import re
import nltk
from nltk.corpus import stopwords
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
import pickle
import string

In [2]:
# Load the raw CSV, keep only the text and label columns under the names the
# rest of the notebook uses, then drop rows with missing values and rows whose
# text duplicates an earlier row.
df = (
    pd.read_csv('data.csv')[['statement', 'status']]
    .rename(columns={'statement': 'content', 'status': 'sentiment'})
    .dropna()
    .drop_duplicates(subset=['content'])
)

In [3]:
def preprocess_text(text):
    """Lower-case ``text`` and strip digits, URLs and ASCII punctuation.

    The substitutions run in a fixed order: digit runs become a space, then
    ``http(s)://`` and ``www.`` tokens are deleted, then every character in
    ``string.punctuation`` becomes a space. Whitespace is not collapsed.

    Because digits are replaced first, a URL that contains digits is only
    removed up to its first digit; the remainder stays in the text.
    NOTE(review): the order is kept as-is because the saved model was
    presumably trained on text cleaned exactly this way -- confirm before
    reordering.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    substitutions = (
        (r'\d+', ' '),                                          # digit runs
        (r'https?://\S+|www\.\S+', ''),                         # URLs
        (r'[%s]' % re.escape(string.punctuation), ' '),         # punctuation
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned

In [4]:
# Clean every statement, then keep only the rows whose cleaned text has at
# most 200 whitespace-separated words and renumber the index from zero.
df['text_clean'] = df['content'].apply(preprocess_text)

within_word_limit = df['text_clean'].str.split().str.len() <= 200
df = df[within_word_limit].reset_index(drop=True)


In [5]:
# Label encoding: map each distinct value of `sentiment` to an integer class id.
# `label_encoder` is kept so predictions can be mapped back with inverse_transform.
label_encoder = LabelEncoder()
df['encoded_labels'] = label_encoder.fit_transform(df['sentiment'])

# Same number as the 200-word filter applied to `text_clean`.
# NOTE(review): this value is overwritten in the tokenization cell by the
# length of the longest tokenized sequence.
max_sequence_length = 200

In [6]:
# Fit the vocabulary on the cleaned corpus and turn each text into token ids.
# NOTE(review): the tokenizer is re-fit here rather than loaded from disk;
# presumably this reproduces the vocabulary the saved model was trained with --
# confirm.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text_clean'])
token_list = tokenizer.texts_to_sequences(df['text_clean'])

# Pad (and, if ever needed, truncate) at the front so every sequence has the
# length of the longest sequence in the corpus.
max_sequence_length = max(map(len, token_list))
padded_sequences = pad_sequences(
    token_list,
    maxlen=max_sequence_length,
    padding='pre',
    truncating='pre',
)

In [7]:
# Hold out 30% of the padded sequences (and their encoded labels) for
# validation; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    padded_sequences,
    df['encoded_labels'],
    test_size=0.30,
    random_state=42,
)


In [8]:

# Load the previously saved Keras model from the working directory.
# NOTE(review): presumably trained with the same preprocessing, tokenizer and
# sequence length built above -- confirm against the training notebook.
model = load_model("sentiment_model.keras")

  saveable.load_own_variables(weights_store.get(inner_path))


In [9]:
def predict_text(text):
    """Return the predicted sentiment label for one raw text string.

    The text is cleaned with ``preprocess_text``, converted to token ids by the
    notebook-level ``tokenizer``, front-padded/truncated to
    ``max_sequence_length`` and scored by ``model``. The index of the highest
    score is mapped back to its original label through ``label_encoder``.

    Args:
        text: The raw input string.

    Returns:
        The label (as stored in ``label_encoder``) with the highest score.
    """
    cleaned = preprocess_text(text)
    sequences = tokenizer.texts_to_sequences([cleaned])
    model_input = pad_sequences(
        sequences,
        maxlen=max_sequence_length,
        padding='pre',
        truncating='pre',
    )
    scores = model.predict(model_input)
    best_index = np.argmax(scores)
    return label_encoder.inverse_transform([best_index])[0]

In [10]:
# Score every validation sequence with the loaded model; `y_pred` holds one
# row of class scores per sample (reduced with argmax over axis 1 below).
y_pred = model.predict(X_val)

[1m397/397[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 116ms/step


In [11]:
# Reduce each row of class scores to the index of its highest-scoring class.
y_pred_labels = y_pred.argmax(axis=1)
y_pred_labels

array([6, 2, 3, ..., 2, 6, 5], dtype=int64)

In [12]:
# Display the held-out encoded labels for a quick visual comparison with
# `y_pred_labels`.
y_val

9532     6
34662    3
5152     3
33044    2
36437    3
        ..
26011    3
4320     3
31686    2
17255    2
7917     6
Name: encoded_labels, Length: 12683, dtype: int32

In [13]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

# Summary metrics on the validation split. Macro averaging gives every class
# the same weight, so the small classes count as much as the large ones
# ('weighted' would instead weight each class by its support).
precision = precision_score(y_val, y_pred_labels, average='macro')
recall = recall_score(y_val, y_pred_labels, average='macro')
accuracy = accuracy_score(y_val, y_pred_labels)
# Store the result under a name different from the imported function:
# rebinding `f1_score` to a float makes this cell fail on re-run with
# "'numpy.float64' object is not callable".
macro_f1 = f1_score(y_val, y_pred_labels, average='macro')

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"f1_score: {macro_f1:.4f}")


Precision: 0.5712
Recall: 0.6008
Accuracy: 0.6943
f1_score: 0.5827


In [17]:
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision/recall/F1 followed by the confusion matrix; both are
# computed on the validation labels versus the predicted labels.
report = classification_report(y_val, y_pred_labels)
matrix = confusion_matrix(y_val, y_pred_labels)

print(report)
print(matrix)

              precision    recall  f1-score   support

           0       0.67      0.68      0.67       813
           1       0.61      0.65      0.63       545
           2       0.61      0.56      0.58      3191
           3       0.90      0.86      0.88      4896
           4       0.30      0.36      0.33       169
           5       0.30      0.46      0.37       621
           6       0.61      0.63      0.62      2448

    accuracy                           0.69     12683
   macro avg       0.57      0.60      0.58     12683
weighted avg       0.71      0.69      0.70     12683

[[ 553   48   47   57    7   91   10]
 [  23  354   65   18   28   54    3]
 [  74   78 1772  163   69  286  749]
 [  75   26  167 4234   13  167  214]
 [   8   26   37   11   61   19    7]
 [  86   39  103   89    8  288    8]
 [  11   11  699  117   20   46 1544]]


In [15]:
# Smoke-test the end-to-end prediction helper on a hand-written sentence.
sample_text = " www./i want to live"
predicted_sentiment = predict_text(sample_text)

print("Predicted Sentiment:", predicted_sentiment)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Predicted Sentiment: Normal
