In [None]:
import pandas as pd

In [None]:
# Load the combined dataset and, in the same expression, discard every row
# that has a missing value in any column.
df = pd.read_csv("/content/final_combined_mobandus.csv").dropna()

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from keras.optimizers import Adam , SGD

In [None]:
# --- Labels: raw label -> integer class id -> one-hot row -------------------
label_encoder = LabelEncoder().fit(df['Label'])
num_classes = len(label_encoder.classes_)
y = tf.keras.utils.to_categorical(
    label_encoder.transform(df['Label']),
    num_classes=num_classes,
)

# --- Text: build the vocabulary, map each text to word ids, pad to one length
# NOTE(review): the vocabulary is fit on the full corpus, i.e. also on rows
# that end up in the test split below.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Text'])
X = pad_sequences(tokenizer.texts_to_sequences(df['Text']))

# --- Hold out 20% of the rows for testing (fixed seed => repeatable split) ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:
learning_rate = 0.0001

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation,
# dropout masks and batch shuffling start from the same state on every run.
tf.keras.utils.set_random_seed(42)

# Initialize the model
model = Sequential()

# Embedding layer: one trainable 50-d vector per vocabulary entry.
embedding_dim = 50
# +1 leaves room for index 0, the value pad_sequences uses as filler.
vocab_size = len(tokenizer.word_index) + 1
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=X.shape[1]))

# Two stacked LSTMs: the first returns the full sequence so the second can
# consume it; the second returns only its final state.
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.3))  # Adding dropout for regularization

# Output layer: one softmax unit per class. The width follows num_classes
# (derived from the label encoder) instead of a hard-coded 3, so it always
# matches the one-hot width of y_train.
model.add(Dense(num_classes, activation="softmax"))

# Output model summary
model.summary()
optimizer = Adam(learning_rate=learning_rate)

# Compile the model (categorical cross-entropy pairs with the one-hot labels)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy'])

# Train the model. The returned History is kept so the per-epoch loss and
# accuracy can be inspected later; assigning it also stops the bare History
# repr from being echoed as the cell output.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)


Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_15 (Embedding)    (None, 72, 50)            1136050   
                                                                 
 lstm_25 (LSTM)              (None, 72, 50)            20200     
                                                                 
 lstm_26 (LSTM)              (None, 100)               60400     
                                                                 
 dropout_10 (Dropout)        (None, 100)               0         
                                                                 
 dense_15 (Dense)            (None, 3)                 303       
                                                                 
Total params: 1216953 (4.64 MB)
Trainable params: 1216953 (4.64 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoc

<keras.src.callbacks.History at 0x7a3eca18fe50>

In [None]:
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
# Evaluate the trained model on the held-out test set.
# model.predict yields one softmax row per sample; argmax turns it into a
# class index. y_test is one-hot, so argmax recovers its class index too.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Print the classification report using the dataset's own label names rather
# than the opaque encoder indices. `labels` is pinned to every encoded class
# so a class missing from this split still gets a row.
class_names = [str(c) for c in label_encoder.classes_]
print(
    "Classification Report:\n",
    classification_report(
        y_true,
        y_pred_classes,
        labels=list(range(len(class_names))),
        target_names=class_names,
    ),
)

# Print accuracy
accuracy = accuracy_score(y_true, y_pred_classes)
print("Accuracy:", accuracy)


Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.86      0.79       662
           1       0.76      0.59      0.67       212
           2       0.91      0.83      0.87       899

    accuracy                           0.82      1773
   macro avg       0.80      0.76      0.78      1773
weighted avg       0.82      0.82      0.82      1773

Accuracy: 0.8161308516638466


In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding, Dropout
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from imblearn.over_sampling import SMOTE

# Needed below to re-shuffle the oversampled training set (X and y together).
from sklearn.utils import shuffle

# Tokenize the text and pad every sequence to a common length
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Text'])
X = tokenizer.texts_to_sequences(df['Text'])
X = pad_sequences(X)

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Label'])

# Convert labels to one-hot encoding
num_classes = len(label_encoder.classes_)
y = tf.keras.utils.to_categorical(y, num_classes=num_classes)

# Split BEFORE oversampling. Resampling the full dataset first would (a) put
# synthetic rows into the test set and (b) let synthetic training rows be
# interpolated from rows that later become test rows, so the test score would
# no longer measure performance on real, unseen data.
# Same test_size / random_state as the first experiment => same test rows,
# which keeps the two classification reports directly comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply SMOTE to the training portion only; the test set stays untouched.
# NOTE(review): SMOTE interpolates between feature vectors, and the features
# here are padded token ids, so synthetic rows are not real sentences.
# Consider RandomOverSampler or class weights if results stay poor.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# The resampled arrays hold the original rows followed by the synthetic ones,
# and model.fit(validation_split=...) takes its validation slice from the tail
# of the arrays. Shuffle so that slice is not made up of synthetic rows only.
X_train, y_train = shuffle(X_resampled, y_resampled, random_state=42)

# Specify the learning rate
learning_rate = 0.0001

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation,
# dropout masks and batch shuffling start from the same state on every run.
tf.keras.utils.set_random_seed(42)

# Initialize the model
model = Sequential()

# Embedding layer: one trainable 50-d vector per vocabulary entry.
embedding_dim = 50
# +1 leaves room for index 0, the value pad_sequences uses as filler.
vocab_size = len(tokenizer.word_index) + 1
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=X.shape[1]))

# Two stacked LSTMs: the first returns the full sequence so the second can
# consume it; the second returns only its final state.
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.3))  # Adding dropout for regularization

# Output layer: one softmax unit per class. The width follows num_classes
# (derived from the label encoder) instead of a hard-coded 3, so it always
# matches the one-hot width of y_train.
model.add(Dense(num_classes, activation="softmax"))

# Output model summary
model.summary()

# Compile the model with a specified learning rate
optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy'])

# Train the model. The returned History is kept so the per-epoch loss and
# accuracy can be inspected later; assigning it also stops the bare History
# repr from being echoed as the cell output.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)


Model: "sequential_21"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_21 (Embedding)    (None, 72, 50)            1136050   
                                                                 
 lstm_37 (LSTM)              (None, 72, 50)            20200     
                                                                 
 lstm_38 (LSTM)              (None, 100)               60400     
                                                                 
 dropout_16 (Dropout)        (None, 100)               0         
                                                                 
 dense_21 (Dense)            (None, 3)                 303       
                                                                 
Total params: 1216953 (4.64 MB)
Trainable params: 1216953 (4.64 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoc

<keras.src.callbacks.History at 0x7a3ebc7acd30>

In [None]:
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
# Evaluate the trained model on the held-out test set.
# model.predict yields one softmax row per sample; argmax turns it into a
# class index. y_test is one-hot, so argmax recovers its class index too.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Print the classification report using the dataset's own label names rather
# than the opaque encoder indices. `labels` is pinned to every encoded class
# so a class missing from this split still gets a row.
class_names = [str(c) for c in label_encoder.classes_]
print(
    "Classification Report:\n",
    classification_report(
        y_true,
        y_pred_classes,
        labels=list(range(len(class_names))),
        target_names=class_names,
    ),
)

# Print accuracy
accuracy = accuracy_score(y_true, y_pred_classes)
print("Accuracy:", accuracy)


Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.62      0.62       905
           1       0.66      0.62      0.64       891
           2       0.71      0.75      0.73       944

    accuracy                           0.67      2740
   macro avg       0.66      0.66      0.66      2740
weighted avg       0.66      0.67      0.66      2740

Accuracy: 0.6656934306569343
