<a href="https://colab.research.google.com/github/45luckyy/Sentimental-Insights-Analyzing/blob/main/sentiment_model_h5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LSTM Model

# Import necessary libraries

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout


# Data Preprocessing

Load preprocessed data

In [None]:

# Read the preprocessed Reddit comments into a DataFrame. Only the
# 'clean_comment' and 'category' columns are used further down.
data = pd.read_csv(filepath_or_buffer="Preprocessed_Reddit_Data2.csv")

In [None]:
# Show the loaded DataFrame as the cell output for a quick sanity check of its
# columns and rows.
data

Unnamed: 0,clean_comment,category
0,family mormon never tried explain still star...,1
1,buddhism much lot compatible christianity espe...,1
2,seriously say thing first get complex explain ...,-1
3,learned want teach different focus goal wrappi...,0
4,benefit may want read living buddha living chr...,1
...,...,...
9995,team standard scoring pick tevin coleman ari...,0
9996,standard scoring pick one booker marshall smal...,0
9997,team ppr pick two cooks ware miller,0
9998,ppr pick jonathan stewart sterling shepard w...,0


Separate features and labels

In [None]:
# Features are the cleaned comment text; targets are the sentiment category.
X, y = data['clean_comment'], data['category']

Encode labels (-1, 0, 1)

In [None]:
# Two-step target encoding:
#   1. LabelEncoder maps the raw labels -1, 0, 1 to the class indices 0, 1, 2.
#   2. to_categorical expands those indices into 3-wide one-hot rows.
label_encoder = LabelEncoder()
y = tf.keras.utils.to_categorical(
    label_encoder.fit_transform(y),
    num_classes=3,
)


Tokenize the comments into integer sequences

In [None]:
# Build the vocabulary from the comment text and encode every comment as a
# list of integer word indices.
tokenizer = Tokenizer()
# Fill missing comments *before* casting to str. Casting first turns NaN into
# the literal text "nan", after which fillna('') has nothing left to replace
# and "nan" would be learned as an ordinary vocabulary word.
X = X.fillna('').astype(str)
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)

Pad sequences to ensure uniform input size


In [None]:
# Fixed sequence length fed to the network.
# Adjust based on average comment length.
max_len = 100
# Shorter sequences get zeros appended at the end ('post' padding) so every
# row ends up with exactly max_len entries.
X = pad_sequences(sequences=X, padding='post', maxlen=max_len)


# Train-test split

In [None]:
# Hold out 20% of the samples for validation; the fixed random_state keeps the
# split identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Build the model

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and dropout are
# repeatable between runs (the data split above is already seeded with 42).
tf.keras.utils.set_random_seed(42)

# Stacked-LSTM sentiment classifier: embedding -> LSTM(64) -> LSTM(32) ->
# dense head with a 3-way softmax.
model = Sequential([
    # The Tokenizer never assigns index 0 to a word (hence the +1 on the
    # vocabulary size) and pad_sequences fills with 0, so index 0 only ever
    # means "padding". mask_zero=True lets the LSTMs skip those steps; without
    # it the post-padded zeros are processed as real tokens and the final LSTM
    # state is dominated by padding.
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128,
              input_length=max_len, mask_zero=True),
    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(64, return_sequences=True, dropout=0.2),
    # Second LSTM returns only its final output vector.
    LSTM(32, dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # 3 classes: Negative, Neutral, Positive
])



Compile the model

In [None]:

# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# tracked as the reporting metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model

In [None]:

# Train on the training split and score the held-out split after every epoch.
# The returned History object is kept in `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,  # Adjust epochs as needed
    verbose=1,
    validation_data=(X_val, y_val),
)

Epoch 1/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 190ms/step - accuracy: 0.4276 - loss: 1.0756 - val_accuracy: 0.4390 - val_loss: 1.0668
Epoch 2/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 191ms/step - accuracy: 0.4427 - loss: 1.0606 - val_accuracy: 0.5655 - val_loss: 0.9776
Epoch 3/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 193ms/step - accuracy: 0.5328 - loss: 1.0134 - val_accuracy: 0.5420 - val_loss: 0.9989
Epoch 4/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 188ms/step - accuracy: 0.5010 - loss: 1.0031 - val_accuracy: 0.4700 - val_loss: 1.0016
Epoch 5/5
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 174ms/step - accuracy: 0.4820 - loss: 0.9873 - val_accuracy: 0.4390 - val_loss: 0.9820


# Save the model

In [None]:

# Persist the trained model in the native Keras format. The path is defined
# once so the confirmation message always names the file that was actually
# written (the old message reported "sentiment_model_h5", which is not the
# file that save() creates).
model_path = "sentiment_model.keras"
model.save(model_path)
print(f"Model saved as {model_path}")

Model saved as sentiment_model_h5


# Evaluate the model

In [None]:

# Score the trained model on the validation split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(x=X_val, y=y_val)
print(f"Validation Accuracy: {accuracy:.2f}")

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - accuracy: 0.4585 - loss: 0.9678
Validation Accuracy: 0.44
