In [None]:
import re

import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer


In [None]:
# Load the dataset into a DataFrame, trying candidate encodings in order.
#
# Ordering matters: 'latin-1' (the same codec as 'iso-8859-1') maps every
# possible byte to a character, so it can never raise UnicodeDecodeError and
# anything listed after it would be unreachable. The stricter 'cp1252' is
# therefore tried first, and 'latin-1' is kept only as the last-resort fallback.
encodings = ['utf-8', 'cp1252', 'latin-1']

for encoding in encodings:
    try:
        df = pd.read_csv('Path Of Your Data Set', encoding=encoding)
    except UnicodeDecodeError:
        print(f"Failed to read with encoding {encoding}")
    else:
        # Reading succeeded; report which encoding was used and stop trying.
        print(f"Loaded data with encoding {encoding}")
        break
else:
    # No candidate worked: fail here with a clear message instead of letting
    # the cells below crash with a NameError on an undefined `df`.
    raise ValueError(f"Could not decode the data set with any of: {encodings}")

# Now df contains the DataFrame with the successfully decoded data

# Display a few values from the original data
print("Original Data Sample:")
print(df.head())


In [None]:
# Specify the column containing text data
message_column = 'Column Name Contain Text Data'

# Normalize the text column:
#   1. Replace missing values with an empty string. This must happen before
#      the string cast, because astype(str) would otherwise turn NaN into the
#      literal text "nan", which survives cleaning and would later be
#      tokenized as if it were a real word.
#   2. Cast any remaining non-string values to strings.
#   3. Lowercase.
#   4. Strip every character that is not a letter, digit, or whitespace.
df[message_column] = (
    df[message_column]
    .fillna('')
    .astype(str)
    .str.lower()
    .str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)
)

# Display a few values after preprocessing
print("\nData After Preprocessing:")
print(df.head())


In [None]:
# Turn the sentiment column into integer class ids. The fitted encoder is kept
# in `label_encoder` so the original label values stay available via `classes_`.
sentiment_labels = df['Column Name Contain Sentiment']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(sentiment_labels)

# Preview the first few encoded targets
print("\nEncoded Target Variable (y):", y_encoded[:5], sep="\n")


In [None]:
# Tunable preprocessing sizes, named so they are easy to find and adjust.
VOCAB_SIZE = 10000          # passed to the Tokenizer as num_words
MAX_SEQUENCE_LENGTH = 100   # length every sequence is padded/truncated to

# Fit the tokenizer on the cleaned text column
tokenizer = Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(df[message_column])

# Convert each text into a sequence of integer word indices
X_sequences = tokenizer.texts_to_sequences(df[message_column])
print("\nDisplay few values of sequences:")
print(X_sequences[:5])

# Pad/truncate the sequences to a fixed length so they form a 2-D array
X_padded = pad_sequences(X_sequences, maxlen=MAX_SEQUENCE_LENGTH)
print("\nDisplay few values of pad sequences:")
print(X_padded[:5])


In [None]:
# Hold out 20% of the padded sequences (and their labels) as a test set;
# the fixed random_state keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_padded,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Preview the first five rows of each resulting array
split_previews = (
    ("\nFew Values Of X_train :", X_train),
    ("\nFew Values Of X_test :", X_test),
    ("\nFew Values Of y_train :", y_train),
    ("\nFew Values Of y_test:", y_test),
)
for heading, values in split_previews:
    print(heading)
    print(values[:5])


In [None]:
# Derive the model dimensions from the fitted preprocessing objects instead of
# repeating literals, so the network always matches the data it is trained on.
vocab_size = tokenizer.num_words            # vocabulary cap given to the Tokenizer
sequence_length = X_train.shape[1]          # padded length of every input sequence
num_classes = len(label_encoder.classes_)   # one output unit per distinct label

# Build a stacked-LSTM classifier
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=sequence_length))
model.add(LSTM(units=128, return_sequences=True))  # Return sequences so the next LSTM gets a full sequence
model.add(LSTM(units=64))  # You can add more LSTM layers for better performance
model.add(Dense(units=num_classes, activation='softmax'))

# Compile with the default Adam optimizer; the sparse loss matches the
# integer-encoded labels held in y_train / y_test.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Implement early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train for up to 20 epochs, holding out 20% of the training data for validation.
# The result is kept in `history` so the per-epoch metrics can be inspected later
# and the cell does not emit a bare object repr as its output.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=128,
    validation_split=0.2,
    callbacks=[early_stopping],
)


In [None]:
# Score the trained model on the held-out test split and report both metrics
loss, accuracy = model.evaluate(X_test, y_test)
for metric_name, metric_value in (("Loss", loss), ("Accuracy", accuracy)):
    print(f'{metric_name}: {metric_value}')


In [None]:
# Make predictions on the test set (one row of class probabilities per sample)
y_pred = model.predict(X_test)

# Convert predicted probabilities to class ids by taking the most likely class
y_pred_labels = np.argmax(y_pred, axis=1)

# Pass the full set of class ids explicitly so the report always has one row
# per known class, even if a class is missing from the test split; otherwise
# the number of names can disagree with the classes actually present.
# The names are cast to str because the report expects string display names,
# while the original label values may be numeric.
class_ids = np.arange(len(label_encoder.classes_))
class_names = [str(class_name) for class_name in label_encoder.classes_]

# Generate a classification report
report = classification_report(
    y_test,
    y_pred_labels,
    labels=class_ids,
    target_names=class_names,
)
print("Classification Report:")
print(report)
