#Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.metrics import classification_report, confusion_matrix

### LOADING AND PROCESSING DATA


In [2]:
data = pd.read_csv('/content/sentiment-analysis.csv',sep=', ')
data.columns = data.columns.str.replace('"', '')
data = data.dropna()  # Remove rows with missing values
data['Text'] = data['Text'].astype(str)

  data = pd.read_csv('/content/sentiment-analysis.csv',sep=', ')


## ENCODE SENTIMENTAL VALUES


In [3]:
le = LabelEncoder()
data['Sentiment'] = le.fit_transform(data['Sentiment'])

## SPLITTING DATASET INTO TRAINING AND TESTING


In [5]:
X = data['Text']
y = data['Sentiment']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## TOKENIZE AND PREPARE SEQUENCES


In [7]:
max_words = 10000  # Maximum number of words in your vocabulary
max_len = 512  # Maximum length of input sequences

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

## BUILDING LSTM MODEL


In [8]:
embedding_dim = 100  # Dimension of word embeddings
lstm_units = 512  # Number of LSTM units

model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len))
model.add(LSTM(units=lstm_units, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train_pad, y_train, epochs=25, batch_size=64)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.src.callbacks.History at 0x7d4f537b56c0>

## PREDICTIING SENTIMENT VALUES


In [11]:
y_pred_probs = model.predict(X_test_pad)
y_pred = (y_pred_probs > 0.5).astype(int).flatten()

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))



Classification Report:
               precision    recall  f1-score   support

           0       0.45      1.00      0.62         5
           1       1.00      0.60      0.75        15

    accuracy                           0.70        20
   macro avg       0.73      0.80      0.69        20
weighted avg       0.86      0.70      0.72        20

Confusion Matrix:
 [[5 0]
 [6 9]]


# VISUALIZE CONFUSION MARTIX


In [None]:
plt.figure(figsize=(8, 6))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d", cmap="Blues",
            xticklabels=le.classes_, yticklabels=le.classes_)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()

## STORE PREDICTIONS


In [10]:
predictions = pd.DataFrame({'Text': X_test, 'Predicted_Sentiment': le.inverse_transform(y_pred)})
predictions.to_csv('predictions.csv', index=False)  # Replace with desired file path

print("Predictions:\n", predictions.head())

Predictions:
                                                  Text Predicted_Sentiment
80                     """The service was terrible.""            Negative
77  """This song always puts me in a nostalgic moo...            Positive
73  """The customer service at this store is outst...            Negative
94  """Their website is so confusing and poorly de...            Negative
33  """This restaurant has the most delicious food...            Positive
