Video Link:https://drive.google.com/file/d/1F7LVckNMizXEhHqd68peuZrZDPAr3XFl/view?usp=sharing

In [91]:
import pandas as pd 
import numpy as np
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import re

from sklearn.preprocessing import LabelEncoder
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [92]:
data = pd.read_csv('Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]

In [93]:
data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

In [94]:
for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

In [95]:
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

In [96]:
X = pad_sequences(X)

embed_dim = 129
lstm_out = 196

In [97]:
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

Modified the model to take embed_dim & lstm_out as parameters to be able to do gridsearch

In [98]:
# Define the function to create the model
def create_model(embed_dim, lstm_out):
    model = Sequential()
    model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [99]:
# Create the KerasClassifier object for use with GridSearchCV
model = KerasClassifier(build_fn=create_model)

  model = KerasClassifier(build_fn=create_model)


In [100]:
# Define the hyperparameters to search over
param_grid = {
    'embed_dim': [64, 128],
    'lstm_out': [128, 196]
}

In [101]:
# Create the GridSearchCV object
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2, verbose=2,scoring='neg_log_loss')

# Fit the GridSearchCV object to the training data
grid_result = grid.fit(X_train, Y_train)

Fitting 2 folds for each of 4 candidates, totalling 8 fits
[CV] END .........................embed_dim=64, lstm_out=128; total time=  25.9s
[CV] END .........................embed_dim=64, lstm_out=128; total time=  17.0s
[CV] END .........................embed_dim=64, lstm_out=196; total time=  47.0s
[CV] END .........................embed_dim=64, lstm_out=196; total time=  48.1s
[CV] END ........................embed_dim=128, lstm_out=128; total time=  20.0s
[CV] END ........................embed_dim=128, lstm_out=128; total time=  19.4s
[CV] END ........................embed_dim=128, lstm_out=196; total time=  49.7s
[CV] END ........................embed_dim=128, lstm_out=196; total time=  34.9s


In [102]:
# Print the best hyperparameters and score
print("Best parameters: ", grid_result.best_params_)
print("Best score: ", grid_result.best_score_)


Best parameters:  {'embed_dim': 128, 'lstm_out': 128}
Best score:  -0.7970625649995492


Modifying the model with best parameters obtained from gridsearch

In [105]:
# Build and train the model with the best hyperparameters
best_params = grid_result.best_params_
model = create_model(best_params['embed_dim'], best_params['lstm_out'])
model.fit(X_train, Y_train, epochs = 1, batch_size=32, verbose = 2)

291/291 - 29s - loss: 0.8246 - accuracy: 0.6448 - 29s/epoch - 98ms/step


<keras.callbacks.History at 0x7f9d9d5f7610>

In [106]:
# Evaluate the model on the test set
score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=32)
print("Test loss: ", score)
print("Test accuracy: ", acc)

144/144 - 2s - loss: 0.7687 - accuracy: 0.6767 - 2s/epoch - 14ms/step
Test loss:  0.7686622142791748
Test accuracy:  0.6767147183418274


In [1]:
test_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump"

In [107]:
# Enter your new text here

# Preprocess the new text
test_text = test_text.lower()
test_text = re.sub('[^a-zA-z0-9\s]', '', test_text)
test_text_seq = tokenizer.texts_to_sequences([test_text])
test_text_padded = pad_sequences(test_text_seq, maxlen=X.shape[1], padding='post')

# Predict the sentiment of the new text
predicted_sentiment = model.predict(test_text_padded)[0]
predicted_label = labelencoder.inverse_transform([np.argmax(predicted_sentiment)])
print("Predicted sentiment: ", predicted_label[0])


Predicted sentiment:  Neutral
