In [1]:
import pandas as pd #Basic packages for creating dataframes and loading dataset
import numpy as np

import matplotlib.pyplot as plt #Package for visualization

import re #importing package for Regular expression operations

from sklearn.model_selection import train_test_split #Package for splitting the data

from sklearn.preprocessing import LabelEncoder #Package for conversion of categorical to Numerical

from keras.preprocessing.text import Tokenizer #Tokenization
from tensorflow.keras.preprocessing.sequence import pad_sequences #Add zeros or crop based on the length
from keras.models import Sequential #Sequential Neural Network
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D #For layers in Neural Network
from keras.utils.np_utils import to_categorical

In [7]:
import pandas as pd

# Load the dataset as a Pandas DataFrame
data = pd.read_csv('/content/Sentiment (1).csv')

# Select only the necessary columns 'text' and 'sentiment'
mask = data.columns.isin(['text', 'sentiment'])
data = data.loc[:, mask]


In [8]:
# Keeping only the necessary columns
data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\\s]', '', x)))

In [9]:
for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ') #Removing Retweets

In [10]:
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ') #Maximum words is 2000 to tokenize sentence
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values) #taking values to feature matrix

In [11]:
X = pad_sequences(X) #Padding the feature matrix

embed_dim = 128 #Dimension of the Embedded layer
lstm_out = 196 #Long short-term memory (LSTM) layer neurons

In [12]:
def createmodel():
    model = Sequential() #Sequential Neural Network
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1])) #input dimension 2000 Neurons, output dimension 128 Neurons
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2)) #Drop out 20%, 196 output Neurons, recurrent dropout 20%
    model.add(Dense(3,activation='softmax')) #3 output neurons[positive, Neutral, Negative], softmax as activation
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy']) #Compiling the model
    return model
# print(model.summary())

In [14]:
labelencoder = LabelEncoder() #Applying label Encoding on the label matrix
integer_encoded = labelencoder.fit_transform(data['sentiment']) #fitting the model
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42) #67% training data, 33% test data split

In [15]:
batch_size = 32 #Batch size 32
model = createmodel() #Function call to Sequential Neural Network
model.fit(X_train, Y_train, epochs = 1, batch_size=batch_size, verbose = 2) #verbose the higher, the more messages
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size) #evaluating the model
print(score)
print(acc)



291/291 - 72s - loss: 0.8287 - accuracy: 0.6416 - 72s/epoch - 248ms/step
144/144 - 2s - loss: 0.7513 - accuracy: 0.6763 - 2s/epoch - 13ms/step
0.7512679100036621
0.6762778759002686


In [16]:
print(model.metrics_names) #metrics of the model

['loss', 'accuracy']


In [18]:
"1. Save the model and use the saved model to predict on new text data (ex, “A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump”)"

"1. Save the model and use the saved model to predict on new text data (ex, “A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump”)"

In [19]:
model.save('sentimentAnalysis.h5') #Saving the model

In [20]:
from keras.models import load_model #Importing the package for importing the saved model
model= load_model('sentimentAnalysis.h5') #loading the saved model



In [21]:
print(integer_encoded)
print(data['sentiment'])

[1 2 1 ... 2 0 2]
0         Neutral
1        Positive
2         Neutral
3        Positive
4        Positive
           ...   
13866    Negative
13867    Positive
13868    Positive
13869    Negative
13870    Positive
Name: sentiment, Length: 13871, dtype: object


In [23]:
# Predicting on the text data
sentence = ['A lot of good things are happening. We are respected again throughout the world, and that is a great thing.@realDonaldTrump']
sentence = tokenizer.texts_to_sequences(sentence) # Tokenizing the sentence
sentence = pad_sequences(sentence, maxlen=28, dtype='int32', value=0) # Padding the sentence
sentiment_probs = model.predict(sentence, batch_size=1, verbose=2)[0] # Predicting the sentence text
sentiment = np.argmax(sentiment_probs)

print(sentiment_probs)
if sentiment == 0:
    print("Neutral")
elif sentiment < 0:
    print("Negative")
elif sentiment > 0:
    print("Positive")
else:
    print("Cannot be determined")

1/1 - 1s - 509ms/epoch - 509ms/step
[0.6314133  0.16051993 0.20806675]
Neutral


In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

model = KerasClassifier(build_fn=createmodel, verbose=2)
batch_size = [10, 20, 40]
epochs = [1, 2]
param_grid = {'batch_size': batch_size, 'epochs': epochs}
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


  model = KerasClassifier(build_fn=createmodel, verbose=2)


744/744 - 129s - loss: 0.8216 - accuracy: 0.6508 - 129s/epoch - 174ms/step
186/186 - 2s - loss: 0.7412 - accuracy: 0.6810 - 2s/epoch - 13ms/step




744/744 - 119s - loss: 0.8147 - accuracy: 0.6473 - 119s/epoch - 160ms/step
186/186 - 3s - loss: 0.7553 - accuracy: 0.6821 - 3s/epoch - 14ms/step




744/744 - 121s - loss: 0.8266 - accuracy: 0.6427 - 121s/epoch - 162ms/step
186/186 - 2s - loss: 0.7772 - accuracy: 0.6751 - 2s/epoch - 13ms/step




744/744 - 122s - loss: 0.8253 - accuracy: 0.6413 - 122s/epoch - 163ms/step
186/186 - 2s - loss: 0.7496 - accuracy: 0.6771 - 2s/epoch - 13ms/step




744/744 - 122s - loss: 0.8236 - accuracy: 0.6456 - 122s/epoch - 164ms/step
186/186 - 3s - loss: 0.7726 - accuracy: 0.6658 - 3s/epoch - 14ms/step




Epoch 1/2
744/744 - 123s - loss: 0.8234 - accuracy: 0.6442 - 123s/epoch - 165ms/step
Epoch 2/2
744/744 - 108s - loss: 0.6786 - accuracy: 0.7084 - 108s/epoch - 145ms/step
186/186 - 2s - loss: 0.7425 - accuracy: 0.6789 - 2s/epoch - 13ms/step




Epoch 1/2
744/744 - 117s - loss: 0.8254 - accuracy: 0.6421 - 117s/epoch - 157ms/step
Epoch 2/2
744/744 - 107s - loss: 0.6814 - accuracy: 0.7127 - 107s/epoch - 144ms/step
186/186 - 2s - loss: 0.7999 - accuracy: 0.6740 - 2s/epoch - 13ms/step




Epoch 1/2
744/744 - 119s - loss: 0.8226 - accuracy: 0.6443 - 119s/epoch - 160ms/step
Epoch 2/2
744/744 - 105s - loss: 0.6752 - accuracy: 0.7144 - 105s/epoch - 142ms/step
186/186 - 2s - loss: 0.7457 - accuracy: 0.6880 - 2s/epoch - 13ms/step




Epoch 1/2
744/744 - 120s - loss: 0.8304 - accuracy: 0.6417 - 120s/epoch - 162ms/step
Epoch 2/2
744/744 - 110s - loss: 0.6781 - accuracy: 0.7134 - 110s/epoch - 148ms/step
186/186 - 3s - loss: 0.7707 - accuracy: 0.6609 - 3s/epoch - 16ms/step




Epoch 1/2
744/744 - 124s - loss: 0.8135 - accuracy: 0.6482 - 124s/epoch - 166ms/step
Epoch 2/2
744/744 - 110s - loss: 0.6584 - accuracy: 0.7223 - 110s/epoch - 148ms/step
186/186 - 3s - loss: 0.7807 - accuracy: 0.6690 - 3s/epoch - 18ms/step




372/372 - 71s - loss: 0.8403 - accuracy: 0.6415 - 71s/epoch - 191ms/step
93/93 - 1s - loss: 0.7562 - accuracy: 0.6729 - 1s/epoch - 14ms/step




372/372 - 67s - loss: 0.8281 - accuracy: 0.6453 - 67s/epoch - 180ms/step
93/93 - 1s - loss: 0.7667 - accuracy: 0.6643 - 1s/epoch - 13ms/step




372/372 - 69s - loss: 0.8238 - accuracy: 0.6411 - 69s/epoch - 186ms/step
93/93 - 2s - loss: 0.7824 - accuracy: 0.6735 - 2s/epoch - 20ms/step




372/372 - 68s - loss: 0.8404 - accuracy: 0.6403 - 68s/epoch - 182ms/step
93/93 - 1s - loss: 0.7661 - accuracy: 0.6712 - 1s/epoch - 15ms/step




372/372 - 66s - loss: 0.8306 - accuracy: 0.6463 - 66s/epoch - 178ms/step
93/93 - 1s - loss: 0.7663 - accuracy: 0.6728 - 1s/epoch - 14ms/step




Epoch 1/2
372/372 - 67s - loss: 0.8383 - accuracy: 0.6361 - 67s/epoch - 181ms/step
Epoch 2/2
372/372 - 54s - loss: 0.6883 - accuracy: 0.7058 - 54s/epoch - 146ms/step
93/93 - 2s - loss: 0.7422 - accuracy: 0.6649 - 2s/epoch - 19ms/step




Epoch 1/2
372/372 - 67s - loss: 0.8250 - accuracy: 0.6426 - 67s/epoch - 181ms/step
Epoch 2/2
372/372 - 56s - loss: 0.6857 - accuracy: 0.7111 - 56s/epoch - 151ms/step
93/93 - 2s - loss: 0.7349 - accuracy: 0.6837 - 2s/epoch - 20ms/step




Epoch 1/2
372/372 - 67s - loss: 0.8296 - accuracy: 0.6383 - 67s/epoch - 181ms/step
Epoch 2/2
372/372 - 54s - loss: 0.6800 - accuracy: 0.7137 - 54s/epoch - 145ms/step
93/93 - 2s - loss: 0.7485 - accuracy: 0.6875 - 2s/epoch - 17ms/step




Epoch 1/2
372/372 - 69s - loss: 0.8346 - accuracy: 0.6417 - 69s/epoch - 186ms/step
Epoch 2/2
372/372 - 57s - loss: 0.6845 - accuracy: 0.7098 - 57s/epoch - 154ms/step
93/93 - 1s - loss: 0.7470 - accuracy: 0.6911 - 1s/epoch - 14ms/step




Epoch 1/2
372/372 - 69s - loss: 0.8237 - accuracy: 0.6420 - 69s/epoch - 185ms/step
Epoch 2/2
372/372 - 55s - loss: 0.6711 - accuracy: 0.7132 - 55s/epoch - 147ms/step
93/93 - 1s - loss: 0.7914 - accuracy: 0.6674 - 1s/epoch - 14ms/step




186/186 - 43s - loss: 0.8439 - accuracy: 0.6316 - 43s/epoch - 233ms/step
47/47 - 1s - loss: 0.7480 - accuracy: 0.6654 - 825ms/epoch - 18ms/step




186/186 - 42s - loss: 0.8396 - accuracy: 0.6425 - 42s/epoch - 225ms/step
47/47 - 1s - loss: 0.8189 - accuracy: 0.6407 - 863ms/epoch - 18ms/step




186/186 - 41s - loss: 0.8606 - accuracy: 0.6322 - 41s/epoch - 220ms/step
47/47 - 1s - loss: 0.7808 - accuracy: 0.6584 - 817ms/epoch - 17ms/step




186/186 - 41s - loss: 0.8466 - accuracy: 0.6366 - 41s/epoch - 222ms/step
47/47 - 1s - loss: 0.7594 - accuracy: 0.6803 - 848ms/epoch - 18ms/step




186/186 - 42s - loss: 0.8510 - accuracy: 0.6371 - 42s/epoch - 226ms/step
47/47 - 1s - loss: 0.8133 - accuracy: 0.6480 - 860ms/epoch - 18ms/step




Epoch 1/2
186/186 - 42s - loss: 0.8406 - accuracy: 0.6353 - 42s/epoch - 225ms/step
Epoch 2/2
186/186 - 31s - loss: 0.6886 - accuracy: 0.7068 - 31s/epoch - 166ms/step
47/47 - 1s - loss: 0.7311 - accuracy: 0.6934 - 856ms/epoch - 18ms/step




Epoch 1/2
186/186 - 42s - loss: 0.8438 - accuracy: 0.6363 - 42s/epoch - 223ms/step
Epoch 2/2
