In [8]:
from google.colab import drive
# Mount Google Drive at /content/gdrive so files stored there can be read by path.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [7]:
# Location of the sentiment CSV under the /content/gdrive mount point.
path_to_csv = '/content/gdrive/My Drive/Sentiment.csv'

In [9]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
#from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import re
import numpy as np

from keras.models import load_model

from sklearn.preprocessing import LabelEncoder

# Load the CSV and keep only the two columns used below. .copy() gives an
# independent frame so the column assignments that follow are unambiguous.
data = pd.read_csv(path_to_csv)
data = data[['text', 'sentiment']].copy()

# Normalise the text: lower-case, then drop everything that is not a letter,
# digit or whitespace.
# Use `A-Z`, not `A-z`: the latter's ASCII range also covers [ \ ] ^ _ and `,
# which would be kept by mistake. The pattern is a raw string so that `\s`
# is not treated as an (invalid) string escape.
data['text'] = data['text'].str.lower()
data['text'] = data['text'].str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)

# Remove the retweet marker. Rows yielded by iterrows() are copies, so editing
# them never reaches `data`; assign to the column instead. `\b` restricts the
# match to the standalone token so words such as "party" are left intact.
data['text'] = data['text'].str.replace(r'\brt\b', ' ', regex=True)

# Vocabulary cap for the tokenizer; createmodel() reads this name (spelling
# kept as-is for that reason).
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

# With no maxlen given, every sequence is padded to the longest one.
X = pad_sequences(X)

# Model hyper-parameters read by createmodel().
embed_dim = 128
lstm_out = 196
def createmodel():
    """Build and compile the LSTM sentiment classifier.

    Reads the module-level ``max_fatures``, ``embed_dim``, ``lstm_out`` and the
    padded matrix ``X`` (its second dimension is used as the input length).

    Returns:
        A compiled ``Sequential`` model: Embedding -> LSTM -> 3-unit softmax.
    """
    layers = [
        Embedding(max_fatures, embed_dim, input_length=X.shape[1]),
        LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2),
        Dense(3, activation='softmax'),
    ]
    net = Sequential(layers)
    net.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net
# print(model.summary())

# Turn the string sentiment labels into integer ids, then into one-hot rows.
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)

# Hold out 33% of the samples for evaluation; the seed fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

batch_size = 32
model = createmodel()

# Train for a single epoch, then measure loss/accuracy on the held-out part.
model.fit(X_train, Y_train, batch_size=batch_size, epochs=1, verbose=2)
score, acc = model.evaluate(X_test, Y_test, batch_size=batch_size, verbose=2)

# One value per line: loss, accuracy, then the metric names.
for value in (score, acc, model.metrics_names):
    print(value)

def predict_sentiment(text):
    """Classify the sentiment of a single piece of text.

    The text is lower-cased, stripped of everything except letters, digits and
    whitespace, converted to a padded index sequence with the module-level
    ``tokenizer`` and scored with the module-level ``model``.

    Args:
        text: The raw string to classify.

    Returns:
        The label (as ``str``) of the highest-scoring class, looked up in
        ``labelencoder.classes_``.
    """
    text = text.lower()
    # Raw string: `\s` inside a plain literal is an invalid escape sequence.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    sequence = tokenizer.texts_to_sequences([text])
    # Pad to the width of the training matrix, the input length the model was built with.
    padded_sequence = pad_sequences(sequence, maxlen=X.shape[1])
    prediction = model.predict(padded_sequence)
    # Resolve the index through the fitted encoder rather than a hand-written
    # list, so the label order always matches the one used to build `y`.
    class_index = int(np.argmax(prediction))
    return str(labelencoder.classes_[class_index])

# Example usage: classify one unseen tweet. Requires `model` to have been
# trained and `predict_sentiment` to be defined before this runs.
new_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing. @realDonaldTrump"
predicted_sentiment = predict_sentiment(new_text)
print("Predicted sentiment:", predicted_sentiment)




291/291 - 55s - loss: 0.8145 - accuracy: 0.6524 - 55s/epoch - 188ms/step
144/144 - 2s - loss: 0.7818 - accuracy: 0.6640 - 2s/epoch - 11ms/step
0.7817742228507996
0.6640454530715942
['loss', 'accuracy']
Predicted sentiment: Negative


In [14]:
import pandas as pd
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import re
import numpy as np

from keras.models import load_model
from sklearn.preprocessing import LabelEncoder
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Load the CSV and keep only the two columns used below. .copy() gives an
# independent frame so the column assignments that follow are unambiguous.
data = pd.read_csv(path_to_csv)
data = data[['text', 'sentiment']].copy()

# Normalise the text: lower-case, then drop everything that is not a letter,
# digit or whitespace.
# Use `A-Z`, not `A-z`: the latter's ASCII range also covers [ \ ] ^ _ and `,
# which would be kept by mistake. The pattern is a raw string so that `\s`
# is not treated as an (invalid) string escape.
data['text'] = data['text'].str.lower()
data['text'] = data['text'].str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)

# Remove the retweet marker. Rows yielded by iterrows() are copies, so editing
# them never reaches `data`; assign to the column instead. `\b` restricts the
# match to the standalone token so words such as "party" are left intact.
data['text'] = data['text'].str.replace(r'\brt\b', ' ', regex=True)

# Vocabulary cap for the tokenizer; createmodel() reads this name.
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

# With no maxlen given, every sequence is padded to the longest one.
X = pad_sequences(X)

# Model hyper-parameters read by createmodel().
embed_dim = 128
lstm_out = 196

def createmodel(optimizer='adam'):
    """Build and compile the LSTM sentiment classifier.

    Reads the module-level ``max_features``, ``embed_dim``, ``lstm_out`` and
    the padded matrix ``X`` (its second dimension is used as the input length).

    Args:
        optimizer: Optimizer passed straight through to ``compile``.

    Returns:
        A compiled ``Sequential`` model: Embedding -> LSTM -> 3-unit softmax.
    """
    layers = [
        Embedding(max_features, embed_dim, input_length=X.shape[1]),
        LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2),
        Dense(3, activation='softmax'),
    ]
    net = Sequential(layers)
    net.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Turn the string sentiment labels into integer ids, then into one-hot rows.
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)

# Hold out 33% of the samples; the seed fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, random_state=42, test_size=0.33
)

batch_size = 32

# Wrap the model factory so scikit-learn can drive it.
model = KerasClassifier(build_fn=createmodel, verbose=0)

# Hyper-parameter values to try; every combination is evaluated.
param_grid = {
    'batch_size': [32, 64],
    'epochs': [1, 2],
    'optimizer': ['adam', 'rmsprop'],
}

# 3-fold cross-validated search over the grid, fitted on the training part.
grid = GridSearchCV(cv=3, estimator=model, param_grid=param_grid, verbose=2)
grid_result = grid.fit(X_train, Y_train)

# Report the winning combination, then the mean/std score of every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))


  model = KerasClassifier(build_fn=createmodel, verbose=0)


Fitting 3 folds for each of 8 candidates, totalling 24 fits




[CV] END ............batch_size=32, epochs=1, optimizer=adam; total time=  41.9s




[CV] END ............batch_size=32, epochs=1, optimizer=adam; total time=  45.5s




[CV] END ............batch_size=32, epochs=1, optimizer=adam; total time=  46.6s




[CV] END .........batch_size=32, epochs=1, optimizer=rmsprop; total time=  45.3s




[CV] END .........batch_size=32, epochs=1, optimizer=rmsprop; total time=  45.1s




[CV] END .........batch_size=32, epochs=1, optimizer=rmsprop; total time=  46.9s




[CV] END ............batch_size=32, epochs=2, optimizer=adam; total time= 1.1min




[CV] END ............batch_size=32, epochs=2, optimizer=adam; total time= 1.4min




[CV] END ............batch_size=32, epochs=2, optimizer=adam; total time= 1.1min




[CV] END .........batch_size=32, epochs=2, optimizer=rmsprop; total time= 1.4min




[CV] END .........batch_size=32, epochs=2, optimizer=rmsprop; total time= 1.1min




[CV] END .........batch_size=32, epochs=2, optimizer=rmsprop; total time= 1.1min




[CV] END ............batch_size=64, epochs=1, optimizer=adam; total time=  44.8s




[CV] END ............batch_size=64, epochs=1, optimizer=adam; total time=  27.9s




[CV] END ............batch_size=64, epochs=1, optimizer=adam; total time=  26.9s




[CV] END .........batch_size=64, epochs=1, optimizer=rmsprop; total time=  44.7s




[CV] END .........batch_size=64, epochs=1, optimizer=rmsprop; total time=  24.6s




[CV] END .........batch_size=64, epochs=1, optimizer=rmsprop; total time=  24.1s




[CV] END ............batch_size=64, epochs=2, optimizer=adam; total time=  45.4s




[CV] END ............batch_size=64, epochs=2, optimizer=adam; total time=  41.1s




[CV] END ............batch_size=64, epochs=2, optimizer=adam; total time=  45.5s




[CV] END .........batch_size=64, epochs=2, optimizer=rmsprop; total time=  45.2s




[CV] END .........batch_size=64, epochs=2, optimizer=rmsprop; total time=  44.6s




[CV] END .........batch_size=64, epochs=2, optimizer=rmsprop; total time=  45.5s
Best: 0.675025 using {'batch_size': 64, 'epochs': 2, 'optimizer': 'adam'}
0.662649 (0.005100) with: {'batch_size': 32, 'epochs': 1, 'optimizer': 'adam'}
0.653826 (0.002873) with: {'batch_size': 32, 'epochs': 1, 'optimizer': 'rmsprop'}
0.664154 (0.015085) with: {'batch_size': 32, 'epochs': 2, 'optimizer': 'adam'}
0.665124 (0.001244) with: {'batch_size': 32, 'epochs': 2, 'optimizer': 'rmsprop'}
0.658237 (0.005930) with: {'batch_size': 64, 'epochs': 1, 'optimizer': 'adam'}
0.630583 (0.009395) with: {'batch_size': 64, 'epochs': 1, 'optimizer': 'rmsprop'}
0.675025 (0.004854) with: {'batch_size': 64, 'epochs': 2, 'optimizer': 'adam'}
0.665447 (0.001041) with: {'batch_size': 64, 'epochs': 2, 'optimizer': 'rmsprop'}
