In [None]:

import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, LSTM
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import numpy as np


# Load the raw dataset and keep only the two columns the model uses.
# Rows with a missing text or label are dropped (a NaN text would crash the
# string cleanup below), and ``.copy()`` makes ``data`` an independent frame
# so the column rewrite below does not raise pandas' SettingWithCopyWarning.
data = pd.read_csv('Data.csv')
data = data[['text', 'sentiment']].dropna().copy()

# Normalise the text in a single pass: lower-case it, strip every character
# the pattern does not whitelist, then blank out the retweet marker 'rt'.
#
# NOTE(review): the range ``A-z`` also covers the ASCII characters between
# 'Z' and 'a' ([ \ ] ^ _ `), so those survive the filter, and
# ``replace('rt', ' ')`` also hits 'rt' inside longer words. Both are left
# untouched here because the inference-time cleanup later in this file uses
# the exact same expression; the two must be changed together.
data['text'] = data['text'].astype(str).apply(
    lambda x: re.sub('[^a-zA-z0-9\\s]', '', x.lower()).replace('rt', ' ')
)


# Vocabulary cap handed to the tokenizer (``num_words``) and reused as the
# embedding layer's input dimension.
max_features = 2000

# Fit the word index on the cleaned corpus, then turn every document into a
# sequence of word ids padded to a common length.
corpus = data['text'].values
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(corpus)
X = pad_sequences(tokenizer.texts_to_sequences(corpus))


# Map the sentiment labels to integer ids, then one-hot encode those ids so
# they match the softmax / categorical cross-entropy output of the model.
# ``labelencoder`` is kept around to map predictions back to label names.
labelencoder = LabelEncoder()
labelencoder.fit(data['sentiment'])
integer_encoded = labelencoder.transform(data['sentiment'])
y = to_categorical(integer_encoded)


def createmodel(embed_dim=128, lstm_out=196):
    """Build and compile the Embedding -> LSTM -> softmax sentiment model.

    Reads the module-level ``max_features`` (embedding vocabulary size) and
    ``y`` (one-hot label matrix) to size the input and output layers.

    Args:
        embed_dim: Size of each word-embedding vector.
        lstm_out: Number of units in the LSTM layer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and an accuracy metric.
    """
    model = Sequential()
    # ``input_length`` is intentionally not passed: Keras 3 deprecates the
    # argument and the sequence length is inferred from the first batch.
    model.add(Embedding(max_features, embed_dim))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    # One output unit per one-hot column of ``y`` instead of a hard-coded 3,
    # so the model keeps matching the labels if the class set changes.
    model.add(Dense(y.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Train the default-configuration model on the full dataset for three
# epochs, showing the per-epoch progress bar.
model = createmodel()
model.fit(x=X, y=y, batch_size=32, epochs=3, verbose=1)

# Persist the trained network so the prediction step can reload it from disk.
model.save('saved_lstm_model.h5')


# Text(s) to classify with the saved model.
new_text = ["A lot of good things are happening. We are respected again throughout the world, and that's a great thing .@realDonaldTrump"]

# Apply exactly the same cleanup that was applied to the training text
# (lower-case, character filter, 'rt' removal); the two must stay in sync.
new_text_proc = [re.sub('[^a-zA-z0-9\\s]', '', s.lower()).replace('rt', ' ') for s in new_text]

# Reuse the fitted tokenizer and pad to the training sequence length so the
# input has the same width as the data the model was trained on.
seq = tokenizer.texts_to_sequences(new_text_proc)
padded = pad_sequences(seq, maxlen=X.shape[1])

loaded_model = load_model('saved_lstm_model.h5')
prediction = loaded_model.predict(padded)

# Take the arg-max per row. Without ``axis=1`` numpy flattens the whole
# (n_texts, n_classes) array and returns a single index, which is only
# correct when exactly one text is scored.
predicted_class = np.argmax(prediction, axis=1)
predicted_sentiment = labelencoder.inverse_transform(predicted_class)

# One output line per input text, in input order.
for sentiment in predicted_sentiment:
    print("Predicted Sentiment:", sentiment)

from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier

# Model factory handed to KerasClassifier so GridSearchCV can rebuild the
# network for every hyperparameter combination.
def build_model(embed_dim=128, lstm_out=196):
    """Build and compile the Embedding -> LSTM -> softmax sentiment model.

    Reads the module-level ``max_features`` (embedding vocabulary size) and
    ``y`` (one-hot label matrix) to size the input and output layers.

    Args:
        embed_dim: Size of each word-embedding vector.
        lstm_out: Number of units in the LSTM layer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and an accuracy metric.
    """
    model = Sequential()
    # ``input_length`` is intentionally not passed: Keras 3 deprecates the
    # argument and the sequence length is inferred from the first batch.
    model.add(Embedding(max_features, embed_dim))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    # One output unit per one-hot column of ``y`` instead of a hard-coded 3,
    # so the model keeps matching the labels if the class set changes.
    model.add(Dense(y.shape[1], activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Wrap the model factory for scikit-learn. ``model=`` is the scikeras
# parameter name; ``build_fn`` is its deprecated alias.
keras_model = KerasClassifier(
    model=build_model,
    verbose=0
)

# Hyperparameter grid.
# scikeras does not introspect the build function's signature, so arguments
# meant for ``build_model`` must be routed with the ``model__`` prefix;
# un-prefixed ``embed_dim`` / ``lstm_out`` keys are rejected by
# ``set_params`` as invalid parameters. ``batch_size`` and ``epochs`` are
# genuine KerasClassifier parameters and need no prefix.
param_grid = {
    'model__embed_dim': [64, 128],
    'model__lstm_out': [128, 196],
    'batch_size': [16, 32],
    'epochs': [3],  # Keep small for testing
}

# Exhaustive search over the grid with 3-fold cross-validation.
grid = GridSearchCV(estimator=keras_model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X, y)

print("Best Score: {:.2f}%".format(grid_result.best_score_ * 100))
print("Best Parameters:", grid_result.best_params_)




Epoch 1/3
434/434 ━━━━━━━━━━━━━━━━━━━━ 72s 155ms/step - accuracy: 0.6267 - loss: 0.8645
Epoch 2/3
434/434 ━━━━━━━━━━━━━━━━━━━━ 85s 162ms/step - accuracy: 0.7189 - loss: 0.6644
Epoch 3/3
394/434 ━━━━━━━━━━━━━━━━━━━━ 6s 162ms/step - accuracy: 0.7398 - loss: 0.6254