<a href="https://colab.research.google.com/github/SujithKumarAnumolu/NeuralNetworkDeepLearning/blob/main/Assignment6/ICP6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install keras-tuner


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/129.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
pip install pandas scikit-learn tensorflow keras-tuner




In [None]:
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import re
from sklearn.preprocessing import LabelEncoder
import keras_tuner as kt
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk
from nltk.stem import WordNetLemmatizer

# Download required NLTK resources.
# Newer NLTK releases load 'punkt_tab' inside word_tokenize while older ones
# load 'punkt', so both are requested to keep the cell working on either.
for nltk_resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(nltk_resource)

# Load the data and keep only the tweet text and its sentiment label.
# Rows missing either field are dropped (the string cleaning below cannot
# handle them), and .copy() makes the column assignments that follow operate
# on an independent frame.
data = pd.read_csv('Sentiment.csv')
data = data[['text', 'sentiment']].dropna().copy()

# Lower-case, then strip everything except letters, digits and whitespace.
# The pattern is a raw string so that \s reaches the regex engine unchanged
# instead of being read as an invalid string escape.
data['text'] = data['text'].str.lower()
data['text'] = data['text'].str.replace(r'[^a-zA-Z0-9\s]', '', regex=True)

# Initialize stopwords and lemmatizer
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()


def _lemmatize_without_stopwords(text):
    """Tokenize `text`, drop English stopwords and lemmatize the remaining words."""
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in word_tokenize(text)
        if word not in stop_words
    )


# Remove stopwords and lemmatize
data['text'] = data['text'].apply(_lemmatize_without_stopwords)

# Remove 'rt' (presumably the retweet marker) only where it stands as a token
# of its own. A plain substring replace would also cut it out of ordinary
# words such as "party" or "support".
data['text'] = data['text'].str.replace(r'\brt\b', ' ', regex=True)

# Check class distribution
print(data['sentiment'].value_counts())

# Vocabulary cap handed to the tokenizer. The (misspelled) name is kept
# because createmodel reads it to size the embedding layer.
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)
# With no maxlen given, every sequence is padded to the longest one; the
# resulting width X.shape[1] is reused when building and querying the model.
X = pad_sequences(X)

# Embedding width and LSTM unit count used by createmodel.
embed_dim = 128
lstm_out = 196

# Define the model
def createmodel(hp):
    """Build and compile the LSTM sentiment classifier.

    The layer sizes come from the module-level settings `max_fatures`,
    `embed_dim` and `lstm_out`; the input length is the width of the padded
    feature matrix `X`.

    Args:
        hp: KerasTuner hyperparameter container. Only one value is drawn from
            it: the 'optimizer' choice between 'adam' and 'rmsprop'.

    Returns:
        A compiled Sequential model ending in a 3-unit softmax layer, trained
        with categorical cross-entropy and reporting accuracy.
    """
    network = Sequential([
        Embedding(max_fatures, embed_dim, input_length=X.shape[1]),
        SpatialDropout1D(0.2),
        LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2),
        Dense(3, activation='softmax'),
    ])

    # The optimizer is the single tunable hyperparameter of this model.
    optimizer_name = hp.Choice('optimizer', values=['adam', 'rmsprop'])
    network.compile(
        optimizer=optimizer_name,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Encode the sentiment labels as integers, then one-hot vectors, and hold out
# a third of the samples for testing.
labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Train and save the baseline model. createmodel is given a fresh
# HyperParameters object, so its hp.Choice falls back to the default value.
batch_size = 32
model = createmodel(kt.HyperParameters())
model.fit(X_train, Y_train, epochs=10, batch_size=batch_size, verbose=2, validation_split=0.1)
model.save('sentiment_lstm_model.h5')

# Evaluate the baseline model on the held-out split
loss, accuracy = model.evaluate(X_test, Y_test, verbose=2)
print(f"Test Accuracy: {accuracy}")

# Load the saved model and predict the sentiment of a new piece of text
model = load_model('sentiment_lstm_model.h5')
new_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing. @realDonaldTrump"
new_text = new_text.lower()
# Raw string: \s must reach the regex engine rather than be parsed as an
# (invalid) string escape.
new_text = re.sub(r'[^a-zA-Z0-9\s]', '', new_text)
new_text = ' '.join([lemmatizer.lemmatize(word) for word in word_tokenize(new_text) if word not in stop_words])
# Remove 'rt' only as a standalone token; a substring replace would also cut
# it out of ordinary words such as "party" or "support".
new_text = re.sub(r'\brt\b', ' ', new_text)
new_sequence = tokenizer.texts_to_sequences([new_text])
# Pad to the same width the model was built with.
new_sequence = pad_sequences(new_sequence, maxlen=X.shape[1])
predicted_sentiment = model.predict(new_sequence)
predicted_label_index = predicted_sentiment.argmax(axis=1)[0]
predicted_label = labelencoder.inverse_transform([predicted_label_index])
print(f"Predicted sentiment: {predicted_label[0]}")

# Apply KerasTuner for hyperparameter tuning.
# NOTE(review): when my_dir/sentiment_analysis already exists the tuner reloads
# it instead of searching again; delete the directory (or pass overwrite=True)
# after changing createmodel.
tuner = kt.RandomSearch(
    createmodel,
    objective='val_accuracy',
    max_trials=10,  # upper bound; createmodel exposes only one two-valued choice
    executions_per_trial=3,
    directory='my_dir',
    project_name='sentiment_analysis'
)

tuner.search(X_train, Y_train, epochs=5, validation_split=0.1)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best hyperparameters: {best_hps.values}")

# Train the model with the best hyperparameters
best_model = tuner.hypermodel.build(best_hps)
best_model.fit(X_train, Y_train, epochs=10, validation_split=0.1)

# Score the tuned model on the same held-out split as the baseline so the two
# results can be compared directly.
best_loss, best_accuracy = best_model.evaluate(X_test, Y_test, verbose=2)
print(f"Tuned model test accuracy: {best_accuracy}")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


sentiment
Negative    8493
Neutral     3142
Positive    2236
Name: count, dtype: int64




Epoch 1/10
262/262 - 39s - loss: 0.8316 - accuracy: 0.6389 - val_loss: 0.7663 - val_accuracy: 0.6871 - 39s/epoch - 148ms/step
Epoch 2/10
262/262 - 28s - loss: 0.6733 - accuracy: 0.7074 - val_loss: 0.7600 - val_accuracy: 0.6785 - 28s/epoch - 106ms/step
Epoch 3/10
262/262 - 23s - loss: 0.6075 - accuracy: 0.7427 - val_loss: 0.8007 - val_accuracy: 0.6505 - 23s/epoch - 90ms/step
Epoch 4/10
262/262 - 25s - loss: 0.5682 - accuracy: 0.7588 - val_loss: 0.8447 - val_accuracy: 0.6559 - 25s/epoch - 97ms/step
Epoch 5/10
262/262 - 26s - loss: 0.5305 - accuracy: 0.7765 - val_loss: 0.8366 - val_accuracy: 0.6581 - 26s/epoch - 98ms/step
Epoch 6/10
262/262 - 27s - loss: 0.4879 - accuracy: 0.7972 - val_loss: 0.9502 - val_accuracy: 0.6441 - 27s/epoch - 104ms/step
Epoch 7/10
262/262 - 25s - loss: 0.4543 - accuracy: 0.8105 - val_loss: 0.9606 - val_accuracy: 0.6269 - 25s/epoch - 96ms/step
Epoch 8/10
262/262 - 25s - loss: 0.4240 - accuracy: 0.8204 - val_loss: 1.0820 - val_accuracy: 0.6409 - 25s/epoch - 97ms/st

  saving_api.save_model(


144/144 - 2s - loss: 1.1996 - accuracy: 0.6429 - 2s/epoch - 13ms/step




Test Accuracy: 0.6428571343421936








Predicted sentiment: Positive
Reloading Tuner from my_dir/sentiment_analysis/tuner0.json
Best hyperparameters: {'optimizer': 'adam'}
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7fd61d4e6e90>