<a href="https://colab.research.google.com/github/Sanjana-213/Neural-Network/blob/main/ICP6_700755482.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip uninstall -y tensorflow keras pandas matplotlib scikit-learn
!pip install tensorflow==2.15.0 keras==2.15.0 pandas==2.0.3 matplotlib==3.9.1 scikit-learn==1.5.1


Found existing installation: tensorflow 2.15.0
Uninstalling tensorflow-2.15.0:
  Successfully uninstalled tensorflow-2.15.0
Found existing installation: keras 2.15.0
Uninstalling keras-2.15.0:
  Successfully uninstalled keras-2.15.0
Found existing installation: pandas 2.0.3
Uninstalling pandas-2.0.3:
  Successfully uninstalled pandas-2.0.3
Found existing installation: matplotlib 3.7.1
Uninstalling matplotlib-3.7.1:
  Successfully uninstalled matplotlib-3.7.1
Found existing installation: scikit-learn 1.2.2
Uninstalling scikit-learn-1.2.2:
  Successfully uninstalled scikit-learn-1.2.2
Collecting tensorflow==2.15.0
  Downloading tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras==2.15.0
  Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split, GridSearchCV # Import GridSearchCV here
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

# Load the dataset as a Pandas DataFrame
path_to_csv = '/content/sample_data/Sentiment (3) (2).csv'
dataset = pd.read_csv(path_to_csv, header=0)

# Select only the necessary columns 'text' and 'sentiment'
mask = dataset.columns.isin(['text', 'sentiment'])
data = dataset.loc[:, mask]

# Keeping only the necessary columns
data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ') # Removing Retweets

max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ') # Maximum words is 2000 to tokenize sentence
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values) # Taking values to feature matrix
X = pad_sequences(X) # Padding the feature matrix

embed_dim = 128 # Dimension of the Embedded layer
lstm_out = 196 # Long short-term memory (LSTM) layer neurons

def createmodel():
    model = Sequential() # Sequential Neural Network
    model.add(Embedding(max_features, embed_dim, input_length = X.shape[1])) # input dimension 2000 Neurons, output dimension 128 Neurons
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2)) # Drop out 20%, 196 output Neurons, recurrent dropout 20%
    model.add(Dense(3, activation='softmax')) # 3 output neurons[positive, Neutral, Negative], softmax as activation
    model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics = ['accuracy']) # Compiling the model
    return model

labelencoder = LabelEncoder() # Applying label Encoding on the label matrix
integer_encoded = labelencoder.fit_transform(data['sentiment']) # Fitting the model
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=42) # 67% training data, 33% test data split

batch_size = 32 # Batch size 32
model = createmodel() # Function call to Sequential Neural Network
model.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=2) # verbose the higher, the more messages
score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size) # evaluating the model
print(score)
print(acc)
print(model.metrics_names) # metrics of the model
print(integer_encoded)
print(data['sentiment'])

# Predicting on the text data
sentence = ['A lot of good things are happening. We are respected again throughout the world, and that is a great thing.@realDonaldTrump']
sentence = tokenizer.texts_to_sequences(sentence) # Tokenizing the sentence
sentence = pad_sequences(sentence, maxlen=X.shape[1], dtype='int32', value=0) # Padding the sentence
sentiment_probs = model.predict(sentence, batch_size=1, verbose=2)[0] # Predicting the sentence text
sentiment = np.argmax(sentiment_probs)

print(sentiment_probs)
if sentiment == 0:
    print("Neutral")
elif sentiment == 1:
    print("Negative")
else:
    print("Positive")

# Custom wrapper for Keras model
from sklearn.base import BaseEstimator, ClassifierMixin

class CustomKerasClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, build_fn=None, epochs=1, batch_size=32, verbose=1, **sk_params):
        self.build_fn = build_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.sk_params = sk_params
        self.model = None

    def fit(self, X, y, **kwargs):
        self.model = self.build_fn()
        return self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose, **kwargs)

    def predict(self, X, **kwargs):
        return self.model.predict(X, **kwargs)

    def predict_proba(self, X, **kwargs):
        return self.model.predict(X, **kwargs)

    def score(self, X, y, **kwargs):
        _, accuracy = self.model.evaluate(X, y, verbose=0)
        return accuracy

# Use the custom Keras classifier
model = CustomKerasClassifier(build_fn=createmodel, verbose=2)
batch_size = [10, 20, 40]
epochs = [1, 2]
param_grid = {'batch_size': batch_size, 'epochs': epochs}
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)

# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['text'] = data['text'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['text'] = data['text'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x))


291/291 - 41s - loss: 0.8245 - accuracy: 0.6438 - 41s/epoch - 139ms/step
144/144 - 2s - loss: 0.7601 - accuracy: 0.6599 - 2s/epoch - 11ms/step
0.7601013779640198
0.659895122051239
['loss', 'accuracy']
[1 2 1 ... 2 0 2]
0         Neutral
1        Positive
2         Neutral
3        Positive
4        Positive
           ...   
13866    Negative
13867    Positive
13868    Positive
13869    Negative
13870    Positive
Name: sentiment, Length: 13871, dtype: object
1/1 - 0s - 286ms/epoch - 286ms/step




[0.74329686 0.13145688 0.12524626]
Neutral
744/744 - 79s - loss: 0.8304 - accuracy: 0.6481 - 79s/epoch - 106ms/step




744/744 - 80s - loss: 0.8263 - accuracy: 0.6455 - 80s/epoch - 107ms/step




744/744 - 79s - loss: 0.8236 - accuracy: 0.6411 - 79s/epoch - 107ms/step




744/744 - 81s - loss: 0.8246 - accuracy: 0.6455 - 81s/epoch - 109ms/step




744/744 - 82s - loss: 0.8227 - accuracy: 0.6468 - 82s/epoch - 110ms/step




Epoch 1/2
744/744 - 81s - loss: 0.8258 - accuracy: 0.6519 - 81s/epoch - 108ms/step
Epoch 2/2
744/744 - 72s - loss: 0.6782 - accuracy: 0.7113 - 72s/epoch - 97ms/step




Epoch 1/2
744/744 - 80s - loss: 0.8219 - accuracy: 0.6430 - 80s/epoch - 107ms/step
Epoch 2/2
744/744 - 70s - loss: 0.6818 - accuracy: 0.7121 - 70s/epoch - 95ms/step




Epoch 1/2
744/744 - 82s - loss: 0.8231 - accuracy: 0.6443 - 82s/epoch - 110ms/step
Epoch 2/2
744/744 - 74s - loss: 0.6703 - accuracy: 0.7176 - 74s/epoch - 99ms/step




Epoch 1/2
744/744 - 81s - loss: 0.8247 - accuracy: 0.6433 - 81s/epoch - 109ms/step
Epoch 2/2
744/744 - 73s - loss: 0.6727 - accuracy: 0.7100 - 73s/epoch - 99ms/step




Epoch 1/2
744/744 - 81s - loss: 0.8236 - accuracy: 0.6408 - 81s/epoch - 109ms/step
Epoch 2/2
744/744 - 72s - loss: 0.6691 - accuracy: 0.7139 - 72s/epoch - 97ms/step




372/372 - 46s - loss: 0.8263 - accuracy: 0.6453 - 46s/epoch - 124ms/step




372/372 - 46s - loss: 0.8283 - accuracy: 0.6484 - 46s/epoch - 123ms/step




372/372 - 44s - loss: 0.8291 - accuracy: 0.6392 - 44s/epoch - 119ms/step




372/372 - 47s - loss: 0.8381 - accuracy: 0.6391 - 47s/epoch - 126ms/step




372/372 - 46s - loss: 0.8205 - accuracy: 0.6476 - 46s/epoch - 125ms/step




Epoch 1/2
372/372 - 46s - loss: 0.8340 - accuracy: 0.6415 - 46s/epoch - 124ms/step
Epoch 2/2
372/372 - 36s - loss: 0.6864 - accuracy: 0.7057 - 36s/epoch - 97ms/step




Epoch 1/2
372/372 - 47s - loss: 0.8252 - accuracy: 0.6454 - 47s/epoch - 126ms/step
Epoch 2/2
372/372 - 37s - loss: 0.6815 - accuracy: 0.7115 - 37s/epoch - 99ms/step




Epoch 1/2
372/372 - 46s - loss: 0.8387 - accuracy: 0.6410 - 46s/epoch - 125ms/step
Epoch 2/2
372/372 - 36s - loss: 0.6823 - accuracy: 0.7097 - 36s/epoch - 98ms/step




Epoch 1/2
372/372 - 45s - loss: 0.8284 - accuracy: 0.6374 - 45s/epoch - 120ms/step
Epoch 2/2
372/372 - 36s - loss: 0.6753 - accuracy: 0.7138 - 36s/epoch - 97ms/step




Epoch 1/2
372/372 - 45s - loss: 0.8242 - accuracy: 0.6460 - 45s/epoch - 121ms/step
Epoch 2/2
372/372 - 35s - loss: 0.6784 - accuracy: 0.7081 - 35s/epoch - 93ms/step




186/186 - 29s - loss: 0.8425 - accuracy: 0.6379 - 29s/epoch - 155ms/step




186/186 - 28s - loss: 0.8410 - accuracy: 0.6381 - 28s/epoch - 150ms/step




186/186 - 27s - loss: 0.8421 - accuracy: 0.6348 - 27s/epoch - 146ms/step




186/186 - 29s - loss: 0.8546 - accuracy: 0.6301 - 29s/epoch - 153ms/step




186/186 - 28s - loss: 0.8379 - accuracy: 0.6344 - 28s/epoch - 150ms/step




Epoch 1/2
186/186 - 28s - loss: 0.8379 - accuracy: 0.6349 - 28s/epoch - 149ms/step
Epoch 2/2
186/186 - 18s - loss: 0.6852 - accuracy: 0.7031 - 18s/epoch - 99ms/step




Epoch 1/2
186/186 - 28s - loss: 0.8491 - accuracy: 0.6338 - 28s/epoch - 153ms/step
Epoch 2/2
186/186 - 19s - loss: 0.6893 - accuracy: 0.7057 - 19s/epoch - 102ms/step




Epoch 1/2
186/186 - 29s - loss: 0.8473 - accuracy: 0.6371 - 29s/epoch - 153ms/step
Epoch 2/2
186/186 - 19s - loss: 0.6961 - accuracy: 0.7003 - 19s/epoch - 101ms/step




Epoch 1/2
186/186 - 28s - loss: 0.8446 - accuracy: 0.6315 - 28s/epoch - 148ms/step
Epoch 2/2
186/186 - 20s - loss: 0.6892 - accuracy: 0.7024 - 20s/epoch - 107ms/step




Epoch 1/2
186/186 - 28s - loss: 0.8444 - accuracy: 0.6358 - 28s/epoch - 152ms/step
Epoch 2/2
186/186 - 18s - loss: 0.6765 - accuracy: 0.7120 - 18s/epoch - 96ms/step


  _data = np.array(data, dtype=dtype, copy=copy,


Epoch 1/2
233/233 - 33s - loss: 0.8258 - accuracy: 0.6451 - 33s/epoch - 141ms/step
Epoch 2/2
233/233 - 24s - loss: 0.6823 - accuracy: 0.7113 - 24s/epoch - 103ms/step
Best: 0.678144 using {'batch_size': 40, 'epochs': 2}
