<a href="https://colab.research.google.com/github/Charankalikota13/Neural-Network/blob/main/ICP6_700755456.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip uninstall -y tensorflow keras pandas matplotlib scikit-learn
!pip install tensorflow==2.15.0 keras==2.15.0 pandas==2.0.3 matplotlib==3.9.1 scikit-learn==1.5.1


Found existing installation: tensorflow 2.15.0
Uninstalling tensorflow-2.15.0:
  Successfully uninstalled tensorflow-2.15.0
Found existing installation: keras 2.15.0
Uninstalling keras-2.15.0:
  Successfully uninstalled keras-2.15.0
Found existing installation: pandas 2.0.3
Uninstalling pandas-2.0.3:
  Successfully uninstalled pandas-2.0.3
Found existing installation: matplotlib 3.9.1
Uninstalling matplotlib-3.9.1:
  Successfully uninstalled matplotlib-3.9.1
Found existing installation: scikit-learn 1.5.1
Uninstalling scikit-learn-1.5.1:
  Successfully uninstalled scikit-learn-1.5.1
Collecting tensorflow==2.15.0
  Using cached tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
Collecting keras==2.15.0
  Using cached keras-2.15.0-py3-none-any.whl (1.7 MB)
Collecting pandas==2.0.3
  Using cached pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
Collecting matplotlib==3.9.1
  Using cached matplotlib-3.9.1-cp310-cp310-man

In [2]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split, GridSearchCV # Import GridSearchCV here
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model

# Load the dataset as a Pandas DataFrame
path_to_csv = '/content/sample_data/Sentiment (3) (2).csv'
dataset = pd.read_csv(path_to_csv, header=0)

# Select only the necessary columns 'text' and 'sentiment'
mask = dataset.columns.isin(['text', 'sentiment'])
data = dataset.loc[:, mask]

# Keeping only the necessary columns
data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ') # Removing Retweets

max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ') # Maximum words is 2000 to tokenize sentence
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values) # Taking values to feature matrix
X = pad_sequences(X) # Padding the feature matrix

embed_dim = 128 # Dimension of the Embedded layer
lstm_out = 196 # Long short-term memory (LSTM) layer neurons

def createmodel():
    model = Sequential() # Sequential Neural Network
    model.add(Embedding(max_features, embed_dim, input_length = X.shape[1])) # input dimension 2000 Neurons, output dimension 128 Neurons
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2)) # Drop out 20%, 196 output Neurons, recurrent dropout 20%
    model.add(Dense(3, activation='softmax')) # 3 output neurons[positive, Neutral, Negative], softmax as activation
    model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics = ['accuracy']) # Compiling the model
    return model

labelencoder = LabelEncoder() # Applying label Encoding on the label matrix
integer_encoded = labelencoder.fit_transform(data['sentiment']) # Fitting the model
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=42) # 67% training data, 33% test data split

batch_size = 32 # Batch size 32
model = createmodel() # Function call to Sequential Neural Network
model.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=2) # verbose the higher, the more messages
score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size) # evaluating the model
print(score)
print(acc)
print(model.metrics_names) # metrics of the model
print(integer_encoded)
print(data['sentiment'])

# Predicting on the text data
sentence = ['A lot of good things are happening. We are respected again throughout the world, and that is a great thing.@realDonaldTrump']
sentence = tokenizer.texts_to_sequences(sentence) # Tokenizing the sentence
sentence = pad_sequences(sentence, maxlen=X.shape[1], dtype='int32', value=0) # Padding the sentence
sentiment_probs = model.predict(sentence, batch_size=1, verbose=2)[0] # Predicting the sentence text
sentiment = np.argmax(sentiment_probs)

print(sentiment_probs)
if sentiment == 0:
    print("Neutral")
elif sentiment == 1:
    print("Negative")
else:
    print("Positive")

# Custom wrapper for Keras model
from sklearn.base import BaseEstimator, ClassifierMixin

class CustomKerasClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, build_fn=None, epochs=1, batch_size=32, verbose=1, **sk_params):
        self.build_fn = build_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.sk_params = sk_params
        self.model = None

    def fit(self, X, y, **kwargs):
        self.model = self.build_fn()
        return self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose, **kwargs)

    def predict(self, X, **kwargs):
        return self.model.predict(X, **kwargs)

    def predict_proba(self, X, **kwargs):
        return self.model.predict(X, **kwargs)

    def score(self, X, y, **kwargs):
        _, accuracy = self.model.evaluate(X, y, verbose=0)
        return accuracy

# Use the custom Keras classifier
model = CustomKerasClassifier(build_fn=createmodel, verbose=2)
batch_size = [10, 20, 40]
epochs = [1, 2]
param_grid = {'batch_size': batch_size, 'epochs': epochs}
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)

# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['text'] = data['text'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['text'] = data['text'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x))


291/291 - 38s - loss: 0.8249 - accuracy: 0.6448 - 38s/epoch - 129ms/step
144/144 - 2s - loss: 0.7630 - accuracy: 0.6761 - 2s/epoch - 11ms/step
0.7629609107971191
0.6760594248771667
['loss', 'accuracy']
[1 2 1 ... 2 0 2]
0         Neutral
1        Positive
2         Neutral
3        Positive
4        Positive
           ...   
13866    Negative
13867    Positive
13868    Positive
13869    Negative
13870    Positive
Name: sentiment, Length: 13871, dtype: object
1/1 - 0s - 256ms/epoch - 256ms/step




[0.7461077  0.09453692 0.15935533]
Neutral
744/744 - 78s - loss: 0.8218 - accuracy: 0.6486 - 78s/epoch - 105ms/step




744/744 - 77s - loss: 0.8203 - accuracy: 0.6457 - 77s/epoch - 103ms/step




744/744 - 78s - loss: 0.8247 - accuracy: 0.6478 - 78s/epoch - 105ms/step




744/744 - 76s - loss: 0.8226 - accuracy: 0.6441 - 76s/epoch - 102ms/step




744/744 - 79s - loss: 0.8177 - accuracy: 0.6460 - 79s/epoch - 107ms/step




Epoch 1/2
744/744 - 76s - loss: 0.8242 - accuracy: 0.6474 - 76s/epoch - 102ms/step
Epoch 2/2
744/744 - 68s - loss: 0.6755 - accuracy: 0.7154 - 68s/epoch - 91ms/step




Epoch 1/2
744/744 - 80s - loss: 0.8255 - accuracy: 0.6468 - 80s/epoch - 108ms/step
Epoch 2/2
744/744 - 68s - loss: 0.6811 - accuracy: 0.7074 - 68s/epoch - 91ms/step




Epoch 1/2
744/744 - 81s - loss: 0.8290 - accuracy: 0.6414 - 81s/epoch - 109ms/step
Epoch 2/2
744/744 - 70s - loss: 0.6772 - accuracy: 0.7155 - 70s/epoch - 94ms/step




Epoch 1/2
744/744 - 79s - loss: 0.8339 - accuracy: 0.6438 - 79s/epoch - 107ms/step
Epoch 2/2
744/744 - 68s - loss: 0.6728 - accuracy: 0.7190 - 68s/epoch - 91ms/step




Epoch 1/2
744/744 - 80s - loss: 0.8138 - accuracy: 0.6503 - 80s/epoch - 107ms/step
Epoch 2/2
744/744 - 68s - loss: 0.6663 - accuracy: 0.7184 - 68s/epoch - 92ms/step




372/372 - 47s - loss: 0.8319 - accuracy: 0.6408 - 47s/epoch - 126ms/step




372/372 - 45s - loss: 0.8252 - accuracy: 0.6461 - 45s/epoch - 121ms/step




372/372 - 45s - loss: 0.8298 - accuracy: 0.6449 - 45s/epoch - 122ms/step




372/372 - 48s - loss: 0.8273 - accuracy: 0.6412 - 48s/epoch - 128ms/step




372/372 - 45s - loss: 0.8296 - accuracy: 0.6426 - 45s/epoch - 120ms/step




Epoch 1/2
372/372 - 42s - loss: 0.8342 - accuracy: 0.6426 - 42s/epoch - 114ms/step
Epoch 2/2
372/372 - 33s - loss: 0.6867 - accuracy: 0.7033 - 33s/epoch - 89ms/step




Epoch 1/2
372/372 - 43s - loss: 0.8321 - accuracy: 0.6422 - 43s/epoch - 115ms/step
Epoch 2/2
372/372 - 35s - loss: 0.6844 - accuracy: 0.7082 - 35s/epoch - 93ms/step




Epoch 1/2
372/372 - 42s - loss: 0.8296 - accuracy: 0.6462 - 42s/epoch - 114ms/step
Epoch 2/2
372/372 - 33s - loss: 0.6787 - accuracy: 0.7176 - 33s/epoch - 89ms/step




Epoch 1/2
372/372 - 44s - loss: 0.8401 - accuracy: 0.6405 - 44s/epoch - 118ms/step
Epoch 2/2
372/372 - 35s - loss: 0.6782 - accuracy: 0.7108 - 35s/epoch - 93ms/step




Epoch 1/2
372/372 - 42s - loss: 0.8224 - accuracy: 0.6391 - 42s/epoch - 114ms/step
Epoch 2/2
372/372 - 33s - loss: 0.6699 - accuracy: 0.7119 - 33s/epoch - 90ms/step




186/186 - 27s - loss: 0.8432 - accuracy: 0.6381 - 27s/epoch - 144ms/step




186/186 - 26s - loss: 0.8489 - accuracy: 0.6326 - 26s/epoch - 139ms/step




186/186 - 28s - loss: 0.8467 - accuracy: 0.6332 - 28s/epoch - 149ms/step




186/186 - 27s - loss: 0.8435 - accuracy: 0.6386 - 27s/epoch - 145ms/step




186/186 - 26s - loss: 0.8433 - accuracy: 0.6331 - 26s/epoch - 139ms/step




Epoch 1/2
186/186 - 26s - loss: 0.8479 - accuracy: 0.6390 - 26s/epoch - 138ms/step
Epoch 2/2
186/186 - 18s - loss: 0.6913 - accuracy: 0.7043 - 18s/epoch - 95ms/step




Epoch 1/2
186/186 - 26s - loss: 0.8495 - accuracy: 0.6371 - 26s/epoch - 142ms/step
Epoch 2/2
186/186 - 17s - loss: 0.6820 - accuracy: 0.7116 - 17s/epoch - 93ms/step




Epoch 1/2
186/186 - 27s - loss: 0.8519 - accuracy: 0.6351 - 27s/epoch - 144ms/step
Epoch 2/2
186/186 - 23s - loss: 0.6946 - accuracy: 0.7018 - 23s/epoch - 125ms/step




Epoch 1/2
186/186 - 27s - loss: 0.8438 - accuracy: 0.6358 - 27s/epoch - 145ms/step
Epoch 2/2
186/186 - 17s - loss: 0.6859 - accuracy: 0.7041 - 17s/epoch - 93ms/step




Epoch 1/2
186/186 - 26s - loss: 0.8529 - accuracy: 0.6371 - 26s/epoch - 141ms/step
Epoch 2/2
186/186 - 19s - loss: 0.6786 - accuracy: 0.7085 - 19s/epoch - 101ms/step




Epoch 1/2
930/930 - 94s - loss: 0.8132 - accuracy: 0.6527 - 94s/epoch - 101ms/step
Epoch 2/2
930/930 - 91s - loss: 0.6745 - accuracy: 0.7135 - 91s/epoch - 97ms/step
Best: 0.679758 using {'batch_size': 10, 'epochs': 2}
