In [1]:
!pip uninstall -y tensorflow keras pandas matplotlib scikit-learn
!pip install tensorflow==2.15.0 keras==2.15.0 pandas==2.0.3 matplotlib==3.9.1 scikit-learn==1.5.1


Found existing installation: tensorflow 2.15.0
Uninstalling tensorflow-2.15.0:
  Successfully uninstalled tensorflow-2.15.0
Found existing installation: keras 2.15.0
Uninstalling keras-2.15.0:
  Successfully uninstalled keras-2.15.0
Found existing installation: pandas 2.0.3
Uninstalling pandas-2.0.3:
  Successfully uninstalled pandas-2.0.3
Found existing installation: matplotlib 3.7.1
Uninstalling matplotlib-3.7.1:
  Successfully uninstalled matplotlib-3.7.1
Found existing installation: scikit-learn 1.2.2
Uninstalling scikit-learn-1.2.2:
  Successfully uninstalled scikit-learn-1.2.2
Collecting tensorflow==2.15.0
  Downloading tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras==2.15.0
  Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 

In [3]:
import pandas as pd # Basic packages for creating dataframes and loading dataset
import numpy as np
import matplotlib.pyplot as plt # Package for visualization
import re # importing package for Regular expression operations
from sklearn.model_selection import train_test_split # Package for splitting the data
from sklearn.preprocessing import LabelEncoder # Package for conversion of categorical to Numerical
from tensorflow.keras.preprocessing.text import Tokenizer # Tokenization
from tensorflow.keras.preprocessing.sequence import pad_sequences # Add zeros or crop based on the length
from tensorflow.keras.models import Sequential # Sequential Neural Network
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D # For layers in Neural Network
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from sklearn.model_selection import GridSearchCV


# Load the dataset as a Pandas DataFrame
path_to_csv = '/content/Sentiment (3) (2).csv'
dataset = pd.read_csv(path_to_csv, header=0)

# Select only the necessary columns 'text' and 'sentiment'
mask = dataset.columns.isin(['text', 'sentiment'])
data = dataset.loc[:, mask]

# Keeping only the necessary columns
data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ') # Removing Retweets

max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ') # Maximum words is 2000 to tokenize sentence
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values) # Taking values to feature matrix
X = pad_sequences(X) # Padding the feature matrix

embed_dim = 128 # Dimension of the Embedded layer
lstm_out = 196 # Long short-term memory (LSTM) layer neurons

def createmodel():
    model = Sequential() # Sequential Neural Network
    model.add(Embedding(max_features, embed_dim, input_length = X.shape[1])) # input dimension 2000 Neurons, output dimension 128 Neurons
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2)) # Drop out 20%, 196 output Neurons, recurrent dropout 20%
    model.add(Dense(3, activation='softmax')) # 3 output neurons[positive, Neutral, Negative], softmax as activation
    model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics = ['accuracy']) # Compiling the model
    return model

labelencoder = LabelEncoder() # Applying label Encoding on the label matrix
integer_encoded = labelencoder.fit_transform(data['sentiment']) # Fitting the model
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.33, random_state=42) # 67% training data, 33% test data split

batch_size = 32 # Batch size 32
model = createmodel() # Function call to Sequential Neural Network
model.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=2) # verbose the higher, the more messages
score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size) # evaluating the model
print(score)
print(acc)
print(model.metrics_names) # metrics of the model
print(integer_encoded)
print(data['sentiment'])

# Predicting on the text data
sentence = ['A lot of good things are happening. We are respected again throughout the world, and that is a great thing.@realDonaldTrump']
sentence = tokenizer.texts_to_sequences(sentence) # Tokenizing the sentence
sentence = pad_sequences(sentence, maxlen=X.shape[1], dtype='int32', value=0) # Padding the sentence
sentiment_probs = model.predict(sentence, batch_size=1, verbose=2)[0] # Predicting the sentence text
sentiment = np.argmax(sentiment_probs)

print(sentiment_probs)
if sentiment == 0:
    print("Neutral")
elif sentiment == 1:
    print("Negative")
else:
    print("Positive")
#2
# Custom wrapper for Keras model
from sklearn.base import BaseEstimator, ClassifierMixin

class CustomKerasClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, build_fn=None, epochs=1, batch_size=32, verbose=1, **sk_params):
        self.build_fn = build_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.sk_params = sk_params
        self.model = None

    def fit(self, X, y, **kwargs):
        self.model = self.build_fn()
        return self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose, **kwargs)

    def predict(self, X, **kwargs):
        return self.model.predict(X, **kwargs)

    def predict_proba(self, X, **kwargs):
        return self.model.predict(X, **kwargs)

    def score(self, X, y, **kwargs):
        _, accuracy = self.model.evaluate(X, y, verbose=0)
        return accuracy

# Use the custom Keras classifier
model = CustomKerasClassifier(build_fn=createmodel, verbose=2)
batch_size = [10, 20, 40]
epochs = [1, 2]
param_grid = {'batch_size': batch_size, 'epochs': epochs}
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X_train, Y_train)

# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['text'] = data['text'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['text'] = data['text'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x))


291/291 - 39s - loss: 0.8257 - accuracy: 0.6477 - 39s/epoch - 133ms/step
144/144 - 2s - loss: 0.7522 - accuracy: 0.6745 - 2s/epoch - 11ms/step
0.7522183656692505
0.6745303869247437
['loss', 'accuracy']
[1 2 1 ... 2 0 2]
0         Neutral
1        Positive
2         Neutral
3        Positive
4        Positive
           ...   
13866    Negative
13867    Positive
13868    Positive
13869    Negative
13870    Positive
Name: sentiment, Length: 13871, dtype: object
1/1 - 0s - 276ms/epoch - 276ms/step




[0.6537958 0.1412885 0.2049157]
Neutral
744/744 - 79s - loss: 0.8234 - accuracy: 0.6470 - 79s/epoch - 106ms/step




744/744 - 80s - loss: 0.8258 - accuracy: 0.6458 - 80s/epoch - 108ms/step




744/744 - 78s - loss: 0.8241 - accuracy: 0.6465 - 78s/epoch - 105ms/step




744/744 - 79s - loss: 0.8249 - accuracy: 0.6440 - 79s/epoch - 107ms/step




744/744 - 79s - loss: 0.8191 - accuracy: 0.6406 - 79s/epoch - 106ms/step




Epoch 1/2
744/744 - 79s - loss: 0.8253 - accuracy: 0.6437 - 79s/epoch - 107ms/step
Epoch 2/2
744/744 - 69s - loss: 0.6804 - accuracy: 0.7111 - 69s/epoch - 92ms/step




Epoch 1/2
744/744 - 76s - loss: 0.8233 - accuracy: 0.6458 - 76s/epoch - 103ms/step
Epoch 2/2
744/744 - 67s - loss: 0.6847 - accuracy: 0.7112 - 67s/epoch - 91ms/step




Epoch 1/2
744/744 - 80s - loss: 0.8220 - accuracy: 0.6473 - 80s/epoch - 107ms/step
Epoch 2/2
744/744 - 71s - loss: 0.6717 - accuracy: 0.7154 - 71s/epoch - 95ms/step




Epoch 1/2
744/744 - 79s - loss: 0.8243 - accuracy: 0.6472 - 79s/epoch - 106ms/step
Epoch 2/2
744/744 - 69s - loss: 0.6774 - accuracy: 0.7124 - 69s/epoch - 92ms/step




Epoch 1/2
744/744 - 77s - loss: 0.8202 - accuracy: 0.6438 - 77s/epoch - 104ms/step
Epoch 2/2
744/744 - 67s - loss: 0.6697 - accuracy: 0.7128 - 67s/epoch - 90ms/step




372/372 - 44s - loss: 0.8341 - accuracy: 0.6430 - 44s/epoch - 118ms/step




372/372 - 43s - loss: 0.8262 - accuracy: 0.6388 - 43s/epoch - 114ms/step




372/372 - 42s - loss: 0.8295 - accuracy: 0.6422 - 42s/epoch - 113ms/step




372/372 - 44s - loss: 0.8293 - accuracy: 0.6391 - 44s/epoch - 117ms/step




372/372 - 42s - loss: 0.8217 - accuracy: 0.6469 - 42s/epoch - 113ms/step




Epoch 1/2
372/372 - 43s - loss: 0.8319 - accuracy: 0.6425 - 43s/epoch - 115ms/step
Epoch 2/2
372/372 - 33s - loss: 0.6797 - accuracy: 0.7129 - 33s/epoch - 89ms/step




Epoch 1/2
372/372 - 44s - loss: 0.8241 - accuracy: 0.6418 - 44s/epoch - 118ms/step
Epoch 2/2
372/372 - 36s - loss: 0.6848 - accuracy: 0.7068 - 36s/epoch - 98ms/step




Epoch 1/2
372/372 - 45s - loss: 0.8348 - accuracy: 0.6422 - 45s/epoch - 121ms/step
Epoch 2/2
372/372 - 36s - loss: 0.6848 - accuracy: 0.7096 - 36s/epoch - 97ms/step




Epoch 1/2
372/372 - 43s - loss: 0.8351 - accuracy: 0.6402 - 43s/epoch - 116ms/step
Epoch 2/2
372/372 - 35s - loss: 0.6821 - accuracy: 0.7104 - 35s/epoch - 94ms/step




Epoch 1/2
372/372 - 43s - loss: 0.8290 - accuracy: 0.6436 - 43s/epoch - 117ms/step
Epoch 2/2
372/372 - 34s - loss: 0.6675 - accuracy: 0.7173 - 34s/epoch - 91ms/step




186/186 - 27s - loss: 0.8470 - accuracy: 0.6341 - 27s/epoch - 145ms/step




186/186 - 26s - loss: 0.8371 - accuracy: 0.6396 - 26s/epoch - 138ms/step




186/186 - 25s - loss: 0.8436 - accuracy: 0.6337 - 25s/epoch - 137ms/step




186/186 - 29s - loss: 0.8439 - accuracy: 0.6346 - 29s/epoch - 153ms/step




186/186 - 26s - loss: 0.8451 - accuracy: 0.6363 - 26s/epoch - 142ms/step




Epoch 1/2
186/186 - 26s - loss: 0.8430 - accuracy: 0.6372 - 26s/epoch - 139ms/step
Epoch 2/2
186/186 - 17s - loss: 0.6859 - accuracy: 0.7059 - 17s/epoch - 91ms/step




Epoch 1/2
186/186 - 27s - loss: 0.8388 - accuracy: 0.6416 - 27s/epoch - 145ms/step
Epoch 2/2
186/186 - 19s - loss: 0.6867 - accuracy: 0.7132 - 19s/epoch - 100ms/step




Epoch 1/2
186/186 - 27s - loss: 0.8443 - accuracy: 0.6310 - 27s/epoch - 146ms/step
Epoch 2/2
186/186 - 18s - loss: 0.6922 - accuracy: 0.7020 - 18s/epoch - 96ms/step




Epoch 1/2
186/186 - 27s - loss: 0.8465 - accuracy: 0.6346 - 27s/epoch - 146ms/step
Epoch 2/2
186/186 - 19s - loss: 0.6791 - accuracy: 0.7116 - 19s/epoch - 101ms/step




Epoch 1/2
186/186 - 29s - loss: 0.8468 - accuracy: 0.6369 - 29s/epoch - 156ms/step
Epoch 2/2
186/186 - 19s - loss: 0.6788 - accuracy: 0.7104 - 19s/epoch - 103ms/step


  _data = np.array(data, dtype=dtype, copy=copy,


Epoch 1/2
930/930 - 95s - loss: 0.8075 - accuracy: 0.6558 - 95s/epoch - 102ms/step
Epoch 2/2
930/930 - 89s - loss: 0.6701 - accuracy: 0.7135 - 89s/epoch - 96ms/step
Best: 0.679867 using {'batch_size': 10, 'epochs': 2}
