In [1]:
pip install tensorflow keras numpy pandas nltk

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding, SpatialDropout1D
from keras.callbacks import EarlyStopping
from keras.preprocessing.text import Tokenizer

In [3]:
# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\91813\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\91813\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\91813\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
# Load train dataset
# Load train dataset
train_df = pd.read_csv('train (1).csv')
train_df

Unnamed: 0,Tweets,label
0,Asked #ChatGPT about what it thinks are the pr...,Positive
1,#ChatGPT tornado has already traveled around t...,Neutral
2,This is a great explanation of why #EVs are mo...,Positive
3,‘if you need to write a box-ticking social med...,Positive
4,Just saw an AI tool making my coffee for me. \...,Positive
...,...,...
765,,
766,,
767,,
768,,


In [6]:
# Drop NaN values
train_df.dropna(inplace=True)

In [7]:
# Preprocessing
lemmatizer = WordNetLemmatizer()

def preprocess_text(tweet):
    tweet = tweet.lower()
    words = word_tokenize(tweet)
    words = [word for word in words if word.isalnum()]
    words = [word for word in words if word not in stopwords.words('english')]
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)

train_df['cleaned_text'] = train_df['Tweets'].apply(preprocess_text)

In [8]:
# Tokenization
max_words = 5000
max_len = 100

tokenizer = Tokenizer(num_words=max_words, split=' ')
tokenizer.fit_on_texts(train_df['cleaned_text'].values)
X_train = tokenizer.texts_to_sequences(train_df['cleaned_text'].values)
X_train = pad_sequences(X_train, maxlen=max_len)

In [10]:
# Encoding labels
label_dict = {'Positive': 0, 'Neutral': 1, 'Negative': 2}
train_df['label_encoded'] = train_df['label'].map(label_dict)
y_train = to_categorical(train_df['label_encoded'].values)

In [12]:
# Load test dataset
test_df = pd.read_csv('test.csv')
test_df

Unnamed: 0,@CoffeeVectors @jenny____r confirmed #ChatGPT has dad jokes https://t.co/tC2Sks2f76,Positive
0,Sooooo depressing (our competition as journos ...,Negative
1,I just had a 4 hour deep conversation with #Ch...,Positive
2,I began to explore #chatgpt today.\n\nI asked ...,Negative
3,/2023\n1. Ask #ChatGPT\n2. Add some content\n3...,Positive
4,#Explained: Are you soon going to be replaced?...,Negative
...,...,...
165,Earth's #OpenAIChatGPT has become sentient an...,Positive
166,The first response was elaborate with some obv...,Positive
167,The absolutely terrifying inevitability of #Ch...,Negative
168,I asked ChatGPT what video would give the most...,Neutral


In [13]:
# Drop NaN values
test_df.dropna(inplace=True)

In [15]:
# Preprocessing for test dataset
test_df['cleaned_text'] = test_df['Positive'].apply(preprocess_text)

In [16]:
# Tokenization for test dataset
X_test = tokenizer.texts_to_sequences(test_df['cleaned_text'].values)
X_test = pad_sequences(X_test, maxlen=max_len)

In [18]:
# Encoding labels for test dataset
test_df['label_encoded'] = test_df['Positive'].map(label_dict)
y_test = to_categorical(test_df['label_encoded'].values)

In [19]:
# LSTM model
embedding_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_words, embedding_dim, input_length=X_train.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [20]:
# Early stopping
earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=0, mode='auto')

In [23]:
# Model training
batch_size = 32
epochs = 20

history = model.fit(X_train, y_train, validation_data=(X_test, y_test), 
                    epochs=epochs, batch_size=batch_size, verbose=2, callbacks=[earlystop])

Epoch 1/20
19/19 - 9s - loss: 0.0563 - accuracy: 0.9850 - val_loss: 0.7071 - val_accuracy: 0.6706 - 9s/epoch - 451ms/step
Epoch 2/20
19/19 - 8s - loss: 0.0377 - accuracy: 0.9917 - val_loss: 0.6247 - val_accuracy: 0.6706 - 8s/epoch - 442ms/step
Epoch 3/20
19/19 - 9s - loss: 0.0242 - accuracy: 0.9950 - val_loss: 0.6038 - val_accuracy: 0.6706 - 9s/epoch - 460ms/step
Epoch 4/20
19/19 - 8s - loss: 0.0215 - accuracy: 0.9950 - val_loss: 0.6020 - val_accuracy: 0.6706 - 8s/epoch - 437ms/step
Epoch 5/20
19/19 - 8s - loss: 0.0168 - accuracy: 0.9950 - val_loss: 0.6086 - val_accuracy: 0.6706 - 8s/epoch - 434ms/step
Epoch 6/20
19/19 - 8s - loss: 0.0120 - accuracy: 0.9950 - val_loss: 0.6040 - val_accuracy: 0.6706 - 8s/epoch - 431ms/step
Epoch 7/20
19/19 - 8s - loss: 0.0196 - accuracy: 0.9950 - val_loss: 0.5979 - val_accuracy: 0.6706 - 8s/epoch - 435ms/step
Epoch 8/20
19/19 - 8s - loss: 0.0449 - accuracy: 0.9883 - val_loss: 0.6761 - val_accuracy: 0.8882 - 8s/epoch - 427ms/step
Epoch 9/20
19/19 - 8s - 

In [24]:
# Evaluate model
score = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", score[0])
print("Test Accuracy:", score[1])

Test Loss: 0.6004962921142578
Test Accuracy: 0.8882352709770203


Hyperparameter tuning using techniques like grid search or random search can also be employed to further increase the test accuracy.