In [88]:
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')

In [89]:
import os
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

In [90]:
# Load the dataset
df = pd.read_csv('../datasets/cleaned_twitter_dataset.csv')
df.columns = ['score', 'text',]

In [91]:
df.head()

Unnamed: 0,score,text
0,0,"['kenichan', 'dived', 'many', 'time', 'ball', ..."
1,0,"['whole', 'body', 'feel', 'itchy', 'like', 'fi..."
2,0,"['nationwideclass', 'behaving', 'im', 'mad', '..."
3,0,"['kwesidei', 'whole', 'crew']"
4,0,"['need', 'hug']"


In [92]:
df.shape

(1599998, 2)

In [93]:
# count missing values by colum
df.isnull().sum()

score    0
text     0
dtype: int64

In [94]:
# Remove rows with missing values
df = df.dropna(subset=['score', 'text'])
df.shape

(1599998, 2)

In [95]:
df.nunique()

score          2
text     1553368
dtype: int64

In [96]:
df['score'].nunique()

2

In [97]:
df['score'].unique()

array([0, 4])

In [98]:
df['score'] = df['score'].replace(4, 1)
df.tail()

Unnamed: 0,score,text
1599993,1,"['woke', 'school', 'best', 'feeling', 'ever']"
1599994,1,"['thewdbcom', 'cool', 'hear', 'old', 'walt', '..."
1599995,1,"['ready', 'mojo', 'makeover', 'ask', 'detail']"
1599996,1,"['happy', '38th', 'birthday', 'boo', 'alll', '..."
1599997,1,"['happy', 'charitytuesday', 'thenspcc', 'spark..."


In [99]:
df['score'].unique()

array([0, 1])

In [100]:
# Split the dataset into training and testing sets
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

In [101]:
# Tokenize the text data
tokenizer = Tokenizer(num_words=200000, split=' ')
tokenizer.fit_on_texts(df_train['text'].values)

In [102]:
# Convert the text data to sequences and pad them
max_len = max([len(s.split()) for s in df_train['text'].values])
x_train = pad_sequences(tokenizer.texts_to_sequences(df_train['text'].values), maxlen=max_len)
x_test = pad_sequences(tokenizer.texts_to_sequences(df_test['text'].values), maxlen=max_len)

In [103]:
# Get the target variables
y_train = df_train['score']
y_test = df_test['score']

In [104]:
# Define the model architecture
model = Sequential()
model.add(Embedding(input_dim=200000, output_dim=128, input_length=max_len))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [105]:
# Fit the model to the training data
os.environ['TF_METAL_DEVICE_PLACEMENT'] = 'metal:0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Reduce the number of samples used per epoch to 1000
batch_size = 64
steps_per_epoch = 1000
num_epochs = 10

model.fit(x_train, y_train, validation_split=0.1, batch_size=batch_size, epochs=num_epochs, steps_per_epoch=steps_per_epoch)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x3249bc9d0>

In [106]:
# Save model
model.save('final_models/lstm_e10_b64.h5')

In [107]:
# Evaluate model
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print('Accuracy: %f' % (accuracy*100))
print('Loss: %f' % (loss*100))

Accuracy: 79.092187
Loss: 44.400445


In [108]:
# load the saved model
from keras.models import load_model
model = load_model('final_models/lstm_e10_b128_p20.h5')

# make predictions
y_pred = model.predict(x_test)

InvalidArgumentError: Graph execution error:

Detected at node 'sequential/embedding/embedding_lookup' defined at (most recent call last):
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start
      self.io_loop.start()
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
      handle._run()
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2961, in run_cell
      result = self._run_cell(
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3016, in _run_cell
      result = runner(coro)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3221, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3400, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/yh/hy8tkdbn4jjcx_sdz_6sjp280000gn/T/ipykernel_2876/3231532604.py", line 6, in <module>
      y_pred = model.predict(x_test)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/training.py", line 2382, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/training.py", line 2169, in predict_function
      return step_function(self, iterator)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/training.py", line 2155, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/training.py", line 2143, in run_step
      outputs = model.predict_step(data)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/training.py", line 2111, in predict_step
      return self(x, training=False)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/training.py", line 558, in __call__
      return super().__call__(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/sequential.py", line 412, in call
      return super().call(inputs, training=training, mask=mask)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/Users/thaveesha/Developer/miniconda3/envs/tensorflow/lib/python3.10/site-packages/keras/layers/core/embedding.py", line 272, in call
      out = tf.nn.embedding_lookup(self.embeddings, inputs)
Node: 'sequential/embedding/embedding_lookup'
indices[30,51] = 116358 is not in [0, 5000)
	 [[{{node sequential/embedding/embedding_lookup}}]] [Op:__inference_predict_function_177361]

In [None]:
import matplotlib.pyplot as plt

plt.plot(y_test, label='actual')
plt.plot(y_pred, label='predicted')
plt.legend()
plt.show()

In [None]:
# Make predictions
new_sentences = ['I am feeling sad today', 'Today is a great day']
new_sequences = tokenizer.texts_to_sequences(new_sentences)
new_sequences = pad_sequences(new_sequences)
predictions = model.predict(new_sequences)