In [23]:
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.callbacks import EarlyStopping
import warnings
import matplotlib.pyplot as plt

In [2]:
warnings.filterwarnings("ignore")

In [3]:
tr_df = pd.read_csv("/kaggle/input/offensive-prepped/Off_Tweet_Det/train_prepped.csv")

In [4]:
tr_df.head()

Unnamed: 0,class,tweet
0,2,"['!', '!', '!', 'rt', '@mayasolovely', ':', 'w..."
1,1,"['!', '!', '!', 'rt', '@mleew17', ':', 'boy', ..."
2,1,"['!', '!', '!', 'rt', '@urkindofbrand', 'dawg'..."
3,1,"['!', '!', '!', 'rt', '@c_g_anderson', ':', '@..."
4,1,"['!', '!', '!', 'rt', '@shenikaroberts', ':', ..."


In [5]:
mw = 500

In [6]:
tk = Tokenizer()

In [7]:
tr_list = tr_df.tweet.tolist()
tk.fit_on_texts(tr_list)

In [8]:
tr_seq = tk.texts_to_sequences(tr_list)

In [9]:
X = pad_sequences(tr_seq, maxlen = mw)

In [10]:
y = pd.get_dummies(tr_df, columns = ["class"])

In [11]:
y = y[["class_0", "class_1", "class_2"]]

In [12]:
y.head()

Unnamed: 0,class_0,class_1,class_2
0,False,False,True
1,False,True,False
2,False,True,False
3,False,True,False
4,False,True,False


In [13]:
X_tr, X_tmp, y_tr, y_tmp = train_test_split(X, y, train_size = 0.81, random_state = 42)

In [14]:
X_val, X_tst, y_val, y_tst = train_test_split(X_tmp, y_tmp, train_size = 0.5, random_state =42) 

In [15]:
len(tk.word_index)

38968

In [25]:
model = Sequential(name = "RNN_mod")

model.add(Embedding(input_dim = len(tk.word_index)+1, output_dim = 256, input_length = mw))
model.add(SimpleRNN(256, activation = "relu", return_sequences = True))
model.add(SimpleRNN(128, activation = "relu", return_sequences = True))
model.add(SimpleRNN(64, activation = "relu", return_sequences = True))
model.add(SimpleRNN(32, activation = "relu"))
model.add(Dense(3, activation = "softmax"))
model.build(input_shape=(32, mw))

model.summary()

In [26]:
model.compile(optimizer = "Adam", loss = "categorical_crossentropy", metrics = ["accuracy"])

In [27]:
est = EarlyStopping(patience = 5, restore_best_weights = True)

In [28]:
hist = model.fit(X_tr, y_tr, batch_size=32, epochs = 100, validation_data = (X_val, y_val), callbacks = [est])

Epoch 1/100
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 191ms/step - accuracy: 0.7913 - loss: 0.5154 - val_accuracy: 0.8934 - val_loss: 0.3487
Epoch 2/100
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 185ms/step - accuracy: 0.9202 - loss: 0.2414 - val_accuracy: 0.8879 - val_loss: 0.3433
Epoch 3/100
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 185ms/step - accuracy: 0.9474 - loss: 0.1444 - val_accuracy: 0.8828 - val_loss: 0.4345
Epoch 4/100
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 186ms/step - accuracy: 0.9691 - loss: 0.0903 - val_accuracy: 0.8619 - val_loss: 0.4970
Epoch 5/100
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 186ms/step - accuracy: 0.9844 - loss: 0.0508 - val_accuracy: 0.8704 - val_loss: 0.5185
Epoch 6/100
[1m628/628[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 186ms/step - accuracy: 0.9903 - loss: 0.0308 - val_accuracy: 0.8590 - val_loss: 0.721

In [1]:
metrics = hist.history
plt.plot(hist.epoch, metrics["val_accuracy"])
plt.plot(hist.epoch, metrics["accuracy"])
plt.xlabel("Epochs")
plt.show()

NameError: name 'hist' is not defined