In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Embedding, SimpleRNN
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load the dataset
data = pd.read_csv('training.csv')

# Split the data into text and labels
text = data['text'].astype(str).values
labels = data['label'].values

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text)
vocab_size = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(text)
max_sequence_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.2, random_state=42)

# Define the RNN model
model = Sequential()
model.add(Embedding(vocab_size, 128, input_length=max_sequence_length))
model.add(SimpleRNN(128))
model.add(Dense(6, activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Make predictions
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Calculate evaluation metrics
report = classification_report(y_test, y_pred)

# Print the evaluation metrics
print("Classification Report:")
print(report)


2023-05-30 16:19:16.428041: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.88      0.85      1092
           1       0.76      0.76      0.76      1176
           2       0.46      0.39      0.43       318
           3       0.55      0.65      0.59       462
           4       0.70      0.60      0.65       415
           5       0.48      0.35      0.41       137

    accuracy                           0.71      3600
   macro avg       0.63      0.60      0.61      3600
weighted avg       0.71      0.71      0.71      3600

