# In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import f1_score
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Embedding, Dropout, Reshape
from keras.callbacks import ModelCheckpoint

# --- Data loading ------------------------------------------------------------
# Both CSV files are read from the working directory. This stage uses the
# 'text' and 'label' columns of train.csv and the 'text' column of test.csv.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

# --- Bag-of-words features ---------------------------------------------------
# The vocabulary (capped at 10,000 terms) is learned from the training text
# only and then reused to encode the test text. The sparse count matrices are
# converted to dense arrays straight away.
vectorizer = CountVectorizer(max_features=10000)
X_train = vectorizer.fit_transform(train_df['text']).toarray()
y_train = train_df['label']
X_test = vectorizer.transform(test_df['text']).toarray()

# Append a trailing axis of size 1: (samples, max_len) -> (samples, max_len, 1)
max_len = X_train.shape[1]
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# --- Hold-out split ----------------------------------------------------------
# No shuffling: the leading 80% of the rows are used for training and the
# remaining rows for validation.
val_size = 0.2
train_size = int((1 - val_size) * len(X_train))
train_feature, val_feature = X_train[:train_size], X_train[train_size:]
train_label, val_label = y_train[:train_size], y_train[train_size:]

# Define SimpleRNN model.
# The feature arrays built earlier in this script are already 3-D,
# (samples, max_len, 1), so the recurrent layer declares that shape directly.
# The previous definition declared a 2-D input (max_len,) followed by a
# Reshape layer, which contradicted the arrays actually passed to fit() and
# predict() and only worked where Keras silently squeezes the trailing axis.
model = Sequential()
model.add(SimpleRNN(units=64, input_shape=(max_len, 1)))
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(rate=0.5))
# 8 output units with softmax: one probability per class.
model.add(Dense(units=8, activation='softmax'))

# Compile model with macro f1 score as eval metric
def macro_f1_score(y_true, y_pred):
    """Return the macro-averaged F1 score for one batch of predictions.

    Args:
        y_true: Integer class labels, shape (n,) or (n, 1). They may arrive
            as floats when called by Keras, so they are cast back to int.
        y_pred: Either predicted class labels, shape (n,) or (n, 1), or
            per-class scores of shape (n, n_classes). Scores are reduced to
            labels with argmax over the last axis.

    Returns:
        The macro-averaged F1 score computed by scikit-learn.
    """
    # np.asarray also accepts eager tensors, so the function works both as a
    # Keras metric (in eager mode) and on plain arrays.
    true_labels = np.asarray(y_true).reshape(-1).astype(int)

    predicted = np.asarray(y_pred)
    if predicted.ndim > 1 and predicted.shape[-1] > 1:
        # Softmax output: pick the most probable class for every sample.
        predicted = predicted.argmax(axis=-1)
    predicted_labels = predicted.reshape(-1).astype(int)

    return f1_score(true_labels, predicted_labels, average='macro')


# scikit-learn needs concrete values, not symbolic graph tensors, so the model
# is run eagerly. This is slower than graph execution but lets the metric see
# real numbers.
# NOTE(review): Keras appears to average function metrics over batches, so the
# logged value would be a mean of per-batch macro-F1 scores rather than F1 over
# the whole epoch - confirm this is acceptable for model selection.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=[macro_f1_score],
    run_eagerly=True,
)

# Define ModelCheckpoint callback: keep only the epoch with the highest
# validation macro-F1.
checkpoint = ModelCheckpoint(filepath='model_checkpoint.h5', monitor='val_macro_f1_score', save_best_only=True, mode='max')

# Train model
model.fit(train_feature, train_label, batch_size=32, epochs=10, validation_data=(val_feature, val_label), callbacks=[checkpoint])

# Inference: restore the weights stored in the checkpoint file, score the test
# set, and take the highest-scoring class of each row as its predicted label.
model.load_weights('model_checkpoint.h5')
class_scores = model.predict(X_test)
y_pred = class_scores.argmax(axis=1)

# Submission file: the test ids alongside their predicted labels, written
# without the DataFrame index.
submission_df = test_df[['id']].assign(label=y_pred)
submission_df.to_csv('submission.csv', index=False)
