In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping

# Load the data. The columns read below are 'text' and 'label' from
# train.csv, and 'text' and 'id' from test.csv.
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

# Fix the set and order of classes once, from the full training set.
# One-hot encoding each split independently (plain pd.get_dummies on y_train
# and on y_val) yields mismatched columns whenever a split is missing a class.
label_dtype = pd.CategoricalDtype(
    categories=sorted(df_train['label'].dropna().unique())
)
class_labels = label_dtype.categories
num_classes = len(class_labels)


def _one_hot_labels(labels):
    """Return a float32 one-hot matrix with one column per entry of class_labels."""
    # dtype is explicit because pd.get_dummies defaults to bool columns.
    return pd.get_dummies(labels.astype(label_dtype), dtype='float32').to_numpy()


# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(
    df_train['text'], df_train['label'], test_size=0.2, random_state=42
)

# Vectorize the text data using TF-IDF. The vocabulary is fitted on the
# training split only; validation and test texts are only transformed.
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_vec = tfidf.fit_transform(X_train).toarray()
X_val_vec = tfidf.transform(X_val).toarray()

# Define the neural network model: one hidden layer with dropout, and a
# softmax output whose width follows the number of classes found in the data.
model = Sequential()
model.add(Dense(256, activation='relu', input_dim=X_train_vec.shape[1]))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define early stopping. restore_best_weights rolls the model back to the
# epoch with the lowest val_loss instead of keeping the weights from the
# last (already degraded) epoch reached after `patience` epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model
model.fit(
    X_train_vec,
    _one_hot_labels(y_train),
    epochs=50,
    batch_size=64,
    validation_data=(X_val_vec, _one_hot_labels(y_val)),
    callbacks=[early_stopping],
)

# Vectorize the test data with the vocabulary fitted on the training split
X_test_vec = tfidf.transform(df_test['text']).toarray()

# Make predictions on the test data. argmax yields a column position in the
# one-hot encoding, so map it back to the original label value.
y_pred_prob = model.predict(X_test_vec)
y_pred = class_labels.to_numpy()[y_pred_prob.argmax(axis=1)]

# Save the predictions to a CSV file
submission_df = pd.DataFrame({'id': df_test['id'], 'label': y_pred})
submission_df.to_csv('submission.csv', index=False)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
