In [1]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# 1. Load the preprocessed data
# Ensure 'preprocessed_train.csv' and 'preprocessed_test.csv' are in the same directory
train_df = pd.read_csv('preprocessed_train.csv')
test_df = pd.read_csv('preprocessed_test.csv')

# Load original test.csv to get the trip_id for the final submission
original_test_df = pd.read_csv('test.csv')

# 2. Separate Features and Target
X = train_df.drop('spend_category', axis=1)
y = train_df['spend_category']

# 3. Split Training Data (Optional, for validation)
# We split to check how well our model is doing before submitting
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Initialize the Neural Network
# hidden_layer_sizes: 3 layers with 128, 64, and 32 neurons respectively
# max_iter: Maximum number of iterations (epochs)
# early_stopping: Stops training if validation score doesn't improve (prevents overfitting)
mlp = MLPClassifier(hidden_layer_sizes=(128, 64, 32),
                    activation='relu',
                    solver='adam',
                    alpha=0.0001, # L2 Regularization
                    learning_rate='adaptive',
                    max_iter=500,
                    early_stopping=True,
                    random_state=42,
                    verbose=True)

# 5. Train the Model
print("Training the Neural Network...")
mlp.fit(X_train, y_train)

# 6. Evaluate on Validation Set
val_predictions = mlp.predict(X_val)
print(f"Validation Accuracy: {accuracy_score(y_val, val_predictions)}")

# 7. Retrain on ALL data for the best final performance (Optional)
# It is often better to retrain on the full dataset once you are happy with the hyperparameters
# mlp.fit(X, y) 

# 8. Make Predictions on Test Data
test_predictions = mlp.predict(test_df)

# 9. Create Submission File
submission = pd.DataFrame({
    'trip_id': original_test_df['trip_id'],
    'spend_category': test_predictions
})

# Ensure the values are integers (0, 1, 2) if required
submission['spend_category'] = submission['spend_category'].astype(int)

# Save to CSV
submission.to_csv('submission_neural_network.csv', index=False)
print("Submission file 'submission_neural_network.csv' created successfully.")

Training the Neural Network...
Iteration 1, loss = 0.75602302
Validation score: 0.738614
Iteration 2, loss = 0.60732867
Validation score: 0.740594
Iteration 3, loss = 0.57923712
Validation score: 0.749505
Iteration 4, loss = 0.56336901
Validation score: 0.746535
Iteration 5, loss = 0.55500687
Validation score: 0.747525
Iteration 6, loss = 0.54389102
Validation score: 0.750495
Iteration 7, loss = 0.53568177
Validation score: 0.761386
Iteration 8, loss = 0.51949798
Validation score: 0.761386
Iteration 9, loss = 0.50769803
Validation score: 0.743564
Iteration 10, loss = 0.49339303
Validation score: 0.748515
Iteration 11, loss = 0.47348527
Validation score: 0.752475
Iteration 12, loss = 0.45613383
Validation score: 0.746535
Iteration 13, loss = 0.43744047
Validation score: 0.740594
Iteration 14, loss = 0.42208621
Validation score: 0.738614
Iteration 15, loss = 0.40471234
Validation score: 0.743564
Iteration 16, loss = 0.37877594
Validation score: 0.732673
Iteration 17, loss = 0.35753886
Va