In [2]:
pip install cmaes

Collecting cmaes
  Downloading cmaes-0.10.0-py3-none-any.whl.metadata (19 kB)
Downloading cmaes-0.10.0-py3-none-any.whl (29 kB)
Installing collected packages: cmaes
Successfully installed cmaes-0.10.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
import optuna
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd


In [None]:
# Read the training split from the competition input directory
train_csv_path = '/kaggle/input/playground-series-s4e2/train.csv'
df = pd.read_csv(train_csv_path)

# Preview the first rows as this cell's output
df.head()

In [None]:
# Name of the label column in the training data
target = 'NObeyesdad'

# Separate the label from the predictors.
# NOTE(review): 'id' (the column the submission cell copies from test.csv) is
# presumably just a row identifier, so it is excluded from the predictors
# instead of being fed to the network as a numeric feature. errors='ignore'
# keeps this cell working if the training file has no such column.
y = df[target]
X = df.drop(columns=[target]).drop(columns=['id'], errors='ignore')

# Convert categorical features using one-hot encoding
X = pd.get_dummies(X)

X.head()

In [None]:
# Hold out 20% of the rows for validation; the fixed seed makes the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Learn the class -> integer mapping from the training labels only
label_encoder = LabelEncoder().fit(y_train)

# Apply the same mapping to both splits
y_train_encoded = label_encoder.transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

In [None]:
from sklearn.neural_network import MLPClassifier

# Define the objective function to optimize
def objective(trial):
    """Train one MLP with trial-suggested hyperparameters and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the two hidden-layer widths, ``alpha`` and
        ``learning_rate_init``.

    Returns
    -------
    float
        Accuracy of the fitted model on ``X_val`` measured against ``y_val``.
    """
    # Hyperparameters to search. alpha and learning_rate_init are sampled on a
    # log scale; suggest_float(..., log=True) replaces the deprecated
    # suggest_loguniform while keeping the same parameter names and ranges.
    params = {
        'hidden_layer_sizes': (trial.suggest_int('n_neurons_layer1', 10, 1000),
                               trial.suggest_int('n_neurons_layer2', 10, 1000)),
        'alpha': trial.suggest_float('alpha', 1e-5, 1e-1, log=True),
        'learning_rate_init': trial.suggest_float('learning_rate_init', 0.0001, 0.1, log=True),
        'max_iter': 1000,  # Maximum number of iterations
        'random_state': 42,
        'early_stopping': True  # Enable early stopping
    }

    # Initialize and fit the MLP classifier on the encoded training labels
    model = MLPClassifier(**params)
    model.fit(X_train, y_train_encoded)

    # Predict on the validation set
    y_pred_encoded = model.predict(X_val)

    # Decode the predictions back to original labels so they can be compared
    # with the un-encoded y_val
    y_pred = label_encoder.inverse_transform(y_pred_encoded)

    # Calculate accuracy
    accuracy = accuracy_score(y_val, y_pred)

    return accuracy

# Define study. The CMA-ES sampler is seeded (42, like the other random_state
# values in this notebook) so the hyperparameter search can be reproduced.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.CmaEsSampler(seed=42),
)

# Optimize hyperparameters
study.optimize(objective, n_trials=100)

# Get the best hyperparameters (keyed by the names passed to trial.suggest_*)
best_params = study.best_params


In [None]:
from sklearn.metrics import accuracy_score

# Rebuild the estimator configuration from the study result.
# best_params is keyed by the Optuna parameter names used in objective()
# ('n_neurons_layer1', 'n_neurons_layer2', 'alpha', 'learning_rate_init').
# The two layer widths are not MLPClassifier arguments, so passing the dict
# straight to the constructor raises TypeError; they are folded back into
# hidden_layer_sizes here. The fixed settings used during the search
# (max_iter, random_state, early_stopping) are repeated so the final model
# matches the configuration that was actually tuned.
final_params = {
    'hidden_layer_sizes': (best_params['n_neurons_layer1'],
                           best_params['n_neurons_layer2']),
    'alpha': best_params['alpha'],
    'learning_rate_init': best_params['learning_rate_init'],
    'max_iter': 1000,
    'random_state': 42,
    'early_stopping': True,
}

# Train the final model with the best hyperparameters
final_model = MLPClassifier(**final_params)
final_model.fit(X_train, y_train_encoded)

# Predict on the validation set
y_pred_encoded = final_model.predict(X_val)

# Decode the predictions back to original labels
y_pred = label_encoder.inverse_transform(y_pred_encoded)

# Calculate the accuracy
accuracy = accuracy_score(y_val, y_pred)
print('Validation Accuracy:', accuracy)

In [None]:
# Read the competition's test split
test_df = pd.read_csv('/kaggle/input/playground-series-s4e2/test.csv')

# One-hot encode the test rows, then align them to the training feature
# columns: same order, extra columns dropped, missing ones filled with 0
test_X = pd.get_dummies(test_df).reindex(columns=X.columns, fill_value=0)

# Predict encoded classes with the final model and map them back to label names
test_y_pred_encoded = final_model.predict(test_X)
test_y_pred = label_encoder.inverse_transform(test_y_pred_encoded)

# Assemble the submission: the test ids followed by the predicted label
submission_df = test_df[['id']].assign(NObeyesdad=test_y_pred)

# Write the submission file without the index column
submission_df.to_csv('submission.csv', index=False)
