In [3]:
import pandas as pd

In [4]:
train = pd.read_csv('train_preprocessed.csv')
test = pd.read_csv('test_preprocessed.csv')

In [5]:
num_features = [ 'mainland_nights', 'island_nights', 'ratio', 'total_nights', 'male_count', 'female_count', 'total_travellers']

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

train[num_features] = scaler.fit_transform(train[num_features])
test[num_features] = scaler.transform(test[num_features])

## Neural Networks

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

In [11]:
test = test.drop(columns='visitor_nation')

In [12]:
trip_id = test['trip_ID']
test_data = test.drop(columns='trip_ID')

categories = train['category']
train_data = train.drop(columns='category')

assert (train_data.columns == test_data.columns).all()

In [None]:
# convert into tensor
X = torch.tensor(train_data.values, dtype=torch.float32)
y = torch.tensor(categories.values, dtype=torch.long)

X_test = torch.tensor(test_data.values, dtype=torch.float32)

In [None]:
# from imblearn.over_sampling import SMOTE

# smote = SMOTE(random_state=42)
# X, y = smote.fit_resample(X, y)

# X = torch.tensor(X, dtype=torch.float32)
# y = torch.tensor(y, dtype=torch.long)

In [None]:
# define the neural network architecture
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(27, 128)
        self.fc2 = nn.Linear(128, 64) 
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, 3)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.1) 
        self.log_softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc4(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc5(x)
        x = self.log_softmax(x)
        return x  

model = NeuralNetwork()

# Define a loss function and optimizer
criterion = nn.NLLLoss()  
optimizer = optim.SGD(model.parameters(), lr=0.01)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# training
num_epochs = 100
batch_size = 128  # Batch size for training
num_batches = len(X) // batch_size

for epoch in range(num_epochs):
    for i in range(num_batches):
        start = i * batch_size
        end = start + batch_size
        X_batch = X[start:end]

        y_batch = y[start:end]

        log_probs = model(X_batch)  
        loss = criterion(log_probs, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/100], Loss: 0.1241
Epoch [2/100], Loss: 0.1586
Epoch [3/100], Loss: 0.1861
Epoch [4/100], Loss: 0.2479
Epoch [5/100], Loss: 0.2741
Epoch [6/100], Loss: 0.3633
Epoch [7/100], Loss: 0.4437
Epoch [8/100], Loss: 0.5122
Epoch [9/100], Loss: 0.6133
Epoch [10/100], Loss: 0.6406
Epoch [11/100], Loss: 0.6304
Epoch [12/100], Loss: 0.7213
Epoch [13/100], Loss: 0.7009
Epoch [14/100], Loss: 0.7543
Epoch [15/100], Loss: 0.8071
Epoch [16/100], Loss: 0.7981
Epoch [17/100], Loss: 0.7691
Epoch [18/100], Loss: 0.7280
Epoch [19/100], Loss: 0.6992
Epoch [20/100], Loss: 0.6808
Epoch [21/100], Loss: 0.6543
Epoch [22/100], Loss: 0.6069
Epoch [23/100], Loss: 0.5329
Epoch [24/100], Loss: 0.5107
Epoch [25/100], Loss: 0.5053
Epoch [26/100], Loss: 0.5314
Epoch [27/100], Loss: 0.5109
Epoch [28/100], Loss: 0.4540
Epoch [29/100], Loss: 0.4429
Epoch [30/100], Loss: 0.4520
Epoch [31/100], Loss: 0.4020
Epoch [32/100], Loss: 0.3897
Epoch [33/100], Loss: 0.3305
Epoch [34/100], Loss: 0.3255
Epoch [35/100], Loss: 0

In [17]:
model.eval()

with torch.no_grad():
    log_probs = model(X_test)  
    predictions = torch.argmax(log_probs, dim=1)

In [18]:
predictions.shape

torch.Size([5852])

In [19]:
df_predictions = pd.concat([trip_id, pd.DataFrame(predictions.numpy(), columns=["category"])], axis=1)

In [20]:
df_predictions.shape

(5852, 2)

In [21]:
df_predictions.to_csv("submission.csv", index=False)

In [22]:
model = NeuralNetwork()
yhat = model(X_batch)

In [None]:
# visualization of the model
from torchviz import make_dot

make_dot(yhat, params=dict(list(model.named_parameters()))).render("model_visualization", format="png")

'model_visualization.png'

## SVM

In [7]:
from sklearn.model_selection import train_test_split

# train test split with stratify
X = train.drop(columns=['category'])
y = train['category']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=42,stratify=y)

In [9]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, classification_report

# SVM Classifier
svm = SVC(random_state=42)

# Define the parameter grid with additional kernels
param_grid = {
    'C': [0.1, 1, 10],                     # Regularization strength
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],  # Kernel types
    'class_weight': ['balanced']     # Class weights for handling imbalance
}

# Grid Search with cross-validation
grid_search = GridSearchCV(
    svm, param_grid, 
    cv=5, scoring='f1_weighted', 
    verbose=2, n_jobs=-1
)
grid_search.fit(X_train, y_train)

# Best parameters and accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best F1 Score (Training):", grid_search.best_score_)

# Test the best model
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate on the test set
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
print("\nTest Accuracy:", accuracy)
print("Test F1 Score:", f1)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Fitting 5 folds for each of 12 candidates, totalling 60 fits


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


[CV] END ...........C=0.1, class_weight=balanced, kernel=rbf; total time=   5.8s
[CV] END ...........C=0.1, class_weight=balanced, kernel=rbf; total time=   6.4s
[CV] END ...........C=0.1, class_weight=balanced, kernel=rbf; total time=   6.2s
[CV] END ..........C=0.1, class_weight=balanced, kernel=poly; total time=   4.0s
[CV] END ...........C=0.1, class_weight=balanced, kernel=rbf; total time=   5.0s
[CV] END ...........C=0.1, class_weight=balanced, kernel=rbf; total time=   5.8s
[CV] END ..........C=0.1, class_weight=balanced, kernel=poly; total time=   3.9s
[CV] END ..........C=0.1, class_weight=balanced, kernel=poly; total time=   3.6s
[CV] END ..........C=0.1, class_weight=balanced, kernel=poly; total time=   4.1s
[CV] END ..........C=0.1, class_weight=balanced, kernel=poly; total time=   3.9s
[CV] END .......C=0.1, class_weight=balanced, kernel=sigmoid; total time=   5.7s
[CV] END .......C=0.1, class_weight=balanced, kernel=sigmoid; total time=   7.3s
[CV] END .......C=0.1, class

In [10]:
best_model.get_params()

{'C': 1,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': 'balanced',
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': 42,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [15]:

train_columns = [col for col in train.columns if col not in ('category', 'trip_ID', 'visitor_nation')]

test_reordered = test[train_columns]


preds = pd.concat([test['trip_ID'], pd.Series(best_model.predict(test_reordered))], axis=1)
preds.columns = ['trip_ID', 'category']
preds.to_csv("submission_svm.csv", index=False)

## MLP Classifier

In [31]:
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Define the MLPClassifier with parameter tuning
mlp = MLPClassifier(max_iter=500, random_state=42)

# Define hyperparameter grid for tuning
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (100, 50), (50, 50, 50)],  # Different architectures
    'activation': ['relu', 'tanh'],                                  # Activation functions
    'solver': ['adam', 'sgd'],                                       # Optimizers
    'alpha': [0.0001, 0.001, 0.01],                                  # Regularization strength
    'learning_rate': ['constant', 'adaptive'],                       # Learning rate strategies
}



# GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=2,
)

# Fit the model
grid_search.fit(X_train, y_train)

# Get the best model and evaluate on the test set
best_mlp = grid_search.best_estimator_
y_pred = best_mlp.predict(X_test)

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
print("Best Parameters:", grid_search.best_params_)
print("Training Accuracy:", grid_search.best_score_)
print("Test Accuracy:", accuracy)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Fitting 5 folds for each of 96 candidates, totalling 480 fits
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   1.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   1.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   2.1s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   1.4s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   1.5s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, solver=sgd; total time=   0.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant, solver=sgd; total time=   1.4s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=consta



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam; total time=   4.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam; total time=   4.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam; total time=   3.8s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam; total time=   4.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=adam; total time=   4.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=sgd; total time=   1.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=sgd; total time=   1.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=sgd; total time=   0.9s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=sgd; total time=   0.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant, solver=sgd; total time=   1.8s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam; total time=   4.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam; total time=   4.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam; total time=   3.8s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam; total time=   4.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=adam; total time=   4.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=sgd; total time=   2.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=sgd; total time=   1.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=sgd; total time=   1.9s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=sgd; total time=   1.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive, solver=sgd; total time=   2.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  16.6s
[CV] END activatio



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  18.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  17.8s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  18.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   4.7s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   3.9s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   7.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   4.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   5.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  16.8s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=   8.



[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  17.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  17.8s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  18.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   8.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   6.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   9.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   6.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   8.4s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  21.9s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  19.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  11.1s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  19.5s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  19.1s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   5.2s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   3.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   3.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   4.9s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   2.3s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  18.7s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  19.4s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  11.0s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  19.5s




[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  21.3s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   7.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   7.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   8.0s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   7.6s
[CV] END activation=tanh, alpha=0.0001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   9.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   3.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total ti



[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  17.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  16.7s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  17.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  11.7s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  17.2s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   4.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   3.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   7.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   4.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   5.0s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  17.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  16.0s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  17.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  11.8s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  18.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   9.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=  10.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=  10.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   6.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   8.3s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  20.3s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  20.3s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  21.5s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  19.6s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  19.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   5.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   3.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   3.5s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   4.8s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   2.6s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  19.1s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  19.4s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  22.0s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  19.7s




[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  19.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   7.9s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   6.7s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   8.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   8.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   9.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   1.8s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=constant, solver=adam; total time=   2.



[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  17.3s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  16.8s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  17.6s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=   6.8s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=adam; total time=  17.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   4.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   4.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   7.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   4.0s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=constant, solver=sgd; total time=   5.2s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  18.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  16.8s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  17.4s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=   6.5s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=adam; total time=  18.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   7.4s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   6.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   9.5s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   6.7s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(100, 50), learning_rate=adaptive, solver=sgd; total time=   7.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  13.7s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  20.1s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  22.6s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  21.4s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=adam; total time=  26.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   7.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   4.8s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   5.2s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   7.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=constant, solver=sgd; total time=   3.6s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  19.1s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  26.5s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  26.1s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  18.5s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=adam; total time=  25.9s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=  10.4s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=   9.8s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=  11.4s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=  10.7s
[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50, 50, 50), learning_rate=adaptive, solver=sgd; total time=  12.5s
Best Parameters: {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'solver': 'adam'}
Training Accuracy: 0.7152866242038216
Test Accuracy: 0.7121752419765665

Classification Report:
               

In [None]:
train_columns = [col for col in train.columns if col not in ('category', 'trip_ID', 'visitor_nation')]

test_reordered = test[train_columns]

preds = pd.concat([test['trip_ID'], pd.Series(best_mlp.predict(test_reordered))], axis=1)
preds.columns = ['trip_ID', 'category']
preds.to_csv("submission_mlp.csv", index=False)

## Decision Tree

In [25]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

dt_model = DecisionTreeClassifier(random_state=42)

# Define the parameter grid
param_grid = {
    'criterion': ['gini', 'entropy', 'log_loss'],  # Splitting criteria
    'splitter': ['best', 'random'],               # How to split nodes
    'max_depth': [10, 20],              # Maximum depth of the tree
    'min_samples_split': [5, 10],              # Minimum samples required to split a node
    'min_samples_leaf': [2, 5],                # Minimum samples required in a leaf node
    'class_weight': ['balanced']            # Adjust class weights for imbalance
}

# Perform grid search with cross-validation
grid_search = GridSearchCV(
    dt_model, param_grid, 
    cv=5, scoring='f1_weighted', 
    verbose=2, n_jobs=-1
)
grid_search.fit(X_train, y_train)

# Evaluate the best model
best_dt_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)

# Test the best model
y_pred = best_dt_model.predict(X_test)

# Evaluate performance on the test set
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nTest Accuracy:", accuracy)
print("Test F1 Score:", f1)
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Fitting 5 folds for each of 48 candidates, totalling 240 fits


  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


[CV] END class_weight=balanced, criterion=gini, max_depth=10, min_samples_leaf=2, min_samples_split=5, splitter=best; total time=   0.1s
[CV] END class_weight=balanced, criterion=gini, max_depth=10, min_samples_leaf=2, min_samples_split=5, splitter=random; total time=   0.0s
[CV] END class_weight=balanced, criterion=gini, max_depth=10, min_samples_leaf=2, min_samples_split=5, splitter=random; total time=   0.0s
[CV] END class_weight=balanced, criterion=gini, max_depth=10, min_samples_leaf=2, min_samples_split=5, splitter=best; total time=   0.1s
[CV] END class_weight=balanced, criterion=gini, max_depth=10, min_samples_leaf=2, min_samples_split=5, splitter=best; total time=   0.1s
[CV] END class_weight=balanced, criterion=gini, max_depth=10, min_samples_leaf=2, min_samples_split=5, splitter=random; total time=   0.0s
[CV] END class_weight=balanced, criterion=gini, max_depth=10, min_samples_leaf=2, min_samples_split=5, splitter=best; total time=   0.1s
[CV] END class_weight=balanced, cri

In [26]:
train_columns = [col for col in train.columns if col not in ('category', 'trip_ID', 'visitor_nation')]

test_reordered = test[train_columns]


preds = pd.concat([test['trip_ID'], pd.Series(best_dt_model.predict(test_reordered))], axis=1)
preds.columns = ['trip_ID', 'category']
preds.to_csv("submission_dt.csv", index=False)