In [41]:
#!pip install ray

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import os
import sys
import shutil
from ray import tune, train
from ray.train import Checkpoint, get_checkpoint
from ray.tune.schedulers import ASHAScheduler
import ray.cloudpickle as pickle
import tempfile
from pathlib import Path


class MLP(nn.Module):
    """Fully connected classifier with two hidden layers, ReLU and dropout.

    The forward pass applies ``fc1`` and ``fc2``, each followed by ReLU and
    dropout, and returns the raw output of ``fc3`` (no softmax is applied).
    A ``CrossEntropyLoss`` instance is stored on the module as ``cost``.

    Args:
        input_size: Number of input features.
        hidden_size: Width of both hidden layers.
        output_size: Number of output units.
        dropout_rate: Probability handed to ``nn.Dropout``.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.2):
        super().__init__()
        # Linear layers are created in fc1 -> fc2 -> fc3 order.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        # Parameter-free helpers shared by both hidden layers.
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.ReLU()
        # Loss object accessed by the training code as ``model.cost``.
        self.cost = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the output of ``fc3`` for the input batch ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(self.relu(layer(hidden)))
        return self.fc3(hidden)


# Seed PyTorch's global random number generator.
torch.manual_seed(42)

# Recreate the Kaggle prediction directory so every run starts with it empty.
kaggle_pred_dir = 'kaggle_preds'
if os.path.isdir(kaggle_pred_dir):
    shutil.rmtree(kaggle_pred_dir)
os.makedirs(kaggle_pred_dir)

# Load the saved arrays; the "test" files are used as the validation split.
X_train = np.load('X_train.npy')
X_val = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_val = np.load('y_test.npy')
X_kaggle = np.load('X_kaggle.npy')
kaggle_file_ids = pd.read_csv('kaggle_file_order.csv')

# Encode class labels as integers. The mapping is built from the training and
# validation labels together so both splits share one index per class.
n_train = len(y_train)
y_combined = np.concatenate((y_train, y_val), axis=0)
class_map = {label: index for index, label in enumerate(np.unique(y_combined))}
mapped_classes = np.array([class_map[label] for label in y_combined])
y_train, y_val = mapped_classes[:n_train], mapped_classes[n_train:]

# Wrap the arrays as tensor datasets; the Kaggle set has features only.
train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.long),
)
val_dataset = TensorDataset(
    torch.tensor(X_val, dtype=torch.float32),
    torch.tensor(y_val, dtype=torch.long),
)
test_dataset = TensorDataset(torch.tensor(X_kaggle, dtype=torch.float32))


# Number of training epochs used by both training functions.
num_epochs = 10


def train_mlp_kaggle(batch_size, hidden_size, dropout_rate, learning_rate, weight_decay):
    """Train an MLP with fixed hyperparameters, writing Kaggle predictions each epoch.

    Uses the module-level ``train_dataset``, ``val_dataset``, ``test_dataset``,
    ``X_train``, ``class_map``, ``kaggle_file_ids``, ``kaggle_pred_dir`` and
    ``num_epochs``. After every epoch the function prints the mean training
    and validation loss with accuracy, predicts the Kaggle set, and writes the
    decoded predictions to ``<kaggle_pred_dir>/<val_accuracy>-preds.csv``.
    Epochs whose validation accuracy formats to the same four decimals write
    to the same file name.

    Args:
        batch_size: Batch size for all three data loaders.
        hidden_size: Width of the MLP's hidden layers.
        dropout_rate: Dropout probability passed to the MLP.
        learning_rate: Learning rate for the Adam optimizer.
        weight_decay: Weight decay for the Adam optimizer.
    """
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # Initialize MLP model
    input_size = X_train.shape[1]
    # Class indices are assigned over the combined train+val labels, so the
    # output layer is sized from the full mapping; counting only the labels
    # present in y_train could leave it too small for some target indices.
    output_size = len(class_map)

    model = MLP(input_size, hidden_size, output_size, dropout_rate=dropout_rate)

    # Define optimizer (the loss function lives on the model as model.cost)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Invert the label mapping once so predictions decode with a dict lookup.
    index_to_class = {class_idx: class_name for class_name, class_idx in class_map.items()}

    # Training loop
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0.0
        correct_train = 0
        total_train = 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = model.cost(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total_train += targets.size(0)
            correct_train += (predicted == targets).sum().item()

        train_accuracy = correct_train / total_train
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss / len(train_loader):.4f}, Train Accuracy: {100 * train_accuracy:.2f}%")

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = model.cost(outputs, targets)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total_val += targets.size(0)
                correct_val += (predicted == targets).sum().item()

        val_accuracy = correct_val / total_val

        print(f"Epoch {epoch+1}/{num_epochs}, Validation Loss: {val_loss / len(val_loader):.4f}, Validation Accuracy: {100 * val_accuracy:.2f}%")

        # Predict the Kaggle set; the model is still in eval mode from the
        # validation pass above.
        predicted_indices = []
        with torch.no_grad():
            for (inputs,) in test_loader:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                predicted_indices.extend(predicted.tolist())
        kaggle_preds_list = [index_to_class[class_idx] for class_idx in predicted_indices]

        # Pair the predictions with the file ids and save them under a name
        # derived from this epoch's validation accuracy.
        predictions = pd.concat(
            [kaggle_file_ids, pd.DataFrame(kaggle_preds_list, columns=['class'])],
            axis=1,
        )
        predictions.to_csv(
            os.path.join(kaggle_pred_dir, f'{val_accuracy:.4f}-preds.csv'),
            index=False,
        )



def train_mlp_raytune(config):
    """Ray Tune trainable: train an MLP from ``config`` and report after every epoch.

    Uses the module-level ``train_dataset``, ``val_dataset``, ``X_train``,
    ``class_map`` and ``num_epochs``. If Ray provides a checkpoint, the model
    and optimizer state are restored from it before training continues.
    After each epoch the function saves a checkpoint and reports the mean
    validation loss per batch (``"loss"``) and the validation accuracy
    (``"accuracy"``), so an epoch-based scheduler sees one result per epoch.

    Args:
        config: Mapping with the keys ``batch_size``, ``hidden_size``,
            ``dropout_rate``, ``learning_rate`` and ``weight_decay``.
    """
    batch_size = config['batch_size']
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Initialize MLP model
    input_size = X_train.shape[1]
    hidden_size = config['hidden_size']
    # Class indices are assigned over the combined train+val labels, so the
    # output layer is sized from the full mapping rather than from y_train.
    output_size = len(class_map)

    model = MLP(input_size, hidden_size, output_size, dropout_rate=config['dropout_rate'])

    # Define optimizer (the loss function lives on the model as model.cost)
    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])

    # Resume from the checkpoint Ray hands back, if there is one.
    start_epoch = 0
    checkpoint = get_checkpoint()
    if checkpoint:
        with checkpoint.as_directory() as checkpoint_dir:
            data_path = Path(checkpoint_dir) / "data.pkl"
            # NOTE(review): unpickling executes arbitrary code; this assumes
            # checkpoints only ever come from this function's own reports.
            with open(data_path, "rb") as fp:
                checkpoint_state = pickle.load(fp)
            # NOTE(review): the stored epoch is the one that had just
            # finished, so a resumed trial repeats it - confirm this is
            # intended.
            start_epoch = checkpoint_state["epoch"]
            model.load_state_dict(checkpoint_state["net_state_dict"])
            optimizer.load_state_dict(checkpoint_state["optimizer_state_dict"])

    # Training loop
    for epoch in range(start_epoch, num_epochs):
        # Training
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            loss = model.cost(model(inputs), targets)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                val_loss += model.cost(outputs, targets).item()
                _, predicted = torch.max(outputs, 1)
                total_val += targets.size(0)
                correct_val += (predicted == targets).sum().item()

        val_accuracy = correct_val / total_val
        # Average over batches so the value does not scale with the number of
        # batches, which depends on the tuned batch size.
        mean_val_loss = val_loss / len(val_loader)

        # Checkpoint and report inside the loop: one result per epoch is what
        # lets the scheduler compare trials and stop them early.
        checkpoint_data = {
            "epoch": epoch,
            "net_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        with tempfile.TemporaryDirectory() as checkpoint_dir:
            data_path = Path(checkpoint_dir) / "data.pkl"
            with open(data_path, "wb") as fp:
                pickle.dump(checkpoint_data, fp)

            checkpoint = Checkpoint.from_directory(checkpoint_dir)
            train.report(
                {"loss": mean_val_loss, "accuracy": val_accuracy},
                checkpoint=checkpoint,
            )


# uncomment the code below to search hyperparameters with raytune
# ----------------------------------------------------------------------------
# hyperparameter_set = {
#     'batch_size': tune.grid_search([8, 16, 32]),
#     'hidden_size': tune.grid_search([32, 64, 128]),
#     'dropout_rate': tune.uniform(0.1, 0.5),
#     'learning_rate': tune.loguniform(1e-4, 1e-1),
#     'weight_decay': tune.loguniform(1e-6, 1e-2)
# }

# scheduler = ASHAScheduler(
#     max_t=num_epochs,
#     grace_period=1,
#     reduction_factor=2)

# tuner = tune.Tuner(
#     tune.with_resources(
#         tune.with_parameters(train_mlp_raytune),
#         resources={"cpu": 1, "gpu": 0}
#     ),
#     tune_config=tune.TuneConfig(
#         metric="accuracy",
#         mode="max",
#         scheduler=scheduler,
#         num_samples=20,
#     ),
#     param_space=hyperparameter_set,
# )
# results = tuner.fit()

# best_result = results.get_best_result("accuracy", "max")

# print("Best trial final validation loss: {}".format(
#     best_result.metrics["loss"]))
# print("Best trial config: {}".format(best_result.config))
# print("Best trial final validation accuracy: {}".format(
#     best_result.metrics["accuracy"]))
# ----------------------------------------------------------------------------


# Train with one specific hyperparameter configuration (comment this call out to skip it).
train_mlp_kaggle(
    batch_size=8,
    hidden_size=64,
    dropout_rate=0.22755289383709132,
    learning_rate=0.002992940728568832,
    weight_decay=0.006695324606709262,
)

Epoch 1/10, Train Loss: 1.8281, Train Accuracy: 35.24%
Epoch 1/10, Validation Loss: 1.1979, Validation Accuracy: 63.70%
Epoch 2/10, Train Loss: 1.0233, Train Accuracy: 63.97%
Epoch 2/10, Validation Loss: 0.9287, Validation Accuracy: 70.00%
Epoch 3/10, Train Loss: 0.7068, Train Accuracy: 75.08%
Epoch 3/10, Validation Loss: 0.8188, Validation Accuracy: 68.52%
Epoch 4/10, Train Loss: 0.4692, Train Accuracy: 85.24%
Epoch 4/10, Validation Loss: 0.7756, Validation Accuracy: 72.96%
Epoch 5/10, Train Loss: 0.3989, Train Accuracy: 88.41%
Epoch 5/10, Validation Loss: 0.8608, Validation Accuracy: 70.74%
Epoch 6/10, Train Loss: 0.3462, Train Accuracy: 89.52%
Epoch 6/10, Validation Loss: 0.7932, Validation Accuracy: 74.07%
Epoch 7/10, Train Loss: 0.3034, Train Accuracy: 89.84%
Epoch 7/10, Validation Loss: 0.9107, Validation Accuracy: 75.19%
Epoch 8/10, Train Loss: 0.2741, Train Accuracy: 91.27%
Epoch 8/10, Validation Loss: 0.8904, Validation Accuracy: 74.07%
Epoch 9/10, Train Loss: 0.2370, Train Ac