# Battle Predictions

The following notebook is intended to be used to train deep architectures on the task of predicting the battle outcome of a fight of two pokemons. This is part of the MCK Pokemon Hackathon challenge 2020.

----

## 0. Environmental setup

In [1]:
import torch
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import StandardScaler
import torch.utils.data as data_utils
from torch import nn
from torch.optim import AdamW
import time
import os
import copy
from sklearn.metrics import mean_squared_error

np.random.seed(1234)
random.seed(1234)
torch.manual_seed(1234)

<torch._C.Generator at 0x7f659c662df0>

---

## 1. Read in data

Before, we start with the analyses we will read in the preprocessed battle data.

In [2]:
battles = pd.read_csv('../data/04_features/battles_preprocessed.csv')
battles.head()

Unnamed: 0,Name_1,Level_1,Price_1,HP_1,Attack_1,Defense_1,Sp_Atk_1,Sp_Def_1,Speed_1,Legendary_1,...,Poison_2,Psychic_2,Rock_2,Water_2,Night,Rain,Sunshine,Unknown,Windy,HPPR_1
0,Metapod,30,441,150,36,103,47,47,58,0,...,0,0,0,0,0,0,0,0,1,0.0
1,Pinsir,36,1227,196,304,237,129,164,197,0,...,0,0,0,0,0,0,0,1,0,0.0
2,Metapod,15,297,92,26,73,33,33,40,0,...,0,0,0,0,0,0,0,0,1,0.0
3,Pinsir,40,1401,228,346,270,146,187,224,0,...,0,0,0,0,0,0,0,1,0,0.938596
4,Pinsir,12,634,91,157,124,70,86,106,0,...,0,0,0,0,0,0,0,0,1,0.450549


Next, we will clean the data by putting the duplicate battles, which we will later add again to the training data.

In [3]:
aside = battles.loc[battles.duplicate_count==2,:].copy().drop('duplicate_count', axis=1)
battles = battles.loc[battles.duplicate_count==1,:].reset_index(drop=True).drop('duplicate_count', axis=1)
n_unique_battles = battles.shape[0]
print(n_unique_battles)

2682071


---

## 2. Data splitting

After having read in the data, we will now split the data into a training, validation and test set. This will enable us assess different models for the task on the test set, while selecting the models during training using the validation score. 

In [4]:
np.random.seed(1234)
idc = np.arange(n_unique_battles)
shuffled_idc = np.random.permutation(idc)
print(shuffled_idc)

[ 315557  206580 2373003 ...  165158 2548435  486191]


In [5]:
train_val_test_split = [0.7, 0.2, 0.1]

train_split_idx = int(n_unique_battles*train_val_test_split[0])
val_split_idx = int(n_unique_battles*(train_val_test_split[0]+train_val_test_split[1]))

train_battles = battles.iloc[shuffled_idc[:train_split_idx],:]
val_battles = battles.iloc[shuffled_idc[train_split_idx:val_split_idx], :]
test_battles = battles.iloc[shuffled_idc[val_split_idx:], :]

In [6]:
train_battles.describe()

Unnamed: 0,Level_1,Price_1,HP_1,Attack_1,Defense_1,Sp_Atk_1,Sp_Def_1,Speed_1,Legendary_1,Level_2,...,Poison_2,Psychic_2,Rock_2,Water_2,Night,Rain,Sunshine,Unknown,Windy,HPPR_1
count,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,...,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0,1877449.0
mean,50.00264,1780.242,332.9903,304.4913,287.3161,283.7575,281.0847,290.6018,0.02067433,49.98841,...,0.2205146,0.08988207,0.06890627,0.2137097,0.1998792,0.2000693,0.2,0.2000081,0.2000433,0.4209084
std,28.57739,1328.78,262.0745,251.1628,242.9625,243.6014,229.9843,242.3465,0.1422916,28.57526,...,0.4145938,0.2860128,0.2532947,0.4099243,0.3999095,0.4000521,0.4,0.4000062,0.4000325,0.456758
min,1.0,195.0,10.0,5.0,5.0,15.0,20.0,15.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,25.0,729.0,131.0,119.0,111.0,109.0,110.0,113.0,0.0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,50.0,1356.0,253.0,222.0,207.0,200.0,203.0,208.0,0.0,50.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,75.0,2509.0,472.0,421.0,392.0,382.0,384.0,396.0,0.0,75.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,99.0,8106.0,2312.0,1865.0,2336.0,1685.0,1508.0,1595.0,1.0,99.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [7]:
val_battles.describe()

Unnamed: 0,Level_1,Price_1,HP_1,Attack_1,Defense_1,Sp_Atk_1,Sp_Def_1,Speed_1,Legendary_1,Level_2,...,Poison_2,Psychic_2,Rock_2,Water_2,Night,Rain,Sunshine,Unknown,Windy,HPPR_1
count,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,...,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0,536414.0
mean,49.984814,1778.619337,332.488647,304.010127,287.065308,283.583793,280.939722,290.53174,0.020732,49.993829,...,0.220494,0.089155,0.069273,0.214295,0.200772,0.19951,0.19998,0.199536,0.200202,0.420947
std,28.591657,1327.052255,261.759171,250.444679,242.72009,243.682674,229.856072,242.289306,0.142486,28.566771,...,0.41458,0.284968,0.253918,0.410333,0.400578,0.399632,0.399985,0.399652,0.400152,0.456716
min,1.0,195.0,10.0,5.0,5.0,15.0,20.0,15.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,25.0,728.0,130.0,119.0,111.0,109.0,110.0,113.0,0.0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,50.0,1354.0,252.0,222.0,206.0,200.0,203.0,208.0,0.0,50.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,75.0,2509.0,471.0,420.0,391.0,381.0,384.0,395.0,0.0,75.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,99.0,8106.0,2312.0,1865.0,2336.0,1685.0,1508.0,1595.0,1.0,99.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [8]:
test_battles.describe()

Unnamed: 0,Level_1,Price_1,HP_1,Attack_1,Defense_1,Sp_Atk_1,Sp_Def_1,Speed_1,Legendary_1,Level_2,...,Poison_2,Psychic_2,Rock_2,Water_2,Night,Rain,Sunshine,Unknown,Windy,HPPR_1
count,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,...,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0,268208.0
mean,50.041811,1779.578018,333.187504,304.207507,287.110757,283.524257,280.968838,290.579155,0.02114,50.033783,...,0.219375,0.089658,0.069308,0.214117,0.198991,0.20024,0.199319,0.201351,0.200098,0.42052
std,28.564028,1328.896733,262.474662,251.098352,242.290139,243.091525,230.304347,241.968101,0.143852,28.588714,...,0.413824,0.285692,0.253978,0.410209,0.399242,0.400181,0.399489,0.401011,0.400075,0.456799
min,1.0,195.0,10.0,5.0,5.0,15.0,20.0,15.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,25.0,730.0,131.0,119.0,111.0,109.0,110.0,113.0,0.0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,50.0,1355.0,253.0,222.0,207.0,200.0,203.0,208.0,0.0,50.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,75.0,2510.0,473.0,421.0,391.0,382.0,383.0,396.0,0.0,75.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
max,99.0,8106.0,2312.0,1865.0,2336.0,1685.0,1508.0,1595.0,1.0,99.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [9]:
n_unique_battles == len(train_battles) + len(val_battles) + len(test_battles)

True

In [10]:
train_battles = train_battles.append(aside, ignore_index=True)
len(train_battles)

1914541

## 3. Data preparation

After splitting the data, we will now perform some final data preparation to make it usable as input for in the pytorch framework.

First, we extract the labels for the individual data splits.

In [11]:
y_train = train_battles.BattleResult.copy()
y_val = val_battles.BattleResult.copy()
y_test = test_battles.BattleResult.copy()

Second, we drop the information in the data that is directly related to the response.

In [12]:
# drop response from train/test input
X_train = train_battles.drop(['BattleResult','HPPR_1','Name_1','Name_2','Battle_MainType'], axis=1)
X_val = val_battles.drop(['BattleResult','HPPR_1','Name_1','Name_2','Battle_MainType'], axis=1)
X_test = test_battles.drop(['BattleResult','HPPR_1','Name_1','Name_2','Battle_MainType'], axis=1)

Third, we standardize the continuous features.

In [13]:
scaler = StandardScaler()
cont_cols = ['Level_1','Price_1','Attack_1', 'Defense_1', 'Sp_Atk_1', 'Sp_Def_1', 'Speed_1', 'Level_2','Price_2','Attack_2', 'Defense_2', 'Sp_Atk_2', 'Sp_Def_2', 'Speed_2']
X_train_cont = X_train[cont_cols]
fitted_scaler = scaler.fit(X_train_cont)

In [14]:
X_train_cont_sc = fitted_scaler.transform(X_train_cont)
X_train[cont_cols] = X_train_cont_sc

In [15]:
X_val_cont_sc = fitted_scaler.transform(X_val[cont_cols])
X_val[cont_cols] = X_val_cont_sc

In [16]:
X_test_cont_sc = fitted_scaler.transform(X_test[cont_cols])
X_test[cont_cols] = X_test_cont_sc

Third, we pack those information into ``torch.data.Dataset`` types.

In [17]:
# check if GPU is available and set the device accordingly
def get_device():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  
    return device

torch.cuda.get_device_name(0)

'Quadro T2000'

In [18]:
# Note that from here on we expect GPU to be available, if that is not the case 
# use torch.xxxTensor instead of torch.cuda.xxxTensor

train_tensors = data_utils.TensorDataset(
    torch.cuda.FloatTensor(np.array(X_train)), 
    torch.cuda.FloatTensor(np.array(y_train)))

val_tensors = data_utils.TensorDataset(
    torch.cuda.FloatTensor(np.array(X_val)), 
    torch.cuda.FloatTensor(np.array(y_val)))

test_tensors = data_utils.TensorDataset(
    torch.cuda.FloatTensor(np.array(X_test)), 
    torch.cuda.FloatTensor(np.array(y_test)))

---

## 4. Model training functions

In the following we will implement the required functions to actually train a ``torch.nn.Module`` on the created datasets

In [19]:
def train_regression_model(
        model,
        data_loaders_dict,
        loss_function,
        optimizer,
        num_epochs=100,
        device=None,
        early_stopping=20,
        output_dir=".",
):
    r""" Function to train a deep architecture on a regression task.

    Parameters
    ----------
    model : torch.nn.Module
        Model to be trained.

    data_loaders_dict : dict
        The `Dataloader`s used for the training, validation and potentially the testing of the model associated with the
        keys ``train``, ``val``, ``test``.

    loss_function : pytorch loss object
        A pytorch compatible loss function instance that is optimized during the training.

    optimizer : pytorch optimizer object
        An optimizer that is used to optimize the loss function, i.e. an instance of one of the classes defined in
        :py:mod:`torch.optim`.

    num_epochs : int
        The number of epochs the model is at most trained for

    device : :py:class:`~torch.device.Device`
        The device that is used for the computations.

    early_stopping : int
        The number of epochs the validation loss is supposed to be not decrease before the training is stopped even
        if the set maximum number of training epochs is not yet reached.

    output_dir : str
        The directory, where the training results i.a. checkpoints of the trained model are stored.


    Returns
    -------
    (fitted_model, fitting_history_dict) : tuple(:py:class:`~torch.nn.Module`, dict)
        [1] The best found model during the training procedure.
        [2] A dictionary with the kes ``train`` and ``val`` that displays the evolution of the training and validation
        loss during the training.
    """

    # Initialize a learning rate scheduler.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, "min", verbose=True
    )

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if device is None:
        device = get_device()
    else:
        pass

    print(device)
    since = time.time()

    model = model.to(device)

    val_loss_history = []
    train_loss_history = []
    fitting_history_dict = {"train": train_loss_history, "val": val_loss_history}

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = np.infty
    early_stopping_counter = 0

    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs - 1))
        print("-" * 70)

        if early_stopping_counter > early_stopping and early_stopping_counter > 0:
            print(
                "Stopped training because of no improvement of the validation score for "
                + str(early_stopping)
                + " epochs."
            )
            break

        # Each epoch has a training and validation phase
        for phase in ["train", "val"]:
            if phase == "train":
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_error = 0.0

            for index, data in enumerate(data_loaders_dict[phase]):
                inputs = data[0].type(torch.FloatTensor).to(device)
                labels = data[1].type(torch.FloatTensor).to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Enable training
                with torch.set_grad_enabled(
                        phase == "train"
                ) and torch.autograd.set_detect_anomaly(False):

                    # Forward pass and calculate loss
                    outputs = model(inputs)
                    batch_size = labels.size(0)
                    loss = loss_function(
                        outputs.view(batch_size, -1), labels.view(batch_size, -1)
                    )

                # Backpropagation of the loss during the training phase
                if phase == "train":
                    with torch.autograd.set_detect_anomaly(False):
                        loss.backward()
                        optimizer.step()

                # Compute epoch statistics.
                running_loss += loss.item()
                running_error += np.sqrt(running_loss)

            epoch_loss = running_loss / len(data_loaders_dict[phase])
            epoch_roloss = np.sqrt(running_loss / len(data_loaders_dict[phase]))

            print(
                "{} {} loss: {:.6f} root of loss: {:.6f}".format(
                    phase, loss.__class__.__name__, epoch_loss, epoch_roloss
                )
            )

            # Deep copy the model if it has the best validation loss.
            # Thereby the best validation loss and not the potentially requested square root of it is used to determine
            # the superiority of a model. Due to the concavity of the square root function this has no influence
            # on the overall process.
            if phase == "val":
                scheduler.step(epoch_loss)
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model = copy.deepcopy(model)
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save(best_model, output_dir + "/best_model.pth")
                    torch.save(best_model_wts, output_dir + "/best_model_weights.pth")
                    early_stopping_counter = 0
                else:
                    early_stopping_counter += 1

            fitting_history_dict[phase].append(epoch_loss)

    time_elapsed = time.time() - since
    print(
        "Training complete in {:.0f}m {:.0f}s".format(
            time_elapsed // 60, time_elapsed % 60
        )
    )

    print("Best val loss : {:4f}".format(best_loss))

    # Load best model weights
    model.load_state_dict(best_model_wts)

    # Get test loss
    if "test" in data_loaders_dict.keys():
        running_loss = 0.0
        running_error = 0.0
        for index, data in enumerate(data_loaders_dict["test"]):
            inputs = data[0].type(torch.FloatTensor).to(device)
            labels = data[1].type(torch.FloatTensor).to(device)

            with torch.set_grad_enabled(False):
                # Forward pass
                outputs = model(inputs)
                outputs = outputs.view(-1)
                labels = labels.view(-1)
                loss = loss_function(outputs, labels)

            # Compute statistics.
            running_loss += loss.item()
            running_error += np.sqrt(running_loss)

        epoch_loss = running_loss / len(data_loaders_dict["test"])
        epoch_roloss = np.sqrt(running_loss / len(data_loaders_dict["test"]))

        print(
            "{} {} loss: {:.6f} root of loss: {:.6f}".format(
                "test", loss.__class__.__name__, epoch_loss, epoch_roloss
            )
        )
        print("-" * 70)
        print("-" * 70)

    return model, fitting_history_dict

In [20]:
def init_weights(m):
  if type(m) == nn.Linear:
    torch.nn.init.xavier_uniform_(m.weight)
    m.bias.data.fill_(0.01)

Finally, we create the dataloader and a dataloader dictionary.

In [21]:
train_loader = data_utils.DataLoader(train_tensors, 
                                   batch_size = 1024, shuffle = True)

train_eval_loader = data_utils.DataLoader(train_tensors, 
                                   batch_size = 1024, shuffle = False)

val_loader = data_utils.DataLoader(val_tensors, 
                                   batch_size = 1024, shuffle = False)

test_loader = data_utils.DataLoader(test_tensors, 
                                   batch_size = 1024, shuffle = False)

data_loaders_dict = {'train':train_loader, 'val':val_loader, 'test':test_loader}

---

## 5. Experiments

We have now prepared everything we need to run the first experiments. Therefore, we each time need to define a model architecture, choose a loss function, an optimizer and make decision concerning the early stopping criterium.


### 5.1. Small DNN

In [34]:
torch.manual_seed(1234)
device = get_device()
units = [512, 512, 512, 512, 512]

small_dnn = nn.Sequential()
small_dnn.add_module('dense_0',nn.Linear(X_train.shape[1], units[0]))
for i in range(1,len(units)):
    small_dnn.add_module('dense_{}'.format(i),nn.Linear(units[i-1], units[i]))
    small_dnn.add_module('norm_{}'.format(i), nn.BatchNorm1d(units[i]))
    small_dnn.add_module('relu_{}'.format(i), nn.ReLU())
small_dnn.add_module('out', nn.Linear(units[-1],1))

small_dnn.apply(init_weights)
small_dnn.to(device)
print(small_dnn)

Sequential(
  (dense_0): Linear(in_features=61, out_features=512, bias=True)
  (dense_1): Linear(in_features=512, out_features=512, bias=True)
  (norm_1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU()
  (dense_2): Linear(in_features=512, out_features=512, bias=True)
  (norm_2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_2): ReLU()
  (dense_3): Linear(in_features=512, out_features=512, bias=True)
  (norm_3): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_3): ReLU()
  (dense_4): Linear(in_features=512, out_features=512, bias=True)
  (norm_4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_4): ReLU()
  (out): Linear(in_features=512, out_features=1, bias=True)
)


We now define the optimizer and the loss function.

In [35]:
params_to_update = small_dnn.parameters()
optimizer = AdamW(params_to_update, lr=1e-3, weight_decay=0.0)
loss_function = nn.MSELoss()

Finally, we define the output directory and the early stopping parameter.

In [38]:
early_stopping = 20
num_epochs = 200
output_dir = '../data/99_non_catalogued/dnn_m1/'

And finally we can run the training.

In [39]:
fitted_model, fitting_history = train_regression_model(
        model=small_dnn,
        data_loaders_dict=data_loaders_dict,
        loss_function=loss_function,
        optimizer=optimizer,
        num_epochs=100,
        device=None,
        early_stopping=early_stopping,
        output_dir=output_dir)

cuda:0
Epoch 0/99
----------------------------------------------------------------------
train Tensor loss: 918.396212 root of loss: 30.305053
val Tensor loss: 881.418357 root of loss: 29.688691
Epoch 1/99
----------------------------------------------------------------------
train Tensor loss: 892.588671 root of loss: 29.876222
val Tensor loss: 855.777579 root of loss: 29.253676
Epoch 2/99
----------------------------------------------------------------------
train Tensor loss: 875.935524 root of loss: 29.596208
val Tensor loss: 805.959714 root of loss: 28.389430
Epoch 3/99
----------------------------------------------------------------------
train Tensor loss: 870.213625 root of loss: 29.499383
val Tensor loss: 815.328986 root of loss: 28.553966
Epoch 4/99
----------------------------------------------------------------------
train Tensor loss: 865.631055 root of loss: 29.421609
val Tensor loss: 815.831488 root of loss: 28.562764
Epoch 5/99
------------------------------------------

train Tensor loss: 734.597702 root of loss: 27.103463
val Tensor loss: 707.266001 root of loss: 26.594473
Epoch 44/99
----------------------------------------------------------------------
train Tensor loss: 725.495372 root of loss: 26.935021
val Tensor loss: 704.879787 root of loss: 26.549572
Epoch 45/99
----------------------------------------------------------------------
train Tensor loss: 732.125772 root of loss: 27.057823
val Tensor loss: 721.920574 root of loss: 26.868580
Epoch 46/99
----------------------------------------------------------------------
train Tensor loss: 730.191383 root of loss: 27.022054
val Tensor loss: 716.390550 root of loss: 26.765473
Epoch 47/99
----------------------------------------------------------------------
train Tensor loss: 727.848415 root of loss: 26.978666
val Tensor loss: 718.681141 root of loss: 26.808229
Epoch 48/99
----------------------------------------------------------------------
train Tensor loss: 722.809026 root of loss: 26.885108
v

train Tensor loss: 716.025520 root of loss: 26.758653
val Tensor loss: 705.262181 root of loss: 26.556773
Epoch 87/99
----------------------------------------------------------------------
train Tensor loss: 721.935626 root of loss: 26.868860
val Tensor loss: 706.263684 root of loss: 26.575622
Epoch 88/99
----------------------------------------------------------------------
Stopped training because of no improvement of the validation score for 20 epochs.
Training complete in 41m 23s
Best val loss : 700.060491
test Tensor loss: 696.601187 root of loss: 26.393203
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


---

### 5.2. Medium DNN

In [22]:
torch.manual_seed(1234)
device = get_device()
units = [512, 512, 512, 512, 512, 512, 512]

medium_dnn = nn.Sequential()
medium_dnn.add_module('dense_0',nn.Linear(X_train.shape[1], units[0]))
for i in range(1,len(units)):
    medium_dnn.add_module('dense_{}'.format(i),nn.Linear(units[i-1], units[i]))
    medium_dnn.add_module('norm_{}'.format(i), nn.BatchNorm1d(units[i]))
    medium_dnn.add_module('relu_{}'.format(i), nn.ReLU())
medium_dnn.add_module('out', nn.Linear(units[-1],1))

medium_dnn.apply(init_weights)
medium_dnn.to(device)
print(medium_dnn)

Sequential(
  (dense_0): Linear(in_features=61, out_features=512, bias=True)
  (dense_1): Linear(in_features=512, out_features=512, bias=True)
  (norm_1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU()
  (dense_2): Linear(in_features=512, out_features=512, bias=True)
  (norm_2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_2): ReLU()
  (dense_3): Linear(in_features=512, out_features=512, bias=True)
  (norm_3): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_3): ReLU()
  (dense_4): Linear(in_features=512, out_features=512, bias=True)
  (norm_4): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_4): ReLU()
  (dense_5): Linear(in_features=512, out_features=512, bias=True)
  (norm_5): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_5): ReLU()
  (dense_6): Linear(in_features=5

We now define the optimizer and the loss function.

In [29]:
params_to_update = medium_dnn.parameters()
optimizer = AdamW(params_to_update, lr=1e-3, weight_decay=0.0)
loss_function = nn.MSELoss()

Finally, we define the output directory and the early stopping parameter.

In [30]:
early_stopping = 20
num_epochs = 100
output_dir = '../data/99_non_catalogued/dnn_m2/'

And finally we can run the training.

In [31]:
fitted_model, fitting_history = train_regression_model(
        model=medium_dnn,
        data_loaders_dict=data_loaders_dict,
        loss_function=loss_function,
        optimizer=optimizer,
        num_epochs=num_epochs,
        device=None,
        early_stopping=early_stopping,
        output_dir=output_dir)

cuda:0
Epoch 0/99
----------------------------------------------------------------------
train Tensor loss: 1102.872542 root of loss: 33.209525
val Tensor loss: 1160.817235 root of loss: 34.070768
Epoch 1/99
----------------------------------------------------------------------
train Tensor loss: 1073.195569 root of loss: 32.759664
val Tensor loss: 2454.692765 root of loss: 49.544856
Epoch 2/99
----------------------------------------------------------------------
train Tensor loss: 1078.732792 root of loss: 32.844068
val Tensor loss: 1393.757479 root of loss: 37.333061
Epoch 3/99
----------------------------------------------------------------------
train Tensor loss: 1067.042490 root of loss: 32.665616
val Tensor loss: 1043.968571 root of loss: 32.310502
Epoch 4/99
----------------------------------------------------------------------
train Tensor loss: 1065.507997 root of loss: 32.642120
val Tensor loss: 953.825943 root of loss: 30.884073
Epoch 5/99
---------------------------------

train Tensor loss: 585.567028 root of loss: 24.198492
val Tensor loss: 695.386746 root of loss: 26.370187
Epoch 44/99
----------------------------------------------------------------------
train Tensor loss: 587.945257 root of loss: 24.247582
val Tensor loss: 638.677491 root of loss: 25.272069
Epoch 45/99
----------------------------------------------------------------------
train Tensor loss: 583.685204 root of loss: 24.159578
val Tensor loss: 689.181064 root of loss: 26.252258
Epoch 46/99
----------------------------------------------------------------------
train Tensor loss: 581.719075 root of loss: 24.118853
val Tensor loss: 661.066207 root of loss: 25.711208
Epoch 47/99
----------------------------------------------------------------------
train Tensor loss: 586.578452 root of loss: 24.219382
val Tensor loss: 660.617697 root of loss: 25.702484
Epoch 48/99
----------------------------------------------------------------------
train Tensor loss: 572.955901 root of loss: 23.936497
v

---

### 5.3. Tiny DNN

In [27]:
torch.manual_seed(1234)
device = get_device()
units = [512, 512, 512, 512]

tiny_dnn = nn.Sequential()
tiny_dnn.add_module('dense_0',nn.Linear(X_train.shape[1], units[0]))
for i in range(1,len(units)):
    tiny_dnn.add_module('dense_{}'.format(i),nn.Linear(units[i-1], units[i]))
    tiny_dnn.add_module('norm_{}'.format(i), nn.BatchNorm1d(units[i]))
    tiny_dnn.add_module('relu_{}'.format(i), nn.ReLU())
tiny_dnn.add_module('out', nn.Linear(units[-1],1))

tiny_dnn.apply(init_weights)
tiny_dnn.to(device)
print(tiny_dnn)

Sequential(
  (dense_0): Linear(in_features=61, out_features=512, bias=True)
  (dense_1): Linear(in_features=512, out_features=512, bias=True)
  (norm_1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU()
  (dense_2): Linear(in_features=512, out_features=512, bias=True)
  (norm_2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_2): ReLU()
  (dense_3): Linear(in_features=512, out_features=512, bias=True)
  (norm_3): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_3): ReLU()
  (out): Linear(in_features=512, out_features=1, bias=True)
)


We now define the optimizer and the loss function.

In [28]:
params_to_update = tiny_dnn.parameters()
optimizer = AdamW(params_to_update, lr=1e-3, weight_decay=0.0)
loss_function = nn.MSELoss()

Finally, we define the output directory and the early stopping parameter.

In [29]:
early_stopping = 20
num_epochs = 200
output_dir = '../data/99_non_catalogued/dnn_m3/'

And finally we can run the training.

In [30]:
fitted_model, fitting_history = train_regression_model(
        model=tiny_dnn,
        data_loaders_dict=data_loaders_dict,
        loss_function=loss_function,
        optimizer=optimizer,
        num_epochs=num_epochs,
        device=None,
        early_stopping=early_stopping,
        output_dir=output_dir)

cuda:0
Epoch 0/199
----------------------------------------------------------------------
train Tensor loss: 38221.495313 root of loss: 195.503185
val Tensor loss: 5500.130947 root of loss: 74.162868
Epoch 1/199
----------------------------------------------------------------------
train Tensor loss: 3827.354630 root of loss: 61.865618
val Tensor loss: 4592.416506 root of loss: 67.767371
Epoch 2/199
----------------------------------------------------------------------
train Tensor loss: 3296.990182 root of loss: 57.419423
val Tensor loss: 7739.951804 root of loss: 87.976996
Epoch 3/199
----------------------------------------------------------------------
train Tensor loss: 2936.769357 root of loss: 54.191968
val Tensor loss: 2998.952043 root of loss: 54.762688
Epoch 4/199
----------------------------------------------------------------------
train Tensor loss: 2623.798537 root of loss: 51.223027
val Tensor loss: 2389.925378 root of loss: 48.886863
Epoch 5/199
------------------------

train Tensor loss: 1142.897570 root of loss: 33.806768
val Tensor loss: 886.337654 root of loss: 29.771423
Epoch 43/199
----------------------------------------------------------------------
train Tensor loss: 1146.007420 root of loss: 33.852731
val Tensor loss: 939.979813 root of loss: 30.659090
Epoch 44/199
----------------------------------------------------------------------
train Tensor loss: 1129.468034 root of loss: 33.607559
val Tensor loss: 863.827043 root of loss: 29.390935
Epoch 45/199
----------------------------------------------------------------------
train Tensor loss: 1136.682490 root of loss: 33.714722
val Tensor loss: 891.712609 root of loss: 29.861557
Epoch 46/199
----------------------------------------------------------------------
train Tensor loss: 1124.637826 root of loss: 33.535620
val Tensor loss: 875.905860 root of loss: 29.595707
Epoch 47/199
----------------------------------------------------------------------
train Tensor loss: 1115.330296 root of loss: 

train Tensor loss: 1008.208630 root of loss: 31.752301
val Tensor loss: 805.833266 root of loss: 28.387203
Epoch    85: reducing learning rate of group 0 to 1.0000e-06.
Epoch 86/199
----------------------------------------------------------------------
train Tensor loss: 1009.012570 root of loss: 31.764958
val Tensor loss: 814.057242 root of loss: 28.531688
Epoch 87/199
----------------------------------------------------------------------
train Tensor loss: 1009.458827 root of loss: 31.771982
val Tensor loss: 816.015322 root of loss: 28.565982
Epoch 88/199
----------------------------------------------------------------------
train Tensor loss: 1007.013428 root of loss: 31.733475
val Tensor loss: 819.682197 root of loss: 28.630093
Epoch 89/199
----------------------------------------------------------------------
train Tensor loss: 1014.045913 root of loss: 31.844088
val Tensor loss: 801.089702 root of loss: 28.303528
Epoch 90/199
------------------------------------------------------

---

### 5.4. Tiny DNN (256)

In [None]:
torch.manual_seed(1234)
device = get_device()
units = [256, 256, 256, 256]

tiny_dnn_256 = nn.Sequential()
tiny_dnn_256.add_module('dense_0',nn.Linear(X_train.shape[1], units[0]))
for i in range(1,len(units)):
    tiny_dnn_256.add_module('dense_{}'.format(i),nn.Linear(units[i-1], units[i]))
    tiny_dnn_256.add_module('norm_{}'.format(i), nn.BatchNorm1d(units[i]))
    tiny_dnn_256.add_module('relu_{}'.format(i), nn.ReLU())
tiny_dnn_256.add_module('out', nn.Linear(units[-1],1))

tiny_dnn_256.apply(init_weights)
tiny_dnn_256.to(device)
print(tiny_dnn_256)

We now define the optimizer and the loss function.

In [None]:
params_to_update = tiny_dnn_256.parameters()
optimizer = AdamW(params_to_update, lr=1e-3, weight_decay=0.0)
loss_function = nn.MSELoss()

Finally, we define the output directory and the early stopping parameter.

In [None]:
early_stopping = 20
num_epochs = 200
output_dir = '../data/99_non_catalogued/dnn_m4/'

And finally we can run the training.

In [None]:
fitted_model, fitting_history = train_regression_model(
        model=tiny_dnn,
        data_loaders_dict=data_loaders_dict,
        loss_function=loss_function,
        optimizer=optimizer,
        num_epochs=num_epochs,
        device=None,
        early_stopping=early_stopping,
        output_dir=output_dir)

