In [1]:
# Needed to import custom code from other directories
import sys
sys.path.append('../../code')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F

from dataset import CSVDataset

from utils import LRAP, perfection
from train import Net

from tabulate import tabulate
from tqdm.auto import tqdm

%matplotlib inline

# Notebook-wide constants.
# NOTE(review): SEED and NUM_FEATURES are not referenced in the cells visible here.
SEED = 42

NUM_FEATURES = 5000
NUM_CLASSES = 3993  # number of columns given to every label / prediction CSV read below

# Restore the two trained networks from their checkpoints first ...
model_4 = Net.load_from_checkpoint("../../code/saved_models/neural_network_4.ckpt")  # 8192 units
model_5 = Net.load_from_checkpoint("../../code/saved_models/neural_network_5.ckpt")  # 16384 units

# ... then freeze both, since they are only used for inference in this notebook.
# (Assuming Net is a LightningModule, freeze() disables gradients and switches to
# eval mode — TODO confirm against train.Net.)
model_4.freeze()
model_5.freeze()

In [2]:
# Inspect the learning rate stored in the hyperparameters of the loaded model_4 checkpoint.
model_4.hparams.lr

0.000363078054770101

In [58]:
# Development (validation) split, passed the saved scaler path via `standardize`.
validation_data = CSVDataset(
    "../../data/expanded/",
    csv_features="dev_features.csv",
    csv_labels="dev_labels.csv",
    standardize="../../code/saved_models/scaler.pkl",
)

# Reuse the batch size model_4 was trained with. No shuffling is requested, so
# batches come out in file order and line up row-for-row with y_valid below.
validation_dataloader = torch.utils.data.DataLoader(
    dataset=validation_data,
    batch_size=model_4.hparams.batch_size,
)

# Ground-truth label matrix as a plain NumPy array (one column per class).
y_valid = pd.read_csv(
    "../../data/expanded/dev_labels.csv", names=range(NUM_CLASSES)
).to_numpy()

In [59]:
# Previously exported validation predictions, one headerless CSV per model,
# each read with NUM_CLASSES columns and converted to a NumPy array.
predictions_1, predictions_2, predictions_3 = (
    pd.read_csv(csv_path, names=range(NUM_CLASSES)).to_numpy()
    for csv_path in (
        "../../public_data/nn_predictions_1.csv",  # 2048
        "../../public_data/nn_predictions_2.csv",  # 4096
        "../../public_data/nn_predictions_3.csv",  # 6144
    )
)

In [60]:
# Validation LRAP of the first saved prediction set (annotated "2048" where it is loaded).
# LRAP comes from utils; presumably label ranking average precision — TODO confirm.
LRAP(y_valid, predictions_1)

0.6251477863040237

In [61]:
# Validation LRAP of the second saved prediction set (annotated "4096" where it is loaded).
LRAP(y_valid, predictions_2)

0.630795128936494

In [62]:
# Validation LRAP of the third saved prediction set (annotated "6144" where it is loaded).
LRAP(y_valid, predictions_3)

0.630657886172951

In [63]:
# Score the validation set with model_4 (8192 units), batch by batch.
#
# * The module is called directly (model_4(features)) rather than through
#   .forward(), so any registered hooks run as PyTorch intends.
# * torch.no_grad() makes the inference intent explicit: no autograd graph is
#   built, and the .numpy() conversion below works regardless of whether the
#   model was frozen beforehand.
with torch.no_grad():
    logits_4 = torch.cat(
        [model_4(features) for features, _ in validation_dataloader], dim=0
    )
    # The network's outputs are passed through a sigmoid to get per-class scores.
    predictions_4 = torch.sigmoid(logits_4).numpy()

LRAP(y_valid, predictions_4)

0.6317192855131262

In [64]:
# Score the validation set with model_5 (16384 units), batch by batch.
#
# * The module is called directly (model_5(features)) rather than through
#   .forward(), so any registered hooks run as PyTorch intends.
# * torch.no_grad() makes the inference intent explicit: no autograd graph is
#   built, and the .numpy() conversion below works regardless of whether the
#   model was frozen beforehand.
with torch.no_grad():
    logits_5 = torch.cat(
        [model_5(features) for features, _ in validation_dataloader], dim=0
    )
    # The network's outputs are passed through a sigmoid to get per-class scores.
    predictions_5 = torch.sigmoid(logits_5).numpy()

LRAP(y_valid, predictions_5)

0.6294285356502091

### Best ensemble was obtained using models 1, 2 and 4

In [77]:
# Enumerate every ensemble weight triple (beta1, beta2, beta3) on a 0.05 grid
# such that beta1 >= 0.1, beta2 >= 0.1, beta3 >= 0.05 and the three sum to 1.
#
# The grid is built from integer multiples of 1/20 instead of np.arange with a
# float step: with a non-integer step (and a float stop such as 1 - beta1) the
# inclusion of the end point depends on rounding error, which can silently add
# or drop edge combinations and leave beta3 at ~0 instead of 0.05.
GRID_STEPS = 20  # 1 / 0.05
MIN_STEPS = 2    # 0.1 expressed in grid steps

betas1 = []
betas2 = []
betas3 = []
for steps_1 in range(MIN_STEPS, GRID_STEPS - MIN_STEPS):       # beta1: 0.10 .. 0.85
    for steps_2 in range(MIN_STEPS, GRID_STEPS - steps_1):     # beta2: 0.10 .. 0.95 - beta1
        steps_3 = GRID_STEPS - steps_1 - steps_2               # remainder, always >= 1 step
        betas1.append(steps_1 / GRID_STEPS)
        betas2.append(steps_2 / GRID_STEPS)
        betas3.append(steps_3 / GRID_STEPS)

In [78]:
# Evaluate every weight triple from the grid: blend the predictions of models
# 1, 2 and 4 and record the validation LRAP of each blend.
lraps = []
weight_grid = zip(betas1, betas2, betas3)
# zip() has no length, so tqdm is told the total explicitly; otherwise the
# progress bar is indeterminate.
for (b1, b2, b3) in tqdm(weight_grid, total=len(betas1)):
    ensemble = b1 * predictions_1 + b2 * predictions_2 + b3 * predictions_4
    lraps.append(LRAP(y_valid, ensemble))

# Column-oriented results: one entry per weight triple, consumed by the
# tabulate cell.
d = {
    "beta_1": betas1,
    "beta_2": betas2,
    "beta_3": betas3,
    "LRAP": lraps
}

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [86]:
# Final blend of models 1, 2 and 4 with fixed weights (they sum to 1).
# NOTE(review): the weights are hard-coded; presumably they were picked from the
# grid-search results — confirm.
weight_1, weight_2, weight_4 = 0.45, 0.2, 0.35
ensemble = weight_1 * predictions_1 + weight_2 * predictions_2 + weight_4 * predictions_4

# Validation LRAP of the blended predictions.
LRAP(y_valid, ensemble)

0.6380150218351462

In [79]:
# Print the grid-search results as a plain-text table, using the dict keys of `d` as column headers.
print(tabulate(d, headers="keys"))

  beta_1    beta_2    beta_3      LRAP
--------  --------  --------  --------
    0.1       0.1       0.8   0.635322
    0.1       0.15      0.75  0.635207
    0.1       0.2       0.7   0.635814
    0.1       0.25      0.65  0.635556
    0.1       0.3       0.6   0.635928
    0.1       0.35      0.55  0.636244
    0.1       0.4       0.5   0.63614
    0.1       0.45      0.45  0.636199
    0.1       0.5       0.4   0.636198
    0.1       0.55      0.35  0.636012
    0.1       0.6       0.3   0.636085
    0.1       0.65      0.25  0.635638
    0.1       0.7       0.2   0.635399
    0.1       0.75      0.15  0.635259
    0.1       0.8       0.1   0.634804
    0.1       0.85      0.05  0.633986
    0.15      0.1       0.75  0.635595
    0.15      0.15      0.7   0.636236
    0.15      0.2       0.65  0.636446
    0.15      0.25      0.6   0.636378
    0.15      0.3       0.55  0.636283
    0.15      0.35      0.5   0.636356
    0.15      0.4       0.45  0.636311
    0.15      0.45      0.

In [87]:
# Write the three member predictions and the blended ensemble to headerless,
# index-free CSV files, down-cast to float16.
# NOTE(review): nn_predictions_1.csv and nn_predictions_2.csv are the same
# files predictions_1 / predictions_2 were read from, so they are overwritten
# in place here.
for scores, csv_path in (
    (predictions_1, "../../public_data/nn_predictions_1.csv"),
    (predictions_2, "../../public_data/nn_predictions_2.csv"),
    (predictions_4, "../../public_data/nn_predictions_4.csv"),
    (ensemble, "../../public_data/nn_ensemble.csv"),
):
    pd.DataFrame(scores).astype("float16").to_csv(csv_path, index=False, header=False)