In [1]:
# Needed to import custom code from other directories
import sys
sys.path.append('../../code')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F

from dataset import CSVDataset

from utils import LRAP, perfection
from train import Net

%matplotlib inline

SEED = 42  # not referenced by any other visible cell

NUM_FEATURES = 5000  # presumably the input feature width; unused in the visible cells
NUM_CLASSES = 3993  # number of label columns expected in dev_labels.csv


def _load_frozen(checkpoint_path):
    """Restore a ``Net`` from ``checkpoint_path`` and freeze it before returning it."""
    net = Net.load_from_checkpoint(checkpoint_path)
    net.freeze()
    return net


# Three separately saved checkpoints of the same ``Net`` class; they are
# compared and blended in the cells that follow.
model_1 = _load_frozen("../../code/saved_models/neural_network.ckpt")
model_2 = _load_frozen("../../code/saved_models/neural_network_2.ckpt")
model_3 = _load_frozen("../../code/saved_models/neural_network_3.ckpt")

In [2]:
# Dev split wrapped in the project's CSVDataset; the `standardize` argument
# points at a pickled scaler saved next to the model checkpoints.
validation_data = CSVDataset(
    "../../data/expanded/",
    csv_features="dev_features.csv",
    csv_labels="dev_labels.csv",
    standardize="../../code/saved_models/scaler.pkl",
)

# No `shuffle` argument is passed, so batches come out in dataset order.
validation_dataloader = torch.utils.data.DataLoader(
    dataset=validation_data,
    batch_size=model_1.hparams.batch_size,
)

# Ground-truth label matrix read straight from the CSV, one column per class.
# Assumes CSVDataset yields rows in file order so predictions line up with
# these labels; TODO confirm.
y_valid = pd.read_csv(
    "../../data/expanded/dev_labels.csv",
    names=range(NUM_CLASSES),
).to_numpy()

In [3]:
# Score the whole validation set with model 1, batch by batch.
# The module is called directly (not via `.forward`) so that any registered
# hooks run, and inference is wrapped in `no_grad` so the conversion to NumPy
# does not depend on the model having been frozen beforehand.
with torch.no_grad():
    raw_outputs_1 = torch.cat(
        [model_1(features) for features, _ in validation_dataloader], dim=0
    )
    # Sigmoid maps the raw model outputs to per-class scores in (0, 1).
    predictions_1 = torch.sigmoid(raw_outputs_1).numpy()

LRAP(y_valid, predictions_1)

0.6254337395360583

In [4]:
# Score the whole validation set with model 2, batch by batch.
# The module is called directly (not via `.forward`) so that any registered
# hooks run, and inference is wrapped in `no_grad` so the conversion to NumPy
# does not depend on the model having been frozen beforehand.
with torch.no_grad():
    raw_outputs_2 = torch.cat(
        [model_2(features) for features, _ in validation_dataloader], dim=0
    )
    # Sigmoid maps the raw model outputs to per-class scores in (0, 1).
    predictions_2 = torch.sigmoid(raw_outputs_2).numpy()

LRAP(y_valid, predictions_2)

0.6309594621665222

In [5]:
# Score the whole validation set with model 3, batch by batch.
# The module is called directly (not via `.forward`) so that any registered
# hooks run, and inference is wrapped in `no_grad` so the conversion to NumPy
# does not depend on the model having been frozen beforehand.
with torch.no_grad():
    raw_outputs_3 = torch.cat(
        [model_3(features) for features, _ in validation_dataloader], dim=0
    )
    # Sigmoid maps the raw model outputs to per-class scores in (0, 1).
    predictions_3 = torch.sigmoid(raw_outputs_3).numpy()

LRAP(y_valid, predictions_3)

0.6309080790825551

### Model 1 vs 2

In [6]:
# Sweep the blend weight between model 1 (weight beta) and model 2 (weight
# 1 - beta). The grid 0.10, 0.15, ..., 0.95 is built from integers and divided
# once, so each beta is the closest float to its decimal value instead of
# carrying the rounding error that a float-step `np.arange` accumulates.
betas = np.arange(2, 20) / 20
for beta in betas:
    ensemble = (beta * predictions_1) + ((1 - beta) * predictions_2)
    print(f"(beta={beta}) LRAP: {LRAP(y_valid, ensemble)}")

(beta=0.1) LRAP: 0.6332797202502507
(beta=0.15000000000000002) LRAP: 0.6342625478929855
(beta=0.20000000000000004) LRAP: 0.6342240855069103
(beta=0.25000000000000006) LRAP: 0.6346359196735039
(beta=0.30000000000000004) LRAP: 0.6347362352810735
(beta=0.3500000000000001) LRAP: 0.634983094385877
(beta=0.40000000000000013) LRAP: 0.6347188364623025
(beta=0.45000000000000007) LRAP: 0.6343545130373217
(beta=0.5000000000000001) LRAP: 0.6337572898431053
(beta=0.5500000000000002) LRAP: 0.634080575325971
(beta=0.6000000000000002) LRAP: 0.6339205444701758
(beta=0.6500000000000001) LRAP: 0.633643227097654
(beta=0.7000000000000002) LRAP: 0.6329075556136383
(beta=0.7500000000000002) LRAP: 0.6325937474595223
(beta=0.8000000000000002) LRAP: 0.631951758906373
(beta=0.8500000000000002) LRAP: 0.6307677585567292
(beta=0.9000000000000002) LRAP: 0.6292446045849288
(beta=0.9500000000000003) LRAP: 0.6276172056163574


### Model 2 vs 3

In [7]:
# Sweep the blend weight between model 2 (weight beta) and model 3 (weight
# 1 - beta). The grid 0.10, 0.15, ..., 0.95 is built from integers and divided
# once, so each beta is the closest float to its decimal value instead of
# carrying the rounding error that a float-step `np.arange` accumulates.
betas = np.arange(2, 20) / 20
for beta in betas:
    ensemble = (beta * predictions_2) + ((1 - beta) * predictions_3)
    print(f"(beta={beta}) LRAP: {LRAP(y_valid, ensemble)}")

(beta=0.1) LRAP: 0.6320920534055976
(beta=0.15000000000000002) LRAP: 0.6325836448128798
(beta=0.20000000000000004) LRAP: 0.6326709099755596
(beta=0.25000000000000006) LRAP: 0.6332985990640884
(beta=0.30000000000000004) LRAP: 0.6337515629387955
(beta=0.3500000000000001) LRAP: 0.6340260534453079
(beta=0.40000000000000013) LRAP: 0.634436934437014
(beta=0.45000000000000007) LRAP: 0.6347761277216853
(beta=0.5000000000000001) LRAP: 0.6352517050319257
(beta=0.5500000000000002) LRAP: 0.6353258684938988
(beta=0.6000000000000002) LRAP: 0.6347310765417643
(beta=0.6500000000000001) LRAP: 0.6346071826110886
(beta=0.7000000000000002) LRAP: 0.6342108177232018
(beta=0.7500000000000002) LRAP: 0.6342675331746361
(beta=0.8000000000000002) LRAP: 0.6333524526932748
(beta=0.8500000000000002) LRAP: 0.633214075564569
(beta=0.9000000000000002) LRAP: 0.632705389665642
(beta=0.9500000000000003) LRAP: 0.6319769521439863


### Model 1 vs 3

In [8]:
# Sweep the blend weight between model 1 (weight beta) and model 3 (weight
# 1 - beta). The grid 0.10, 0.15, ..., 0.95 is built from integers and divided
# once, so each beta is the closest float to its decimal value instead of
# carrying the rounding error that a float-step `np.arange` accumulates.
betas = np.arange(2, 20) / 20
for beta in betas:
    ensemble = (beta * predictions_1) + ((1 - beta) * predictions_3)
    print(f"(beta={beta}) LRAP: {LRAP(y_valid, ensemble)}")

(beta=0.1) LRAP: 0.6332364366342393
(beta=0.15000000000000002) LRAP: 0.6344045670899703
(beta=0.20000000000000004) LRAP: 0.6350705649137446
(beta=0.25000000000000006) LRAP: 0.6355727511829187
(beta=0.30000000000000004) LRAP: 0.6358699459244713
(beta=0.3500000000000001) LRAP: 0.6358365378331927
(beta=0.40000000000000013) LRAP: 0.6360667906978956
(beta=0.45000000000000007) LRAP: 0.6355786985066343
(beta=0.5000000000000001) LRAP: 0.6354695449876373
(beta=0.5500000000000002) LRAP: 0.635307976867946
(beta=0.6000000000000002) LRAP: 0.6344322173711388
(beta=0.6500000000000001) LRAP: 0.6341130099221854
(beta=0.7000000000000002) LRAP: 0.6332839611061003
(beta=0.7500000000000002) LRAP: 0.632207022252148
(beta=0.8000000000000002) LRAP: 0.6316344171558514
(beta=0.8500000000000002) LRAP: 0.6311327750344585
(beta=0.9000000000000002) LRAP: 0.6301826027003959
(beta=0.9500000000000003) LRAP: 0.6284259515765099


### Model 1 vs 2 vs 3

In [28]:
from tabulate import tabulate
from tqdm.auto import tqdm

In [40]:
# Enumerate every weight triple (beta_1, beta_2, beta_3) on a 0.05-spaced grid
# with beta_1 >= 0.1, beta_2 >= 0.1, beta_3 >= 0.05 and the three summing to 1.
#
# The grid is walked in integer units of 1/20 and converted to floats at the
# end. Stepping floats with `np.arange` accumulates rounding error, which can
# make the number of inner iterations depend on that error and produce a
# spurious triple whose third weight is ~0 (or slightly negative).
GRID_DENOMINATOR = 20  # 1 / 0.05

betas1 = []
betas2 = []
betas3 = []
for steps1 in range(2, 18):  # beta_1 = 0.10, 0.15, ..., 0.85
    # Stop before beta_1 + beta_2 reaches 1 so that beta_3 stays >= 0.05.
    for steps2 in range(2, GRID_DENOMINATOR - steps1):
        steps3 = GRID_DENOMINATOR - steps1 - steps2
        betas1.append(steps1 / GRID_DENOMINATOR)
        betas2.append(steps2 / GRID_DENOMINATOR)
        betas3.append(steps3 / GRID_DENOMINATOR)

In [41]:
# Evaluate LRAP for every weight triple produced by the grid cell.
# `zip` has no length, so the total is passed explicitly; without it tqdm can
# only show an indeterminate progress bar.
lraps = [
    LRAP(y_valid, b1 * predictions_1 + b2 * predictions_2 + b3 * predictions_3)
    for b1, b2, b3 in tqdm(zip(betas1, betas2, betas3), total=len(betas1))
]

# Column-oriented results: one entry per weight triple, in grid order.
d = {
    "beta_1": betas1,
    "beta_2": betas2,
    "beta_3": betas3,
    "LRAP": lraps
}

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [42]:
# Render the grid-search results as a plain-text table, using the dict keys
# as column headers.
results_table = tabulate(d, headers="keys")
print(results_table)

  beta_1    beta_2    beta_3      LRAP
--------  --------  --------  --------
    0.1       0.1       0.8   0.634304
    0.1       0.15      0.75  0.634577
    0.1       0.2       0.7   0.634937
    0.1       0.25      0.65  0.635156
    0.1       0.3       0.6   0.635765
    0.1       0.35      0.55  0.635966
    0.1       0.4       0.5   0.636234
    0.1       0.45      0.45  0.636523
    0.1       0.5       0.4   0.636268
    0.1       0.55      0.35  0.636124
    0.1       0.6       0.3   0.635614
    0.1       0.65      0.25  0.635327
    0.1       0.7       0.2   0.634999
    0.1       0.75      0.15  0.63452
    0.1       0.8       0.1   0.634194
    0.1       0.85      0.05  0.633777
    0.15      0.1       0.75  0.635303
    0.15      0.15      0.7   0.634976
    0.15      0.2       0.65  0.63571
    0.15      0.25      0.6   0.636342
    0.15      0.3       0.55  0.636433
    0.15      0.35      0.5   0.636805
    0.15      0.4       0.45  0.637043
    0.15      0.45      0.4

In [43]:
# Highest LRAP reached by any weight triple in the three-model grid search.
max(lraps)

0.6373549839954993

In [44]:
# Fixed blend weights for models 1, 2 and 3 (they sum to 1).
# NOTE(review): presumably the best triple from the grid search; confirm
# against the results table before relying on it.
weight_1, weight_2, weight_3 = 0.3, 0.2, 0.5
ensemble = (
    weight_1 * predictions_1
    + weight_2 * predictions_2
    + weight_3 * predictions_3
)

In [47]:
# Write each model's validation scores and the blended ensemble to a
# headerless, index-free CSV in the working directory. Values are cast to
# float16 before writing.
csv_outputs = {
    "nn_predictions_1.csv": predictions_1,
    "nn_predictions_2.csv": predictions_2,
    "nn_predictions_3.csv": predictions_3,
    "nn_ensemble.csv": ensemble,
}
for csv_name, scores in csv_outputs.items():
    pd.DataFrame(scores).astype("float16").to_csv(csv_name, index=False, header=False)