## Residual layers worked well on a single model

Leaderboard (LB) score: `0.01856`

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!rm train_targets_folds.csv
!wget https://github.com/Mayukhdeb/moa-classification-with-model-blending/raw/main/folds/train_targets_folds.csv

In [None]:
!pip install iterative-stratification

In [None]:
!pip install optuna

In [None]:
pd.read_csv("./train_targets_folds.csv").head()

In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import RandomOverSampler

import numpy as np 
import pandas as pd 

from IPython.display import clear_output
import random 
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter1d   ## smoother
from tqdm.notebook import tqdm, tnrange
import os

plt.rcParams['figure.figsize'] = 15, 7

CGREEN  = '\33[32m'
CBLUE =  '\033[34m'
CRED = '\033[1;31m'
CEND  = '\33[0m'

def seed_everything(seed=1903):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device), and puts cuDNN into deterministic mode with autotuning disabled
    so repeated runs pick the same kernels.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE: this only affects interpreters started after this point; the hash
    # seed of the running process is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed all visible GPUs, not only the current one (a no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The autotuner may select different kernels between runs; turn it off.
    torch.backends.cudnn.benchmark = False
    print("seed kar diya")

In [None]:
# Use the GPU whenever PyTorch can see one; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

device


In [None]:

def preprocess(df, select_features):
    """Return a copy of ``df`` with the categorical columns encoded as numbers.

    ``cp_type`` is mapped to {trt_cp: 0, ctl_vehicle: 1}, ``cp_dose`` to
    {D1: 0, D2: 1} and ``cp_time`` to {24: 0, 48: 0.5, 72: 1}. Values outside
    these mappings become NaN (``Series.map`` semantics).

    Args:
        df: Feature frame containing ``cp_type``, ``cp_dose`` and ``cp_time``
            columns. When ``select_features`` is set it must have at least 875
            columns, because columns are picked by position.
        select_features: When truthy, keep only the columns whose positions
            are listed in ``top_features``; otherwise keep all columns.

    Returns:
        A new DataFrame; the input frame is left untouched.
    """
    top_features = [ 0, 1, 2,   3,   4,   5,   6,   7,   9,  11,  14,  15,  16,  17,
        18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  29,  30,  31,
        32,  33,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  46,
        47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  58,  59,  60,
        61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,
        74,  75,  76,  78,  79,  80,  81,  82,  83,  84,  86,  87,  88,
        89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101,
       102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
       115, 116, 117, 118, 120, 121, 122, 123, 124, 125, 126, 127, 128,
       129, 130, 131, 132, 133, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 149, 150, 151, 152, 153, 154, 155, 156, 157,
       158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
       171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
       184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 197,
       198, 199, 200, 202, 203, 204, 205, 206, 208, 209, 210, 211, 212,
       213, 214, 215, 216, 217, 218, 219, 220, 221, 223, 224, 225, 226,
       227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
       240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
       254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266,
       267, 268, 269, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280,
       281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 294,
       295, 296, 298, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309,
       310, 311, 312, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323,
       324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
       337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349,
       350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362,
       363, 364, 365, 366, 367, 368, 369, 370, 371, 374, 375, 376, 377,
       378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 390, 391,
       392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404,
       405, 406, 407, 408, 409, 411, 412, 413, 414, 415, 416, 417, 418,
       419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431,
       432, 434, 435, 436, 437, 438, 439, 440, 442, 443, 444, 445, 446,
       447, 448, 449, 450, 453, 454, 456, 457, 458, 459, 460, 461, 462,
       463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475,
       476, 477, 478, 479, 481, 482, 483, 484, 485, 486, 487, 488, 489,
       490, 491, 492, 493, 494, 495, 496, 498, 500, 501, 502, 503, 505,
       506, 507, 509, 510, 511, 512, 513, 514, 515, 518, 519, 520, 521,
       522, 523, 524, 525, 526, 527, 528, 530, 531, 532, 534, 535, 536,
       538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 549, 550, 551,
       552, 554, 557, 559, 560, 561, 562, 565, 566, 567, 568, 569, 570,
       571, 572, 573, 574, 575, 577, 578, 580, 581, 582, 583, 584, 585,
       586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 599,
       600, 601, 602, 606, 607, 608, 609, 611, 612, 613, 615, 616, 617,
       618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630,
       631, 632, 633, 634, 635, 636, 637, 638, 639, 641, 642, 643, 644,
       645, 646, 647, 648, 649, 650, 651, 652, 654, 655, 656, 658, 659,
       660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672,
       673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685,
       686, 687, 688, 689, 691, 692, 693, 694, 695, 696, 697, 699, 700,
       701, 702, 704, 705, 707, 708, 709, 710, 711, 713, 714, 716, 717,
       718, 720, 721, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732,
       733, 734, 735, 737, 738, 739, 740, 742, 743, 744, 745, 746, 747,
       748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 759, 760, 761,
       762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774,
       775, 776, 777, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788,
       789, 790, 792, 793, 794, 795, 796, 797, 798, 800, 801, 802, 803,
       804, 805, 806, 808, 809, 811, 813, 814, 815, 816, 817, 818, 819,
       821, 822, 823, 825, 826, 827, 828, 829, 830, 831, 832, 834, 835,
       837, 838, 839, 840, 841, 842, 845, 846, 847, 848, 850, 851, 852,
       854, 855, 856, 858, 859, 860, 861, 862, 864, 866, 867, 868, 869,
       870, 871, 872, 873, 874]

    # Take an explicit copy so the assignments below never write into (or warn
    # about) the caller's frame.
    if select_features:
        df = df[df.columns[top_features]].copy()
    else:
        df = df.copy()

    # Plain column assignment replaces the column together with its dtype.
    # `df.loc[:, col] = ...` writes into the existing column in place on
    # pandas >= 2.0, which leaves cp_type / cp_dose as object dtype and makes
    # the resulting feature matrix non-numeric.
    df['cp_type'] = df['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    df['cp_time'] = df['cp_time'].map({ 24: 0, 48: 0.5, 72: 1})

    return df

In [None]:
class MoaDataset(Dataset):
    """Dataset that pairs each feature row with the target row at the same index."""

    def __init__(self, features, targets):
        # Both arrays are indexed as [row, :] and expose .shape.
        self.features = features
        self.targets = targets

    def sizes(self):
        """Print the second dimension of the feature and target arrays."""
        print("features size = ", self.features.shape[1])
        print("targets size = ", self.targets.shape[1])

    def __len__(self):
        # The number of samples is the number of feature rows.
        n_rows = self.features.shape[0]
        return n_rows

    def __getitem__(self, item):
        """Return row ``item`` as a dict of float tensors under keys "x" and "y"."""
        feature_row = self.features[item, :]
        target_row = self.targets[item, :]
        sample = {}
        sample["x"] = torch.tensor(feature_row, dtype=torch.float)
        sample["y"] = torch.tensor(target_row, dtype=torch.float)
        return sample

In [None]:

class LabelSmoothingCrossEntropy(nn.Module):
    """Training loss: BCE-with-logits mixed with a log-softmax smoothing term."""

    def __init__(self):
        super().__init__()

    def forward(self, x, target, smoothing=0.0008):
        """Return ``(1 - smoothing) * BCE + smoothing * smooth``, averaged to a scalar.

        ``BCE`` is the mean binary cross-entropy of logits ``x`` against
        ``target``; ``smooth`` is, per row, the negated mean of
        ``log_softmax(x)`` over the last dimension.
        """
        bce_term = F.binary_cross_entropy_with_logits(x, target)
        smooth_term = -F.log_softmax(x, dim=-1).mean(dim=-1)
        mixed = (1. - smoothing) * bce_term + smoothing * smooth_term
        return mixed.mean()
    
    
class Engine:
    """Bundle a model, optimizer and LR scheduler with one-epoch train/eval loops.

    Batches are expected to be dicts with an "x" (inputs) and a "y" (targets)
    tensor, which is what ``MoaDataset`` yields.
    """

    def __init__(self, model, optimizer, device, scheduler):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.scheduler = scheduler

    @staticmethod
    def loss_fn_backprop(targets, outputs):
        """Loss that is back-propagated during training."""
        return LabelSmoothingCrossEntropy()(outputs, targets)

    @staticmethod
    def loss_fn_stop(targets, outputs):
        """Plain BCE-with-logits; the loss that is reported and compared."""
        return nn.BCEWithLogitsLoss()(outputs, targets)

    def train(self, data_loader):
        """Run one optimisation epoch.

        Returns:
            The mean per-batch ``loss_fn_stop`` value (not the smoothed loss
            that is actually optimised).
        """
        self.model.train()
        final_loss = 0
        for data in data_loader:
            self.optimizer.zero_grad()
            inputs = data["x"].to(self.device)
            targets = data["y"].to(self.device)
            outputs = self.model(inputs)
            loss = self.loss_fn_backprop(targets, outputs)
            # Monitoring only: detach so no second autograd graph is built.
            loss_stop = self.loss_fn_stop(targets, outputs.detach())
            loss.backward()

            self.optimizer.step()

            final_loss += loss_stop.item()
        # NOTE(review): the scheduler is stepped once per epoch with the
        # constant 1.0 instead of a measured loss, so a plateau-based
        # scheduler never sees an improvement after the first epoch. Kept
        # as-is to preserve the existing schedule; confirm it is intended.
        self.scheduler.step(1.)
        return final_loss / len(data_loader)

    def evaluate(self, data_loader):
        """Return the mean per-batch ``loss_fn_stop`` value without updating weights."""
        self.model.eval()
        final_loss = 0
        # No gradients are needed for validation; skipping them saves memory
        # and time.
        with torch.no_grad():
            for data in data_loader:
                inputs = data["x"].to(self.device)
                targets = data["y"].to(self.device)
                outputs = self.model(inputs)
                loss = self.loss_fn_stop(targets, outputs)
                final_loss += loss.item()
        return final_loss / len(data_loader)

In [None]:
df_main = pd.read_csv('../input/lish-moa/train_features.csv')

    

def run_training(model_class ,fold, save_model=True, select_features = False, model_id = 0, lr = 0.004299882049752947, decay_factor = 0.1, patience = 7, df = df_main, num_epochs = 100):
    """Train ``model_class`` with ``fold`` held out for validation.

    Args:
        model_class: Zero-argument callable returning the ``nn.Module`` to train.
        fold: Fold label used for validation; compared against the ``kfold``
            column as ``str(fold)``.
        save_model: When true, save the state dict every time the validation
            loss improves, as ``model_<fold + model_id>.pth``.
        select_features: Forwarded to ``preprocess``.
        model_id: Offset added to ``fold`` in the checkpoint file name.
        lr: Adam learning rate.
        decay_factor: ``factor`` of the ReduceLROnPlateau scheduler.
        patience: ``patience`` of the ReduceLROnPlateau scheduler.
        df: Raw feature frame (defaults to the module-level ``df_main``).
        num_epochs: Maximum number of epochs.

    Returns:
        The lowest validation loss observed.
    """
    df = preprocess(df, select_features = select_features)
    targets_df = pd.read_csv('./train_targets_folds.csv')

    feature_columns = df.drop("sig_id", axis=1).columns
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    df = df.merge(targets_df, on="sig_id", how="left")
    # Rows labelled "hold" take no part in training or validation.
    df = df[df.kfold != "hold"]

    train_df = df[df.kfold != str(fold)].reset_index(drop=True)
    valid_df = df[df.kfold == str(fold)].reset_index(drop=True)

    x_train = train_df[feature_columns].to_numpy()
    y_train = train_df[target_columns].to_numpy()

    x_val = valid_df[feature_columns].to_numpy()
    y_val = valid_df[target_columns].to_numpy()

    train_dataset = MoaDataset(features = x_train, targets=y_train)
    valid_dataset = MoaDataset(features = x_val, targets=y_val)

    train_loader = DataLoader(
        train_dataset, batch_size=128, shuffle=True
    )

    # Fixed order keeps the batch composition, and therefore the reported
    # mean-of-batch-means loss, identical from epoch to epoch.
    val_loader = DataLoader(
        valid_dataset, batch_size=1024, shuffle=False
    )

    model = model_class()
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr = lr,
                           weight_decay=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     factor=decay_factor,
                                                     patience= patience,
                                                     eps=1e-4,
                                                     verbose=True)
    # Batches must go to the device the model lives on; a hard-coded 'cuda'
    # here fails on CPU-only machines.
    eng = Engine(model, optimizer, device, scheduler)

    best_loss = 999
    early_stop_iter = 15
    early_stop_count=0

    for epoch in range(num_epochs):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(val_loader)
        if valid_loss<best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), "model_"+str(fold + model_id) +".pth")
        else:
            # The counter is cumulative: it is not reset when the loss
            # improves again.
            early_stop_count +=1

        if early_stop_count>early_stop_iter:
            break

    return(best_loss)


In [None]:
class Model(nn.Module):
    """Residual MLP: 785 input features -> 880 hidden units -> 206 logits."""

    def __init__(self):
        super().__init__()

        n_inputs, n_hidden, n_outputs = 785, 880, 206

        self.batch_norm1 = nn.BatchNorm1d(n_inputs)
        self.dense1 = nn.utils.weight_norm(nn.Linear(n_inputs, n_hidden))

        self.batch_norm2 = nn.BatchNorm1d(n_hidden)
        self.dense2 = nn.utils.weight_norm(nn.Linear(n_hidden, n_hidden))

        self.batch_norm_out = nn.BatchNorm1d(n_hidden)
        self.dense_out = nn.utils.weight_norm(nn.Linear(n_hidden, n_outputs))

    def forward(self, x):
        """Return the raw logits for a batch ``x`` (no sigmoid applied)."""
        # Stage 1: its pre-activation feeds both later skip connections.
        res_1 = self.dense1(self.batch_norm1(x))
        act_1 = F.leaky_relu(res_1)

        # Stage 2: adds the stage-1 pre-activation before the non-linearity.
        res_2 = self.dense2(self.batch_norm2(act_1))
        act_2 = F.leaky_relu(res_2 + res_1)

        # Stage 3 reuses the stage-2 layers (batch_norm2 / dense2), so the
        # weights are shared; it sums all earlier pre-activations.
        res_3 = self.dense2(self.batch_norm2(act_2))
        act_3 = F.leaky_relu(res_3 + res_2 + res_1)

        return self.dense_out(self.batch_norm_out(act_3))

In [None]:
# def objective(trial):
#     params = {
#         "lr": trial.suggest_uniform("lr", 1e-3 , 5e-3 ),
# #         "decay_factor" : trial.suggest_uniform("decay_factor", 0.0, 0.5),
#     }
    
#     losses = np.zeros(1)
#     for fold in range(1):
#         loss = run_training(
#             model_class = Model,
#             fold = fold,
#             save_model = False,
#             select_features = True,
#             lr = params["lr"],
#             decay_factor = 0.1,
#             patience  = 7,
#             num_epochs = 2
#         )
#         losses[fold] = loss
        
#     final_loss = losses.mean()
#     return final_loss

In [None]:
# import optuna

In [None]:
# study = optuna.create_study(direction="minimize")

In [None]:

# study.optimize(objective, n_trials=100)

In [None]:
# study.best_params

In [None]:
# Train one `Model` per fold on the selected-feature subset and collect the
# best validation loss of each fold.
num_folds = 7
best_losses = []
for fold in range(num_folds):
    print(best_losses)  # progress: losses gathered so far
    loss = run_training(
        model_class=Model,
        fold=fold,
        save_model=True,
        select_features=True,
        lr=4e-3,
        decay_factor=0.1,
        patience=7,
        num_epochs=100,
    )
    best_losses.append(loss)

In [None]:
plt.plot(best_losses)
plt.plot([0.015204671770334244, 0.015316605878372988, 0.015278907492756844, 0.015122259967029095, 0.015271329941848913, 0.015176061540842056])

In [None]:
## best tillnow = [0.015204671770334244, 0.015316605878372988, 0.015278907492756844, 0.015122259967029095, 0.015271329941848913, 0.015176061540842056]


In [None]:
class Model_2(nn.Module):
    """Residual MLP: 875 input features -> 880 hidden units -> 206 logits."""

    def __init__(self):
        super().__init__()

        n_inputs, n_hidden, n_outputs = 875, 880, 206

        self.batch_norm1 = nn.BatchNorm1d(n_inputs)
        self.dense1 = nn.utils.weight_norm(nn.Linear(n_inputs, n_hidden))

        self.batch_norm2 = nn.BatchNorm1d(n_hidden)
        self.dense2 = nn.utils.weight_norm(nn.Linear(n_hidden, n_hidden))

        self.batch_norm_out = nn.BatchNorm1d(n_hidden)
        self.dense_out = nn.utils.weight_norm(nn.Linear(n_hidden, n_outputs))

    def forward(self, x):
        """Return the raw logits for a batch ``x`` (no sigmoid applied)."""
        # Stage 1: its pre-activation feeds both later skip connections.
        res_1 = self.dense1(self.batch_norm1(x))
        act_1 = F.relu(res_1)

        # Stage 2: adds the stage-1 pre-activation before the non-linearity.
        res_2 = self.dense2(self.batch_norm2(act_1))
        act_2 = F.relu(res_2 + res_1)

        # Stage 3 reuses the stage-2 layers (batch_norm2 / dense2), so the
        # weights are shared; it sums all earlier pre-activations.
        res_3 = self.dense2(self.batch_norm2(act_2))
        act_3 = F.relu(res_3 + res_2 + res_1)

        return self.dense_out(self.batch_norm_out(act_3))

In [None]:
# def objective_2(trial):
#     params = {
#         "lr": trial.suggest_uniform("lr", 0.5e-4 , 5e-3 ),
#         "decay_factor" : trial.suggest_uniform("decay_factor", 0.01,  0.9),
#     }
    
#     losses = np.zeros(1)
#     for fold in range(1):
#         loss = run_training(
#             model_class = Model_2,
#             fold = fold,
#             save_model = False,
#             select_features = False,
#             lr = params["lr"],
#             decay_factor = params["decay_factor"],
#             patience  = 7,
#             num_epochs = 2,
#             model_id = 100
#         )
#         losses[fold] = loss
        
#     final_loss = losses.mean()
#     return final_loss

In [None]:
# study = optuna.create_study(direction="minimize")


In [None]:
# study.optimize(objective_2, n_trials=100)

In [None]:
# best_params_2 = study.best_trial.params
# best_params_2

In [None]:
# Train one `Model_2` per fold on all features; checkpoints are saved with a
# file-name offset of 100 so they do not overwrite the first model's files.
num_folds = 7
best_losses_2 = []
for fold in range(num_folds):
    print(best_losses_2)  # progress: losses gathered so far
    loss = run_training(
        model_class=Model_2,
        fold=fold,
        save_model=True,
        select_features=False,
        lr=4e-3,
        decay_factor=0.1,
        patience=7,
        num_epochs=100,
        model_id=100,
    )
    best_losses_2.append(loss)

In [None]:
print(best_losses_2)  ## better than old model
np.array(best_losses_2).mean()

In [None]:
plt.plot(best_losses, label = "model 1")
plt.plot(best_losses_2, label = "model 2")
plt.legend()

In [None]:
def load_fold(fold, select_features = False):
    """Load the training data split into the requested fold and everything else.

    Args:
        fold: Fold label; rows whose ``kfold`` equals ``str(fold)`` form the
            validation split, all remaining rows the training split.
        select_features: Forwarded to ``preprocess``.

    Returns:
        ``(x_train, y_train, x_val, y_val)`` as NumPy arrays.
    """
    features_df = preprocess(
        pd.read_csv('../input/lish-moa/train_features.csv'),
        select_features = select_features,
    )
    targets_df = pd.read_csv('./train_targets_folds.csv')

    # Everything except the id is a feature; everything except the id and the
    # fold label is a target.
    feature_columns = features_df.drop("sig_id", axis=1).columns
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    merged = features_df.merge(targets_df, on="sig_id", how="left")
    fold_label = str(fold)

    splits = []
    for part in (merged[merged.kfold != fold_label], merged[merged.kfold == fold_label]):
        part = part.reset_index(drop=True)
        splits.append(part[feature_columns].to_numpy())
        splits.append(part[target_columns].to_numpy())

    x_train, y_train, x_val, y_val = splits
    return x_train, y_train, x_val, y_val

class blend():
    """Weighted average of several equally-shaped prediction arrays.

    Args:
        all_preds_np: Sequence of NumPy arrays, one per model.
    """

    def __init__(self, all_preds_np):
        self.all_preds = all_preds_np

    def predict(self, weights):
        """Return the weighted mean of the stored predictions.

        Args:
            weights: One weight per stored prediction array; any sequence or
                1-D array is accepted.

        Returns:
            A float array shaped like a single prediction array. If the
            weights sum to zero the result contains NaN (NumPy division
            semantics).

        Raises:
            ValueError: If there are no predictions, or the number of weights
                differs from the number of prediction arrays.
        """
        if len(self.all_preds) == 0:
            raise ValueError("blend needs at least one prediction array")

        weights = np.asarray(weights, dtype=float)
        if weights.shape != (len(self.all_preds),):
            raise ValueError(
                "expected %d weights, got shape %s"
                % (len(self.all_preds), weights.shape)
            )
        self.weights = weights

        # Accumulate in float so integer prediction arrays work too.
        final_pred = np.zeros_like(self.all_preds[0], dtype=float)
        for pred, weight in zip(self.all_preds, weights):
            final_pred += pred * weight

        return final_pred / weights.sum()


def inference_fn(model, test_features, device):
    """Run ``model`` on ``test_features`` and return sigmoid probabilities.

    Args:
        model: Callable mapping a float tensor of features to logits.
        test_features: Array-like feature matrix; converted to a float tensor.
        device: Device the input tensor is moved to before the forward pass.

    Returns:
        NumPy array holding the sigmoid of the model output.
    """
    test_features_tensor = torch.tensor(test_features).float().to(device)
    # Inference only: do not record an autograd graph for the whole matrix.
    with torch.no_grad():
        probabilities = model(test_features_tensor).sigmoid()
    return probabilities.cpu().numpy()

class pytorch_model():
    """Inference wrapper: build a model, load saved weights, switch to eval mode."""

    def __init__(self, model_class, model_path, device):
        self.model_class = model_class
        self.model_path = model_path
        self.device = device

        self.model = self.model_class()

        # Only a device of exactly "cuda" loads the checkpoint as saved; any
        # other value remaps the stored tensors onto the CPU first.
        load_kwargs = {}
        if self.device != "cuda":
            load_kwargs["map_location"] = torch.device('cpu')
        state_dict = torch.load(self.model_path, **load_kwargs)
        self.model.load_state_dict(state_dict)

        self.model.to(self.device)
        self.model.eval()

    def predict(self, x):
        """Return the wrapped model's raw output for ``x``."""
        return self.model(x)

    def __call__(self, x):
        return self.predict(x)


def log_loss_metric(y_true, y_pred):
    """Return the mean binary log loss of ``y_pred`` against ``y_true``.

    Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before the logarithm.
    The loss is averaged over axis 1 first and then over the rows.
    """
    eps = 1e-15
    clipped = np.clip(y_pred, eps, 1 - eps)
    log_likelihood = y_true * np.log(clipped) + (1 - y_true) * np.log(1 - clipped)
    per_row = np.mean(log_likelihood, axis = 1)
    return - np.mean(per_row)

In [None]:
# Score every fold checkpoint of model 1 on the hold-out split, then save the
# hold-out labels and each model's predictions for the blending step.
x_train, y_train, x_val, y_val = load_fold("hold", select_features = True)
np.save("hold_labels_785.npy", y_val)

all_preds = []
hold_preds_names = []
for i in range(7):
    m1 = pytorch_model(Model, f"model_{i}.pth", device)
    p1 = inference_fn(m1, x_val, device)
    all_preds.append(p1)

    # Use float64 predictions for the metric.
    l1 = log_loss_metric(y_val, p1.astype(np.float64))
    print("loss: ", l1, end = "  ")

    name = f"model_1_hold_{i}.npy"
    np.save(name, p1)
    print("saved: ", name)
    hold_preds_names.append(name)

Best log losses on the hold-out set so far (model 1, one line per fold checkpoint):
```
loss:  0.01518756834039769  saved:  model_1_hold_0.npy
loss:  0.015188967492687305  saved:  model_1_hold_1.npy
loss:  0.01511348981196045  saved:  model_1_hold_2.npy
loss:  0.015109530036842785  saved:  model_1_hold_3.npy
loss:  0.015151522916018512  saved:  model_1_hold_4.npy
loss:  0.01507399836027305  saved:  model_1_hold_5.npy
loss:  0.015221167192909984  saved:  model_1_hold_6.npy

```

In [None]:
# Same as for model 1, but with all features and the model-2 checkpoints
# (file-name offset 100). Predictions are appended to the lists started above.
x_train, y_train, x_val, y_val = load_fold("hold", select_features = False)
np.save("hold_labels_875.npy", y_val)

for i in range(7):
    m2 = pytorch_model(Model_2, f"model_{i + 100}.pth", device)
    p2 = inference_fn(m2, x_val, device)
    all_preds.append(p2)

    l2 = log_loss_metric(y_val, p2.astype(np.float64))
    print("loss : ", l2, end = "   ")

    name = f"model_2_hold_{i}.npy"
    np.save(name, p2)
    print("saved: ", name)
    hold_preds_names.append(name)

In [None]:
def load_test_for_inference_fn(select_features):
    """Load the test features, preprocess them and return the feature matrix.

    Args:
        select_features: Forwarded to ``preprocess``.

    Returns:
        NumPy array of every column except the first one (presumably
        ``sig_id`` — confirm against the CSV layout).
    """
    raw_test = pd.read_csv("../input/lish-moa/test_features.csv")
    encoded_test = preprocess(raw_test, select_features = select_features)
    feature_names = encoded_test.columns[1:]
    return encoded_test[feature_names].values

In [None]:
# Predict the test set with every saved checkpoint and store each prediction
# matrix as .npy: first the seven model-1 files, then the seven model-2 files.
test_875 = load_test_for_inference_fn(select_features = False)
test_785 = load_test_for_inference_fn(select_features = True)

test_pred_names = []
ensemble_members = (
    # (model class, checkpoint id offset, feature matrix, output prefix)
    (Model, 0, test_785, "model_1_test_"),
    (Model_2, 100, test_875, "model_2_test_"),
)
for net_class, id_offset, test_matrix, prefix in ensemble_members:
    for i in range(7):
        net = pytorch_model(net_class, "model_" + str(i + id_offset) + ".pth", device)
        name = prefix + str(i) + ".npy"
        np.save(name, inference_fn(net, test_matrix, device))
        print("saved: ", name )
        test_pred_names.append(name)

In [None]:
!zip -r preds.zip *.npy

In [None]:
hold_preds_names, test_pred_names


In [None]:
hold_labels = np.load("hold_labels_785.npy")
hold_preds = [np.load(name) for name in hold_preds_names]
test_preds = [np.load(name) for name in test_pred_names]

In [None]:
class blend():
    """Weighted average of several equally-shaped prediction arrays.

    Args:
        all_preds_np: Sequence of NumPy arrays, one per model.
    """

    def __init__(self, all_preds_np):
        self.all_preds = all_preds_np

    def predict(self, weights):
        """Return the weighted mean of the stored predictions.

        Args:
            weights: One weight per stored prediction array; any sequence or
                1-D array is accepted.

        Returns:
            A float array shaped like a single prediction array. If the
            weights sum to zero the result contains NaN (NumPy division
            semantics).

        Raises:
            ValueError: If there are no predictions, or the number of weights
                differs from the number of prediction arrays.
        """
        if len(self.all_preds) == 0:
            raise ValueError("blend needs at least one prediction array")

        weights = np.asarray(weights, dtype=float)
        if weights.shape != (len(self.all_preds),):
            raise ValueError(
                "expected %d weights, got shape %s"
                % (len(self.all_preds), weights.shape)
            )
        self.weights = weights

        # Accumulate in float so integer prediction arrays work too.
        final_pred = np.zeros_like(self.all_preds[0], dtype=float)
        for pred, weight in zip(self.all_preds, weights):
            final_pred += pred * weight

        return final_pred / weights.sum()


In [None]:
def find_loss(params):
    """Return the hold-out log loss of the blend described by ``params``.

    Args:
        params: Mapping whose values, in iteration order, are the blend
            weights for ``hold_preds``. Weights below 0.2 are treated as 0.
    """
    raw_weights = np.array(list(params.values()))
    # Drop weak contributors entirely instead of keeping a tiny weight.
    weights_np = np.where(raw_weights < 0.2, 0, raw_weights)

    blended = blend(hold_preds).predict(weights_np)
    return log_loss_metric(hold_labels, blended)


def objective_blend(trial):
    """Optuna objective: sample one weight per hold-out prediction, return the blend loss.

    Weights are named ``w1`` .. ``wN`` (N = ``len(hold_preds)``) and sampled
    uniformly from [0, 1], in that order, so their positions line up with
    ``hold_preds``.

    Args:
        trial: The Optuna trial used to sample the weights.

    Returns:
        The hold-out log loss computed by ``find_loss``.
    """
    # `suggest_float` is the current replacement for the deprecated
    # `suggest_uniform`; it samples from the same range.
    params = {
        f"w{i}": trial.suggest_float(f"w{i}", 0.0, 1.0)
        for i in range(1, len(hold_preds) + 1)
    }

    return find_loss(params)

In [None]:
len(hold_preds)

In [None]:
# The notebook's only other `import optuna` is commented out, so import it
# here; without it this cell fails with a NameError.
import optuna

study = optuna.create_study(direction="minimize")

In [None]:
study.optimize(objective_blend, n_trials=1000)

In [None]:
"loss = : " + str(study.best_trial.value), study.best_trial.params

In [None]:
# Rebuild the best weight vector found by the study (applying the same 0.2
# cut-off as during the search) and report its loss on the hold-out set.
best_trial = study.best_params

raw_best_weights = np.array([*best_trial.values()])
noice_weights = np.where(raw_best_weights < 0.2, 0, raw_best_weights)

blend_boi = blend(hold_preds)
sample_infer = blend_boi.predict(noice_weights)
loss = log_loss_metric(hold_labels, sample_infer)
print("loss on hold set: ", loss)  ## scalar only on model 1 was 0.014533262113321026

In [None]:

blend_final = blend(test_preds)
final_preds = blend_final.predict(noice_weights)

In [None]:
plt.plot(final_preds[0])

In [None]:
noice_weights

In [None]:
# Build the submission frame: ids and target column names come from the
# sample submission (read once), predictions from the blended test output.
sample_submission = pd.read_csv("../input/lish-moa/sample_submission.csv")
sample_submission_columns = sample_submission.columns[1:]
sig_ids = sample_submission["sig_id"].values

pred_df = pd.DataFrame(final_preds, columns= sample_submission_columns)
pred_df["sig_id"] = sig_ids
# Move sig_id from the last position to the first.
columns_arrangement = ["sig_id"]
columns_arrangement.extend(pred_df.columns[:-1])
pred_df = pred_df[columns_arrangement]

save_name = "submission.csv"
pred_df.to_csv(save_name, index = False)
# Report success only after the file has actually been written.
print("saved: ", save_name)


In [None]:
pred_df.head()