** DATA SOURCE:**
> Datasets:  
* lish-moa: https://www.kaggle.com/c/lish-moa
* pytorch-lightning: https://www.kaggle.com/markpeng/pytorch-lightning
* iterative-stratification: https://www.kaggle.com/markpeng/iterative-stratification
* gen-efficientnet-pytorch: https://www.kaggle.com/markpeng/gen-efficientnet-pytorch
* gen-efficientnet-pretrained: https://www.kaggle.com/markpeng/gen-efficientnet-pretrained

** CODE INFERENCE SOURCE: https://www.kaggle.com/markpeng/deepinsight-efficientnet-b3-noisystudent ** 

In [1]:
model_type = "b4"
kfolds = 5
#resolution=224
perplexity=5
patience=15
#image_size=300
fc_size=512
epochs=10
weight_decay=0.000001
T_max=epochs
accumulate_grad_batches=1
gradient_clip_val=0.1

In [2]:
#!pip install torch==1.6.0
##!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
#!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev
#!pip install pytorch-lightning

In [3]:
kernel_mode = True

import sys
if kernel_mode:
    sys.path.insert(0, "../input/iterative-stratification")
    #sys.path.insert(0, "../input/pytorch-lightning")
    sys.path.insert(0, "../input/gen-efficientnet-pytorch")

import os
import numpy as np
import pandas as pd
import time
import random
import math
import pickle
from pickle import dump, load
import glob

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.cm import get_cmap
from matplotlib import rcParams

import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import QuantileTransformer, LabelEncoder, MinMaxScaler, RobustScaler
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
from torch.autograd import Function
import torch.optim as optim

from torch.nn import Linear, BatchNorm1d, ReLU
from torchvision import transforms

import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.metrics.functional import classification

import geffnet

import cv2

import warnings
warnings.filterwarnings('ignore')

pd.options.display.max_columns = None
sns.set(style="darkgrid")

import gc
gc.enable()

rand_seed = 1120

print(f"PyTorch Version: {torch.__version__}")
print(f"PyTorch Lightning Version: {pl.__version__}")

PyTorch Version: 1.6.0
PyTorch Lightning Version: 1.0.2


In [4]:
#!mkdir -p /root/.cache/torch/hub/checkpoints/
#!cp ../input/gen-efficientnet-pretrained/tf_efficientnet_*.pth /root/.cache/torch/hub/checkpoints/
#!ls -la /root/.cache/torch/hub/checkpoints/

In [5]:
pretrained_model = f"tf_efficientnet_{model_type}_ns"
experiment_name = f"deepinsight_efficientnet_v4_{model_type}"

dataset_folder = "../input/lish-moa" if kernel_mode else "/workspace/Kaggle/MoA/"

model_info = {
    "model_path":
    f"../input/deepinsight-efficientnet-v4-b3/{experiment_name}"
    if kernel_mode else
    f"/workspace/Kaggle/MoA/completed/deepinsight_efficientnet_v4_b3/{experiment_name}"
}

num_workers = 2 if kernel_mode else 6
gpus = [0]

if model_type == "b0":
    batch_size = 128
    infer_batch_size = 256
    image_size = 224  # B0
    drop_rate = 0.2  # B0
    resolution = 224

elif model_type == "b1":
    batch_size = 128
    infer_batch_size = 256
    image_size = 240  # B0
    drop_rate = 0.2  # B0
    resolution = 240
    
elif model_type == "b3":
    batch_size = 24
    infer_batch_size = 75
    image_size = 300  # B3
    drop_rate = 0.3  # B3
    resolution = 300
elif model_type == "b4":
    batch_size = 32
    infer_batch_size = 64
    image_size = 380  # B0
    drop_rate = 0.35  # B0
    resolution = 380
elif model_type == "b5":
    batch_size = 64
    infer_batch_size = 32
    image_size = 224 #456  # B5
    drop_rate = 0.4  # B5
    resolution = 224 #456
elif model_type == "b7":
    batch_size = 2
    infer_batch_size = 4
    # image_size = 800  # B7
    image_size = 772  # B7
    drop_rate = 0.5  # B7
    resolution = 772

# DeepInsight Transform
perplexity = 5

drop_connect_rate = 0.2
fc_size = 512

# Swap Noise
swap_prob = 0.15
swap_portion = 0.1

# Load MoA Data

In [6]:
train_features = pd.read_csv(f"{dataset_folder}/train_features.csv",
                             engine='c')
train_labels = pd.read_csv(f"{dataset_folder}/train_targets_scored.csv",
                           engine='c')

train_extra_labels = pd.read_csv(
    f"{dataset_folder}/train_targets_nonscored.csv", engine='c')

test_features = pd.read_csv(f"{dataset_folder}/test_features.csv", engine='c')

sample_submission = pd.read_csv(f"{dataset_folder}/sample_submission.csv",
                                engine='c')



In [7]:
# Sort by sig_id to ensure that all row orders match
train_features = train_features.sort_values(
    by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)
train_labels = train_labels.sort_values(by=["sig_id"], axis=0,
                                        inplace=False).reset_index(drop=True)
train_extra_labels = train_extra_labels.sort_values(
    by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)

sample_submission = sample_submission.sort_values(
    by=["sig_id"], axis=0, inplace=False).reset_index(drop=True)

In [8]:
train_features.shape, train_labels.shape, train_extra_labels.shape

((23814, 876), (23814, 207), (23814, 403))

In [9]:
test_features.shape

(3982, 876)

In [10]:
category_features = ["cp_type", "cp_dose"]
numeric_features = [
    c for c in train_features.columns
    if c != "sig_id" and c not in category_features
]
all_features = category_features + numeric_features
gene_experssion_features = [c for c in numeric_features if c.startswith("g-")]
cell_viability_features = [c for c in numeric_features if c.startswith("c-")]
len(numeric_features), len(gene_experssion_features), len(
    cell_viability_features)

(873, 772, 100)

In [11]:
train_classes = [c for c in train_labels.columns if c != "sig_id"]
train_extra_classes = [c for c in train_extra_labels.columns if c != "sig_id"]
len(train_classes), len(train_extra_classes)

(206, 402)

# Feature Encoding
As we only have three metadata features, a quick manual encoding process is done. All features are normalized into the value range of [0, 1].

In [12]:
for df in [train_features, test_features]:
    df['cp_type'] = df['cp_type'].map({'ctl_vehicle': 0, 'trt_cp': 1})
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    df['cp_time'] = df['cp_time'].map({24: 0, 48: 0.5, 72: 1})

# DeepInsight Transform - t-SNE 2D Embeddings
Based on https://github.com/alok-ai-lab/DeepInsight, but with some corrections to the norm-2 normalization.

Most of the credits should be given to the original authors!

## Implementation

Checkout <a href="https://static-content.springer.com/esm/art%3A10.1038%2Fs41598-019-47765-6/MediaObjects/41598_2019_47765_MOESM1_ESM.pdf" target="_blank">DeepInsight paper supplementary information</a> for more details.

In [13]:
# Modified from DeepInsight Transform
# https://github.com/alok-ai-lab/DeepInsight/blob/master/pyDeepInsight/image_transformer.py

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE
from scipy.spatial import ConvexHull
from matplotlib import pyplot as plt
import inspect


class DeepInsightTransformer:
    """Transform features to an image matrix using dimensionality reduction

    This class takes in data normalized between 0 and 1 and converts it to a
    CNN compatible 'image' matrix

    """
    def __init__(self,
                 feature_extractor='tsne',
                 perplexity=30,
                 pixels=100,
                 random_state=None,
                 n_jobs=None):
        """Generate an ImageTransformer instance

        Args:
            feature_extractor: string of value ('tsne', 'pca', 'kpca') or a
                class instance with method `fit_transform` that returns a
                2-dimensional array of extracted features.
            pixels: int (square matrix) or tuple of ints (height, width) that
                defines the size of the image matrix.
            random_state: int or RandomState. Determines the random number
                generator, if present, of a string defined feature_extractor.
            n_jobs: The number of parallel jobs to run for a string defined
                feature_extractor.
        """
        self.random_state = random_state
        self.n_jobs = n_jobs

        if isinstance(feature_extractor, str):
            fe = feature_extractor.casefold()
            if fe == 'tsne_exact'.casefold():
                fe = TSNE(n_components=2,
                          metric='cosine',
                          perplexity=perplexity,
                          n_iter=1000,
                          method='exact',
                          random_state=self.random_state,
                          n_jobs=self.n_jobs)
            elif fe == 'tsne'.casefold():
                fe = TSNE(n_components=2,
                          metric='cosine',
                          perplexity=perplexity,
                          n_iter=1000,
                          method='barnes_hut',
                          random_state=self.random_state,
                          n_jobs=self.n_jobs)
            elif fe == 'pca'.casefold():
                fe = PCA(n_components=2, random_state=self.random_state)
            elif fe == 'kpca'.casefold():
                fe = KernelPCA(n_components=2,
                               kernel='rbf',
                               random_state=self.random_state,
                               n_jobs=self.n_jobs)
            else:
                raise ValueError(("Feature extraction method '{}' not accepted"
                                  ).format(feature_extractor))
            self._fe = fe
        elif hasattr(feature_extractor, 'fit_transform') and \
                inspect.ismethod(feature_extractor.fit_transform):
            self._fe = feature_extractor
        else:
            raise TypeError('Parameter feature_extractor is not a '
                            'string nor has method "fit_transform"')

        if isinstance(pixels, int):
            pixels = (pixels, pixels)

        # The resolution of transformed image
        self._pixels = pixels
        self._xrot = None

    def fit(self, X, y=None, plot=False):
        """Train the image transformer from the training set (X)

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
            y: Ignored. Present for continuity with scikit-learn
            plot: boolean of whether to produce a scatter plot showing the
                feature reduction, hull points, and minimum bounding rectangle

        Returns:
            self: object
        """
        # Transpose to get (n_features, n_samples)
        X = X.T

        # Perform dimensionality reduction
        x_new = self._fe.fit_transform(X)

        # Get the convex hull for the points
        chvertices = ConvexHull(x_new).vertices
        hull_points = x_new[chvertices]

        # Determine the minimum bounding rectangle
        mbr, mbr_rot = self._minimum_bounding_rectangle(hull_points)

        # Rotate the matrix
        # Save the rotated matrix in case user wants to change the pixel size
        self._xrot = np.dot(mbr_rot, x_new.T).T

        # Determine feature coordinates based on pixel dimension
        self._calculate_coords()

        # plot rotation diagram if requested
        if plot is True:
            # Create subplots
            fig, ax = plt.subplots(1, 1, figsize=(10, 7), squeeze=False)
            ax[0, 0].scatter(x_new[:, 0],
                             x_new[:, 1],
                             cmap=plt.cm.get_cmap("jet", 10),
                             marker="x",
                             alpha=1.0)
            ax[0, 0].fill(x_new[chvertices, 0],
                          x_new[chvertices, 1],
                          edgecolor='r',
                          fill=False)
            ax[0, 0].fill(mbr[:, 0], mbr[:, 1], edgecolor='g', fill=False)
            plt.gca().set_aspect('equal', adjustable='box')
            plt.show()
        return self

    @property
    def pixels(self):
        """The image matrix dimensions

        Returns:
            tuple: the image matrix dimensions (height, width)

        """
        return self._pixels

    @pixels.setter
    def pixels(self, pixels):
        """Set the image matrix dimension

        Args:
            pixels: int or tuple with the dimensions (height, width)
            of the image matrix

        """
        if isinstance(pixels, int):
            pixels = (pixels, pixels)
        self._pixels = pixels
        # recalculate coordinates if already fit
        if hasattr(self, '_coords'):
            self._calculate_coords()

    def _calculate_coords(self):
        """Calculate the matrix coordinates of each feature based on the
        pixel dimensions.
        """
        ax0_coord = np.digitize(self._xrot[:, 0],
                                bins=np.linspace(min(self._xrot[:, 0]),
                                                 max(self._xrot[:, 0]),
                                                 self._pixels[0])) - 1
        ax1_coord = np.digitize(self._xrot[:, 1],
                                bins=np.linspace(min(self._xrot[:, 1]),
                                                 max(self._xrot[:, 1]),
                                                 self._pixels[1])) - 1
        self._coords = np.stack((ax0_coord, ax1_coord))

    def transform(self, X, empty_value=0):
        """Transform the input matrix into image matrices

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
                where n_features matches the training set.
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy matrices of dimensions set by
            the pixel parameter
        """

        # Group by location (x1, y1) of each feature
        # Tranpose to get (n_features, n_samples)
        img_coords = pd.DataFrame(np.vstack(
            (self._coords, X.clip(0, 1))).T).groupby(
                [0, 1],  # (x1, y1)
                as_index=False).mean()

        img_matrices = []
        blank_mat = np.zeros(self._pixels)
        if empty_value != 0:
            blank_mat[:] = empty_value
        for z in range(2, img_coords.shape[1]):
            img_matrix = blank_mat.copy()
            img_matrix[img_coords[0].astype(int),
                       img_coords[1].astype(int)] = img_coords[z]
            img_matrices.append(img_matrix)

        return img_matrices

    def fit_transform(self, X, empty_value=0):
        """Train the image transformer from the training set (X) and return
        the transformed data.

        Args:
            X: {array-like, sparse matrix} of shape (n_samples, n_features)
            empty_value: numeric value to fill elements where no features are
                mapped. Default = 0 (although it was 1 in the paper).

        Returns:
            A list of n_samples numpy matrices of dimensions set by
            the pixel parameter
        """
        self.fit(X)
        return self.transform(X, empty_value=empty_value)

    def feature_density_matrix(self):
        """Generate image matrix with feature counts per pixel

        Returns:
            img_matrix (ndarray): matrix with feature counts per pixel
        """
        fdmat = np.zeros(self._pixels)
        # Group by location (x1, y1) of each feature
        # Tranpose to get (n_features, n_samples)
        coord_cnt = (
            pd.DataFrame(self._coords.T).assign(count=1).groupby(
                [0, 1],  # (x1, y1)
                as_index=False).count())
        fdmat[coord_cnt[0].astype(int),
              coord_cnt[1].astype(int)] = coord_cnt['count']
        return fdmat

    @staticmethod
    def _minimum_bounding_rectangle(hull_points):
        """Find the smallest bounding rectangle for a set of points.

        Modified from JesseBuesking at https://stackoverflow.com/a/33619018
        Returns a set of points representing the corners of the bounding box.

        Args:
            hull_points : an nx2 matrix of hull coordinates

        Returns:
            (tuple): tuple containing
                coords (ndarray): coordinates of the corners of the rectangle
                rotmat (ndarray): rotation matrix to align edges of rectangle
                    to x and y
        """

        pi2 = np.pi / 2.

        # Calculate edge angles
        edges = hull_points[1:] - hull_points[:-1]
        angles = np.arctan2(edges[:, 1], edges[:, 0])
        angles = np.abs(np.mod(angles, pi2))
        angles = np.unique(angles)

        # Find rotation matrices
        rotations = np.vstack([
            np.cos(angles),
            np.cos(angles - pi2),
            np.cos(angles + pi2),
            np.cos(angles)
        ]).T
        rotations = rotations.reshape((-1, 2, 2))

        # Apply rotations to the hull
        rot_points = np.dot(rotations, hull_points.T)

        # Find the bounding points
        min_x = np.nanmin(rot_points[:, 0], axis=1)
        max_x = np.nanmax(rot_points[:, 0], axis=1)
        min_y = np.nanmin(rot_points[:, 1], axis=1)
        max_y = np.nanmax(rot_points[:, 1], axis=1)

        # Find the box with the best area
        areas = (max_x - min_x) * (max_y - min_y)
        best_idx = np.argmin(areas)

        # Return the best box
        x1 = max_x[best_idx]
        x2 = min_x[best_idx]
        y1 = max_y[best_idx]
        y2 = min_y[best_idx]
        rotmat = rotations[best_idx]

        # Generate coordinates
        coords = np.zeros((4, 2))
        coords[0] = np.dot([x1, y2], rotmat)
        coords[1] = np.dot([x2, y2], rotmat)
        coords[2] = np.dot([x2, y1], rotmat)
        coords[3] = np.dot([x1, y1], rotmat)

        return coords, rotmat

In [14]:
class LogScaler:
    """Log normalize and scale data

    Log normalization and scaling procedure as described as norm-2 in the
    DeepInsight paper supplementary information.
    
    Note: The dimensions of input matrix is (N samples, d features)
    """
    def __init__(self):
        self._min0 = None
        self._max = None

    """
    Use this as a preprocessing step in inference mode.
    """
    def fit(self, X, y=None):
        # Min. of training set per feature
        self._min0 = X.min(axis=0)

        # Log normalized X by log(X + _min0 + 1)
        X_norm = np.log(
            X +
            np.repeat(np.abs(self._min0)[np.newaxis, :], X.shape[0], axis=0) +
            1).clip(min=0, max=None)

        # Global max. of training set from X_norm
        self._max = X_norm.max()

    """
    For training set only.
    """
    def fit_transform(self, X, y=None):
        # Min. of training set per feature
        self._min0 = X.min(axis=0)

        # Log normalized X by log(X + _min0 + 1)
        X_norm = np.log(
            X +
            np.repeat(np.abs(self._min0)[np.newaxis, :], X.shape[0], axis=0) +
            1).clip(min=0, max=None)

        # Global max. of training set from X_norm
        self._max = X_norm.max()

        # Normalized again by global max. of training set
        return (X_norm / self._max).clip(0, 1)

    """
    For validation and test set only.
    """
    def transform(self, X, y=None):
        # Adjust min. of each feature of X by _min0
        for i in range(X.shape[1]):
            X[:, i] = X[:, i].clip(min=self._min0[i], max=None)

        # Log normalized X by log(X + _min0 + 1)
        X_norm = np.log(
            X +
            np.repeat(np.abs(self._min0)[np.newaxis, :], X.shape[0], axis=0) +
            1).clip(min=0, max=None)

        # Normalized again by global max. of training set
        return (X_norm / self._max).clip(0, 1)

# Dataset

In [15]:
class MoAImageSwapDataset(torch.utils.data.Dataset):
    def __init__(self,
                 features,
                 labels,
                 transformer,
                 swap_prob=0.15,
                 swap_portion=0.1):
        self.features = features
        self.labels = labels
        self.transformer = transformer
        self.swap_prob = swap_prob
        self.swap_portion = swap_portion

    def __getitem__(self, index):
        normalized = self.features[index, :]

        # Swap row features randomly
        normalized = self.add_swap_noise(index, normalized)

        normalized = np.expand_dims(normalized, axis=0)

        # Note: we are setting empty_value=1 to follow the setup in the paper
        image = self.transformer.transform(normalized, empty_value=1)[0]

        # Resize to target size
        gene_cht = cv2.resize(image, (image_size, image_size),
                              interpolation=cv2.INTER_CUBIC)

        # Convert to 3 channels
        image = np.repeat(gene_cht[np.newaxis, :, :], 3, axis=0)

        return {"x": image, "y": self.labels[index, :]}

    def add_swap_noise(self, index, X):
        if np.random.rand() < self.swap_prob:
            swap_index = np.random.randint(self.features.shape[0], size=1)[0]
            # Select only gene expression and cell viability features
            swap_features = np.random.choice(
                np.array(range(3, self.features.shape[1])),
                size=int(self.features.shape[1] * self.swap_portion),
                replace=False)
            X[swap_features] = self.features[swap_index, swap_features]

        return X

    def __len__(self):
        return self.features.shape[0]

In [16]:
class MoAImageDataset(torch.utils.data.Dataset):
    def __init__(self, features, labels, transformer):
        self.features = features
        self.labels = labels
        self.transformer = transformer

    def __getitem__(self, index):
        normalized = self.features[index, :]
        normalized = np.expand_dims(normalized, axis=0)

        # Note: we are setting empty_value=1 to follow the setup in the paper
        image = self.transformer.transform(normalized, empty_value=1)[0]

        # Resize to target size
        gene_cht = cv2.resize(image, (image_size, image_size),
                              interpolation=cv2.INTER_CUBIC)

        # Convert to 3 channels
        image = np.repeat(gene_cht[np.newaxis, :, :], 3, axis=0)

        return {"x": image, "y": self.labels[index, :]}

    def __len__(self):
        return self.features.shape[0]


class TestDataset(torch.utils.data.Dataset):
    def __init__(self, features, labels, transformer):
        self.features = features
        self.labels = labels
        self.transformer = transformer

    def __getitem__(self, index):
        normalized = self.features[index, :]
        normalized = np.expand_dims(normalized, axis=0)

        # Note: we are setting empty_value=1 to follow the setup in the paper
        image = self.transformer.transform(normalized, empty_value=1)[0]

        # Resize to target size
        gene_cht = cv2.resize(image, (image_size, image_size),
                              interpolation=cv2.INTER_CUBIC)

        # Convert to 3 channels
        image = np.repeat(gene_cht[np.newaxis, :, :], 3, axis=0)

        return {"x": image, "y": -1}

    def __len__(self):
        return self.features.shape[0]

# Model Definition

In [17]:
METRICS = {
    'epoch':[0],
    'train_loss':[0],
    'train_acc':[0],
    'val_acc':[0],
    'val_loss':[0],
    'lr': [0],
}

In [18]:
# Reference: https://github.com/rwightman/gen-efficientnet-pytorch/blob/master/geffnet/efficientnet_builder.py#L672
def initialize_weight_goog(m, n='', fix_group_fanout=True):
    # weight init as per Tensorflow Official impl
    # https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py
    if isinstance(m, nn.Conv2d):
        fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        if fix_group_fanout:
            fan_out //= m.groups
        m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
        if m.bias is not None:
            m.bias.data.zero_()
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1.0)
        m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        fan_out = m.weight.size(0)  # fan-out
        fan_in = 0
        if 'routing_fn' in n:
            fan_in = m.weight.size(1)
        init_range = 1.0 / math.sqrt(fan_in + fan_out)
        m.weight.data.uniform_(-init_range, init_range)
        m.bias.data.zero_()


def initialize_weight_default(m, n=''):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1.0)
        m.bias.data.zero_()
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight,
                                 mode='fan_in',
                                 nonlinearity='linear')

In [19]:
class MoAEfficientNet(pl.LightningModule):
    def __init__(
            self,
            pretrained_model_name,
            training_set=(None, None),  # tuple
            valid_set=(None, None),  # tuple
            test_set=None,
            transformer=None,
            num_classes=206,
            in_chans=3,
            drop_rate=0.,
            drop_connect_rate=0.,
            fc_size=512,
            learning_rate=1e-3,
            weight_init='goog'):
        super(MoAEfficientNet, self).__init__()

        self.train_data, self.train_labels = training_set
        self.valid_data, self.valid_labels = valid_set
        self.test_data = test_set
        self.transformer = transformer

        self.backbone = getattr(geffnet, pretrained_model)(
            pretrained=True,
            in_chans=in_chans,
            drop_rate=drop_rate,
            drop_connect_rate=drop_connect_rate,
            weight_init=weight_init)

        self.backbone.classifier = nn.Sequential(
            nn.Linear(self.backbone.classifier.in_features, fc_size,
                      bias=True), nn.ELU(),
            nn.Linear(fc_size, num_classes, bias=True))

        if self.training:
            for m in self.backbone.classifier.modules():
                initialize_weight_goog(m)

        # Save passed hyperparameters
        self.save_hyperparameters("pretrained_model_name", "num_classes",
                                  "in_chans", "drop_rate", "drop_connect_rate",
                                  "weight_init", "fc_size", "learning_rate")

    def forward(self, x):
        return self.backbone(x)

    def training_step(self, batch, batch_idx):
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)

        loss = F.binary_cross_entropy_with_logits(logits, y, reduction="mean")

        self.log('train_loss',
                 loss,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)

        val_loss = F.binary_cross_entropy_with_logits(logits,
                                                      y,
                                                      reduction="mean")

        self.log('val_loss',
                 val_loss,
                 on_step=True,
                 on_epoch=True,
                 prog_bar=True,
                 logger=True)

        return val_loss

    def test_step(self, batch, batch_idx):
        x = batch["x"]
        y = batch["y"]
        x = x.float()
        y = y.type_as(x)
        logits = self(x)
        return {"pred_logits": logits}

    def test_epoch_end(self, output_results):
        all_outputs = torch.cat([out["pred_logits"] for out in output_results],
                                dim=0)
        print("Logits:", all_outputs)
        pred_probs = F.sigmoid(all_outputs).detach().cpu().numpy()
        print("Predictions: ", pred_probs)
        return {"pred_probs": pred_probs}

    def setup(self, stage=None):
        #         self.train_dataset = MoAImageDataset(self.train_data,
        #                                              self.train_labels,
        #                                              self.transformer)
        self.train_dataset = MoAImageSwapDataset(self.train_data,
                                                 self.train_labels,
                                                 self.transformer,
                                                 swap_prob=swap_prob,
                                                 swap_portion=swap_portion)

        self.val_dataset = MoAImageDataset(self.valid_data, self.valid_labels,
                                           self.transformer)

        self.test_dataset = TestDataset(self.test_data, None, self.transformer)

    def train_dataloader(self):
        train_dataloader = DataLoader(self.train_dataset,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers,
                                      pin_memory=True,
                                      drop_last=False)
        print(f"Train iterations: {len(train_dataloader)}")
        return train_dataloader

    def val_dataloader(self):
        val_dataloader = DataLoader(self.val_dataset,
                                    batch_size=infer_batch_size,
                                    shuffle=False,
                                    num_workers=num_workers,
                                    pin_memory=True,
                                    drop_last=False)
        print(f"Validate iterations: {len(val_dataloader)}")
        return val_dataloader

    def test_dataloader(self):
        test_dataloader = DataLoader(self.test_dataset,
                                     batch_size=infer_batch_size,
                                     shuffle=False,
                                     num_workers=num_workers,
                                     pin_memory=True,
                                     drop_last=False)
        print(f"Test iterations: {len(test_dataloader)}")
        return test_dataloader

    def configure_optimizers(self):
        print(f"Initial Learning Rate: {self.hparams.learning_rate:.6f}")
        optimizer = optim.Adam(self.parameters(),
                               lr=self.hparams.learning_rate,
                               weight_decay=weight_decay)

        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=T_max,
                                                         eta_min=0,
                                                         last_epoch=-1)

        return [optimizer], [scheduler]

# CROSS VALIDATION

In [20]:

skf = MultilabelStratifiedKFold(n_splits=kfolds,
                                shuffle=True,
                                random_state=rand_seed)

label_counts = np.sum(train_labels.drop("sig_id", axis=1), axis=0)
y_labels = label_counts.index.tolist()

In [21]:
SEED = 42
FOLDS = 5

# LOAD LIBRARIES (from PIP or Kaggle Dataset)
#! pip install iterative-stratification 
from sklearn.model_selection import KFold
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

# LOAD FILES
scored = train_labels.copy()
drug = pd.read_csv('/kaggle/input/lish-moa/train_drug.csv')
targets = scored.columns[1:]
scored = scored.merge(drug, on='sig_id', how='left') 

# LOCATE DRUGS
vc = scored.drug_id.value_counts()
vc1 = vc.loc[vc<=18].index.sort_values()
vc2 = vc.loc[vc>18].index.sort_values()

# STRATIFY DRUGS 18X OR LESS
dct1 = {}; dct2 = {}
skf = MultilabelStratifiedKFold(n_splits=FOLDS, shuffle=True, 
          random_state=SEED)
tmp = scored.groupby('drug_id')[targets].mean().loc[vc1]
for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
    dd = {k:fold for k in tmp.index[idxV].values}
    dct1.update(dd)

# STRATIFY DRUGS MORE THAN 18X
skf = MultilabelStratifiedKFold(n_splits=FOLDS, shuffle=True, 
          random_state=SEED)
tmp = scored.loc[scored.drug_id.isin(vc2)].reset_index(drop=True)
for fold,(idxT,idxV) in enumerate( skf.split(tmp,tmp[targets])):
    dd = {k:fold for k in tmp.sig_id[idxV].values}
    dct2.update(dd)

# ASSIGN FOLDS
scored['fold'] = scored.drug_id.map(dct1)
scored.loc[scored.fold.isna(),'fold'] =\
    scored.loc[scored.fold.isna(),'sig_id'].map(dct2)
scored.fold = scored.fold.astype('int8')

# GET MODELS

In [22]:
def get_model(model_path, test_set, transformer):
    model = MoAEfficientNet.load_from_checkpoint(
        model_path,
        pretrained_model_name=pretrained_model,
        training_set=(None, None),  # tuple
        valid_set=(None, None),  # tuple
        test_set=test_set,
        transformer=transformer,
        drop_rate=drop_rate,
        drop_connect_rate=drop_connect_rate,
        fc_size=fc_size,
        weight_init='goog')

    model.freeze()
    model.eval()
    return model


def save_pickle(obj, model_output_folder, fold_i, name):
    dump(obj, open(f"{model_output_folder}/fold{fold_i}_{name}.pkl", 'wb'),
         pickle.HIGHEST_PROTOCOL)


def load_pickle(model_output_folder, fold_i, name):
    return load(open(f"{model_output_folder}/fold{fold_i}_{name}.pkl", 'rb'))

In [23]:
def mean_logloss(y_pred, y_true):
    logloss = (1 - y_true) * np.log(1 - y_pred +
                                    1e-15) + y_true * np.log(y_pred + 1e-15)
    return np.mean(-logloss)

# TRAINING

In [24]:
def extract_feature_map(train, resolution, perplexity):
    transformer= DeepInsightTransformer(pixels=resolution,
                                perplexity=perplexity)
    transformer=transformer.fit(train)
    return transformer

In [25]:
def norm2_normalization(train, valid, test):
    scaler= LogScaler()
    train_set=scaler.fit_transform(train)
    valid_set=scaler.transform(valid)
    test_set=scaler.transform(test)
    return train_set, valid_set, test_set, scaler

In [26]:
def get_model_tr(test_set,transformer,training_set, valid_set):
    model = MoAEfficientNet(
        pretrained_model_name=pretrained_model,
        training_set=(train,fold_train_labels ),  # tuple
        valid_set=(valid,fold_valid_labels ),  # tuple
        test_set=valid,
        transformer=transformer,
        drop_rate=drop_rate,
        drop_connect_rate=drop_connect_rate,
        fc_size=fc_size,
        weight_init='goog')

    #model.freeze()
    #model.eval()
    return model

In [27]:
print(train_features.shape, scored.shape, train_labels.shape)

(23814, 876) (23814, 209) (23814, 207)


In [28]:
train_features2 = train_features.copy()
train_features = train_features.loc[train_features['cp_type']!=0].reset_index(drop=True)
scored = scored.loc[train_features2['cp_type']!=0].reset_index(drop=True)
train_labels = train_labels.loc[train_features2['cp_type']!=0].reset_index(drop=True)

In [29]:
print(train_features.shape, scored.shape, train_labels.shape)

(21948, 876) (21948, 209) (21948, 207)


In [30]:
# Ensure Reproducibility
seed_everything(rand_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

train_targets5=train_labels.drop('sig_id', axis=1)
res = train_targets5.copy()
res.loc[:, train_targets5.columns] = 0

for i in range(kfolds):
    
    model_output_folder= '/kaggle/working'
    logger = TensorBoardLogger(model_output_folder,
                           name=f"fold{i}/logs",
                           default_hp_metric=False)

    train = train_features.loc[scored.fold!=i, all_features].copy().values
    fold_train_labels = train_labels.loc[scored.fold!=i, train_classes].copy().values
    valid = train_features.loc[scored.fold==i, all_features].copy().values
    fold_valid_labels = train_labels.loc[scored.fold==i, train_classes].copy().values
    test = test_features[all_features].copy().values

    # LogScaler (Norm-2 Normalization)
    #print("Running norm-2 normalization ......")
    train, valid, test, scaler = norm2_normalization(train, valid, test)
    save_pickle(scaler, model_output_folder, i, "log-scaler")

    # Extract DeepInsight Feature Map
    #print("Extracting feature map ......")
    transformer = extract_feature_map(train,
                                  resolution=resolution,
                                  perplexity=perplexity)
    save_pickle(transformer, model_output_folder, i, "deepinsight-transform")

    model = get_model_tr(training_set=(train, fold_train_labels),
                  valid_set=(valid, fold_valid_labels),
                  test_set=valid,
                  transformer=transformer)

    callbacks = [
      EarlyStopping(monitor='val_loss_epoch',
                  min_delta=1e-6,
                  patience=patience,
                  verbose=True,
                  mode='min',
                  strict=True),
       LearningRateMonitor(logging_interval='step')
               ]
  # https://pytorch-lightning.readthedocs.io/en/latest/generated/pytorch_lightning.callbacks.ModelCheckpoint.html#pytorch_lightning.callbacks.ModelCheckpoint
    checkpoint_callback = ModelCheckpoint(
      filepath=f"{model_output_folder}/fold{i}" +
      "/{epoch}-{train_loss_epoch:.6f}-{val_loss_epoch:.6f}" +
      f"-image_size={image_size}-resolution={resolution}-perplexity={perplexity}-fc={fc_size}",
      save_top_k=1,
      save_weights_only=False,
      save_last=False,
      verbose=True,
      monitor='val_loss_epoch',
      mode='min',
      prefix='')

    trainer = Trainer(
      gpus=gpus,
      distributed_backend="dp",  # multiple-gpus, 1 machine
      max_epochs=epochs,
      benchmark=False,
      deterministic=True,
      checkpoint_callback=checkpoint_callback,
      callbacks=callbacks,
      accumulate_grad_batches=accumulate_grad_batches,
      gradient_clip_val=gradient_clip_val,
      precision=16,
      logger=logger)
    trainer.fit(model)
    
    oofs = trainer.test(model, verbose=False)[0]
    oofs_preds = oofs["pred_probs"]
    res.loc[scored.fold==i, train_targets5.columns] += oofs_preds
    
    del model, trainer, scaler, transformer, train, valid, checkpoint_callback, callbacks, oofs, oofs_preds, fold_valid_labels, fold_train_labels, test
    torch.cuda.empty_cache()
    gc.collect()

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4_ns-d6313a46.pth" to /root/.cache/torch/hub/checkpoints/tf_efficientnet_b4_ns-d6313a46.pth
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.
Missing logger folder: /kaggle/working/fold0/logs

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 18 M  


Initial Learning Rate: 0.001000
Validate iterations: 70


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Train iterations: 547


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0: val_loss_epoch reached 0.02153 (best 0.02153), saving model to /kaggle/working/fold0/epoch=0-train_loss_epoch=0.000000-val_loss_epoch=0.021529-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1: val_loss_epoch reached 0.02095 (best 0.02095), saving model to /kaggle/working/fold0/epoch=1-train_loss_epoch=0.028739-val_loss_epoch=0.020946-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3: val_loss_epoch reached 0.02012 (best 0.02012), saving model to /kaggle/working/fold0/epoch=3-train_loss_epoch=0.020142-val_loss_epoch=0.020117-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4: val_loss_epoch reached 0.01927 (best 0.01927), saving model to /kaggle/working/fold0/epoch=4-train_loss_epoch=0.019513-val_loss_epoch=0.019269-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 5: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 6: val_loss_epoch reached 0.01862 (best 0.01862), saving model to /kaggle/working/fold0/epoch=6-train_loss_epoch=0.018170-val_loss_epoch=0.018621-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 7: val_loss_epoch reached 0.01828 (best 0.01828), saving model to /kaggle/working/fold0/epoch=7-train_loss_epoch=0.017663-val_loss_epoch=0.018275-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 8: val_loss_epoch reached 0.01809 (best 0.01809), saving model to /kaggle/working/fold0/epoch=8-train_loss_epoch=0.017172-val_loss_epoch=0.018090-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 9: val_loss_epoch was not in top 1



Test iterations: 70


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[-12.6719,  -9.0234,  -8.5938,  ..., -10.5859,  -7.2969,  -9.3906],
        [ -6.1211,  -6.6016,  -7.2539,  ...,  -6.9570,  -6.0078,  -6.4805],
        [-13.1016, -11.0391,  -5.1250,  ...,  -6.3281,  -8.4609,  -7.1602],
        ...,
        [ -6.7617,  -6.8945,  -8.0469,  ...,  -6.9492,  -6.2969,  -6.6133],
        [ -7.0117,  -7.6523,  -7.2695,  ...,  -6.0195,  -7.0859,  -6.4258],
        [ -6.1172,  -6.6445,  -8.0312,  ...,  -6.7539,  -7.9570,  -6.6680]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[3.1590e-06 1.2052e-04 1.8525e-04 ... 2.5272e-05 6.7711e-04 8.3506e-05]
 [2.1915e-03 1.3561e-03 7.0667e-04 ... 9.5081e-04 2.4529e-03 1.5306e-03]
 [2.0266e-06 1.6034e-05 5.9128e-03 ... 1.7824e-03 2.1148e-04 7.7629e-04]
 ...
 [1.1559e-03 1.0118e-03 3.1996e-04 ... 9.5844e-04 1.8387e-03 1.3409e-03]
 [9.0027e-04 4.7469e-04 6.9618e-04 ... 2.4242e-03 8.3590e-04 1.6165e-03]
 [2.1992e-03 1.2999e-03 3.2496e-04 ... 1.1654e-03 3.5000e-04 1.2693e-03]]



GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.
Missing logger folder: /kaggle/working/fold1/logs

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 18 M  


Initial Learning Rate: 0.001000
Validate iterations: 69


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Train iterations: 549


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0: val_loss_epoch reached 0.02114 (best 0.02114), saving model to /kaggle/working/fold1/epoch=0-train_loss_epoch=0.000000-val_loss_epoch=0.021137-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1: val_loss_epoch reached 0.02066 (best 0.02066), saving model to /kaggle/working/fold1/epoch=1-train_loss_epoch=0.028681-val_loss_epoch=0.020657-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3: val_loss_epoch reached 0.02049 (best 0.02049), saving model to /kaggle/working/fold1/epoch=3-train_loss_epoch=0.020032-val_loss_epoch=0.020486-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4: val_loss_epoch reached 0.01861 (best 0.01861), saving model to /kaggle/working/fold1/epoch=4-train_loss_epoch=0.019051-val_loss_epoch=0.018606-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 5: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 6: val_loss_epoch reached 0.01807 (best 0.01807), saving model to /kaggle/working/fold1/epoch=6-train_loss_epoch=0.017955-val_loss_epoch=0.018066-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 7: val_loss_epoch reached 0.01787 (best 0.01787), saving model to /kaggle/working/fold1/epoch=7-train_loss_epoch=0.017529-val_loss_epoch=0.017867-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 8: val_loss_epoch reached 0.01781 (best 0.01781), saving model to /kaggle/working/fold1/epoch=8-train_loss_epoch=0.017148-val_loss_epoch=0.017811-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 9: val_loss_epoch reached 0.01774 (best 0.01774), saving model to /kaggle/working/fold1/epoch=9-train_loss_epoch=0.016756-val_loss_epoch=0.017741-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1



Test iterations: 69


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[ -7.1328,  -6.3047,  -6.4062,  ...,  -6.8633,  -6.7461,  -6.1328],
        [ -7.5977,  -6.6094,  -6.5547,  ...,  -6.5625,  -7.8828,  -6.4648],
        [ -5.9844,  -6.5352,  -7.1758,  ...,  -6.7344,  -7.8633,  -6.5078],
        ...,
        [-14.6641, -12.2422, -13.1328,  ..., -16.0781, -15.2734, -14.3359],
        [-10.5859,  -8.7578,  -7.9062,  ...,  -9.5859,  -7.2461,  -8.2109],
        [-16.5312, -14.0938,  -7.3320,  ...,  -8.6328,  -9.7109, -10.9297]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[7.9775e-04 1.8244e-03 1.6489e-03 ... 1.0443e-03 1.1740e-03 2.1648e-03]
 [5.0116e-04 1.3456e-03 1.4210e-03 ... 1.4105e-03 3.7694e-04 1.5545e-03]
 [2.5120e-03 1.4496e-03 7.6437e-04 ... 1.1883e-03 3.8457e-04 1.4896e-03]
 ...
 [4.1723e-07 4.8280e-06 1.9670e-06 ... 1.1921e-07 2.3842e-07 5.9605e-07]
 [2.5272e-05 1.5724e-04 3.6836e-04 ... 6.8665e-05 7.1239e-04 2.7156e-04]
 [5.9605e-08 7.7486e-07 6.5374e-04 ... 1.7810e-04 6.0618e-05 1.7941e-05]]



GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.
Missing logger folder: /kaggle/working/fold2/logs

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 18 M  


Initial Learning Rate: 0.001000
Validate iterations: 68


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Train iterations: 551


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0: val_loss_epoch reached 0.02142 (best 0.02142), saving model to /kaggle/working/fold2/epoch=0-train_loss_epoch=0.000000-val_loss_epoch=0.021419-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2: val_loss_epoch reached 0.02060 (best 0.02060), saving model to /kaggle/working/fold2/epoch=2-train_loss_epoch=0.020722-val_loss_epoch=0.020597-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4: val_loss_epoch reached 0.02013 (best 0.02013), saving model to /kaggle/working/fold2/epoch=4-train_loss_epoch=0.019414-val_loss_epoch=0.020133-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 5: val_loss_epoch reached 0.01889 (best 0.01889), saving model to /kaggle/working/fold2/epoch=5-train_loss_epoch=0.018899-val_loss_epoch=0.018886-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 6: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 7: val_loss_epoch reached 0.01838 (best 0.01838), saving model to /kaggle/working/fold2/epoch=7-train_loss_epoch=0.017671-val_loss_epoch=0.018375-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 8: val_loss_epoch reached 0.01820 (best 0.01820), saving model to /kaggle/working/fold2/epoch=8-train_loss_epoch=0.017159-val_loss_epoch=0.018196-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 9: val_loss_epoch reached 0.01803 (best 0.01803), saving model to /kaggle/working/fold2/epoch=9-train_loss_epoch=0.016742-val_loss_epoch=0.018032-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1



Test iterations: 68


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[ -7.4180,  -7.2695,  -6.5312,  ...,  -5.6875,  -5.9258,  -5.6445],
        [ -8.0312,  -8.4766,  -6.8711,  ...,  -4.9805,  -6.0703,  -5.4062],
        [-10.8984, -12.5078, -13.5234,  ..., -17.0000, -14.3594, -18.0000],
        ...,
        [ -6.6445,  -5.9570,  -6.1992,  ...,  -6.5820,  -5.8281,  -6.4258],
        [ -7.3555,  -7.0156,  -6.8086,  ...,  -5.8125,  -6.3867,  -5.8398],
        [ -6.5312,  -6.6523,  -6.6133,  ...,  -6.6836,  -6.0664,  -6.5156]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[5.9986e-04 6.9618e-04 1.4553e-03 ... 3.3760e-03 2.6627e-03 3.5248e-03]
 [3.2496e-04 2.0826e-04 1.0366e-03 ... 6.8245e-03 2.3060e-03 4.4670e-03]
 [1.8477e-05 3.6955e-06 1.3113e-06 ... 5.9605e-08 5.9605e-07 0.0000e+00]
 ...
 [1.2999e-03 2.5806e-03 2.0275e-03 ... 1.3828e-03 2.9354e-03 1.6165e-03]
 [6.3848e-04 8.9693e-04 1.1034e-03 ... 2.9812e-03 1.6813e-03 2.9011e-03]
 [1.4553e-03 1.2894e-03 1.3409e-03 ... 1.2493e-03 2.3136e-03 1.4782e-03]]



GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.
Missing logger folder: /kaggle/working/fold3/logs

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 18 M  


Initial Learning Rate: 0.001000
Validate iterations: 68


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Train iterations: 550


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0: val_loss_epoch reached 0.02170 (best 0.02170), saving model to /kaggle/working/fold3/epoch=0-train_loss_epoch=0.000000-val_loss_epoch=0.021702-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1: val_loss_epoch reached 0.02090 (best 0.02090), saving model to /kaggle/working/fold3/epoch=1-train_loss_epoch=0.028905-val_loss_epoch=0.020899-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2: val_loss_epoch reached 0.02054 (best 0.02054), saving model to /kaggle/working/fold3/epoch=2-train_loss_epoch=0.020709-val_loss_epoch=0.020543-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4: val_loss_epoch reached 0.01897 (best 0.01897), saving model to /kaggle/working/fold3/epoch=4-train_loss_epoch=0.018791-val_loss_epoch=0.018967-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 5: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 6: val_loss_epoch reached 0.01840 (best 0.01840), saving model to /kaggle/working/fold3/epoch=6-train_loss_epoch=0.017807-val_loss_epoch=0.018396-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 7: val_loss_epoch reached 0.01800 (best 0.01800), saving model to /kaggle/working/fold3/epoch=7-train_loss_epoch=0.017365-val_loss_epoch=0.018005-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 8: val_loss_epoch reached 0.01794 (best 0.01794), saving model to /kaggle/working/fold3/epoch=8-train_loss_epoch=0.016939-val_loss_epoch=0.017943-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 9: val_loss_epoch was not in top 1



Test iterations: 68


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[ -6.0156,  -6.3242,  -7.5117,  ...,  -6.2812,  -6.2578,  -5.9492],
        [ -8.7344, -10.5469,  -9.0703,  ...,  -9.0156,  -6.5273,  -7.0469],
        [ -8.8125,  -8.0234,  -5.5039,  ...,  -6.4727,  -5.2969,  -6.6484],
        ...,
        [-10.8906,  -9.8516,  -8.2812,  ...,  -8.4453,  -7.2188,  -7.7852],
        [ -4.9336,  -6.2227,  -8.3047,  ...,  -6.8867,  -6.7266,  -6.3750],
        [-10.1953,  -8.6641,  -8.4297,  ...,  -8.2188,  -9.0312,  -8.1250]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[2.4338e-03 1.7891e-03 5.4646e-04 ... 1.8673e-03 1.9121e-03 2.6016e-03]
 [1.6093e-04 2.6286e-05 1.1504e-04 ... 1.2147e-04 1.4610e-03 8.6927e-04]
 [1.4889e-04 3.2759e-04 4.0550e-03 ... 1.5430e-03 4.9820e-03 1.2941e-03]
 ...
 [1.8656e-05 5.2691e-05 2.5320e-04 ... 2.1482e-04 7.3195e-04 4.1580e-04]
 [7.1487e-03 1.9798e-03 2.4724e-04 ... 1.0204e-03 1.1969e-03 1.7004e-03]
 [3.7372e-05 1.7262e-04 2.1827e-04 ... 2.6941e-04 1.1963e-04 2.9588e-04]]



GPU available: True, used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.
Missing logger folder: /kaggle/working/fold4/logs

  | Name     | Type            | Params
---------------------------------------------
0 | backbone | GenEfficientNet | 18 M  


Initial Learning Rate: 0.001000
Validate iterations: 69


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

Train iterations: 549


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 0: val_loss_epoch reached 0.02717 (best 0.02717), saving model to /kaggle/working/fold4/epoch=0-train_loss_epoch=0.000000-val_loss_epoch=0.027171-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 1: val_loss_epoch reached 0.02109 (best 0.02109), saving model to /kaggle/working/fold4/epoch=1-train_loss_epoch=0.027612-val_loss_epoch=0.021093-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 2: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 3: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 4: val_loss_epoch reached 0.01858 (best 0.01858), saving model to /kaggle/working/fold4/epoch=4-train_loss_epoch=0.018724-val_loss_epoch=0.018580-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 5: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 6: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 7: val_loss_epoch was not in top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 8: val_loss_epoch reached 0.01827 (best 0.01827), saving model to /kaggle/working/fold4/epoch=8-train_loss_epoch=0.016869-val_loss_epoch=0.018267-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Epoch 9: val_loss_epoch reached 0.01820 (best 0.01820), saving model to /kaggle/working/fold4/epoch=9-train_loss_epoch=0.016425-val_loss_epoch=0.018197-image_size=380-resolution=380-perplexity=5-fc=512.ckpt as top 1



Test iterations: 69


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

Logits: tensor([[ -6.1641,  -7.1484,  -7.7734,  ...,  -6.6445,  -7.3398,  -5.9531],
        [ -9.1641,  -7.0234, -10.2734,  ...,  -9.6875, -10.5391,  -9.0312],
        [-10.0000, -11.9922, -19.2500,  ..., -17.1406, -20.5469, -12.4375],
        ...,
        [ -7.0508,  -8.3984,  -7.1719,  ...,  -6.4102,  -6.4102,  -6.0078],
        [ -8.3203,  -9.3906,  -6.0234,  ...,  -8.1562,  -5.1445,  -6.5508],
        [-22.3438, -21.7188,  -9.0391,  ..., -16.3906,  -8.9922, -13.1797]],
       device='cuda:0', dtype=torch.float16)
Predictions:  [[2.100e-03 7.854e-04 4.206e-04 ... 1.300e-03 6.485e-04 2.590e-03]
 [1.047e-04 8.898e-04 3.451e-05 ... 6.205e-05 2.646e-05 1.196e-04]
 [4.542e-05 6.199e-06 0.000e+00 ... 5.960e-08 0.000e+00 3.994e-06]
 ...
 [8.659e-04 2.252e-04 7.672e-04 ... 1.642e-03 1.642e-03 2.453e-03]
 [2.434e-04 8.351e-05 2.415e-03 ... 2.868e-04 5.798e-03 1.427e-03]
 [0.000e+00 0.000e+00 1.187e-04 ... 5.960e-08 1.243e-04 1.907e-06]]



In [31]:
oof_loss = mean_logloss(res.values, train_labels[train_classes].values)
print(f"CV logloss: {oof_loss:.6f}")

CV logloss: 0.018025


In [32]:
res.to_csv('oofs.csv')