In [1]:
import warnings; warnings.simplefilter('ignore', DeprecationWarning)
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import pytorch_lightning as pl

# Location of the saved state_dict that the inference cells load into BaseLine2.
WEIGHT_PATH = "../weight/model.pt"



## All settings
Make sure these settings match the ones used for training.

In [2]:
## Hyperparameters Here
# num_layers, hidden_size and dropout are passed to BaseLine2 when the network
# is rebuilt for inference. learning_rate is not read by any cell in this
# notebook (LitMoA.configure_optimizers uses a literal 1e-3).
params = dict(
    num_layers=3,
    hidden_size=16,
    dropout=0.3,
    learning_rate=1e-3,
)

## Model Here
class BaseLine2(nn.Module):
    """Fully connected baseline: ``num_layers`` hidden stages and a linear head.

    Each hidden stage is ``Linear -> BatchNorm1d -> Dropout``; the head is a
    final ``Linear`` producing ``num_targets`` outputs.

    NOTE(review): the previous implementation instantiated ``nn.ReLU()`` in
    every stage but never appended it to the layer list, so no non-linearity
    was ever applied. That layout is kept as the default on purpose: inserting
    a module into the ``nn.Sequential`` shifts the indices used in
    ``state_dict`` keys, so a checkpoint saved with the old layout would no
    longer load. Presumably the training code shares this layout -- confirm
    there before enabling ``use_activation``.

    Args:
        num_features: width of the input vectors.
        num_targets: width of the output vectors.
        num_layers: number of hidden stages. ``0`` yields a single
            ``Linear(num_features, num_targets)``.
        hidden_size: width of every hidden stage.
        dropout: drop probability of each ``nn.Dropout``.
        use_activation: when True, append ``nn.ReLU()`` after the Dropout of
            every hidden stage. Changes the ``state_dict`` key layout.
    """

    def __init__(self, num_features, num_targets, num_layers=3, hidden_size=16,
                 dropout=0.3, use_activation=False):
        super().__init__()
        layers = []
        in_size = num_features  # only the first stage sees the raw feature width
        for _ in range(num_layers):
            layers.append(nn.Linear(in_size, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.Dropout(dropout))
            if use_activation:
                layers.append(nn.ReLU())
            in_size = hidden_size
        # With num_layers == 0, in_size is still num_features, so the head
        # stays shape-compatible with the input.
        layers.append(nn.Linear(in_size, num_targets))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the stacked layers to ``x`` and return the raw outputs."""
        return self.model(x)

    
## Feature Engineering Here
def add_dummies(data, col):
    """Return a copy of ``data`` with ``col`` replaced by one-hot columns.

    One indicator column named ``f"{col}_{value}"`` is appended for every
    distinct value of ``data[col]``; the original column is dropped. ``data``
    itself is not modified.

    The indicator columns are attached positionally with ``pd.concat`` rather
    than ``DataFrame.join``: a join on the index multiplies rows whenever the
    index contains duplicate labels.

    Args:
        data: input DataFrame containing ``col``.
        col: name of the column to encode.

    Returns:
        A new DataFrame with the same rows and index as ``data``.

    Raises:
        KeyError: if ``col`` is not a column of ``data``.
        ValueError: if an indicator column name already exists in ``data``.
    """
    enc = pd.get_dummies(data[col], prefix=col)
    rest = data.drop(col, axis=1)
    # Refuse to create duplicate column names (join used to raise here too).
    overlap = [c for c in enc.columns if c in rest.columns]
    if overlap:
        raise ValueError(f"columns overlap but no suffix specified: {overlap}")
    return pd.concat([rest, enc], axis=1)


def process_data(df):
    """One-hot encode the ``cp_time``, ``cp_dose`` and ``cp_type`` columns.

    The columns are encoded in that order via ``add_dummies``, so the
    indicator columns are appended to the frame in the same order.
    """
    for categorical in ("cp_time", "cp_dose", "cp_type"):
        df = add_dummies(df, categorical)
    return df

---
## Helper Functions

In [3]:
class TestMoaDataset:
    """Map-style dataset over a 2-D feature array (inputs only, no targets).

    Usage:
        dataset = TestMoaDataset(dataset=test_features.iloc[:, 1:].values)
    """

    def __init__(self, dataset):
        # Stored as given; indexed with ``[row, :]`` on access.
        self.dataset = dataset

    def __len__(self):
        n_rows = self.dataset.shape[0]
        return n_rows

    def __getitem__(self, item):
        # One sample is a dict with a single key so batches expose ``batch["x"]``.
        row = self.dataset[item, :]
        sample = {"x": torch.tensor(row, dtype=torch.float)}
        return sample


class LitMoA(pl.LightningModule):
    """Lightning module wrapping ``model`` with a ``BCEWithLogitsLoss`` criterion.

    Batches are dicts with an ``"x"`` input tensor and, for the training and
    validation steps, a ``"y"`` target tensor. The optimizer is Adam with a
    fixed learning rate of 1e-3 (``hparams`` is stored but not consulted for
    it) and a ``ReduceLROnPlateau`` scheduler monitoring ``"valid_loss"``.
    """

    def __init__(self, hparams, model):
        super().__init__()
        self.hparams = hparams
        self.model = model
        self.criterion = nn.BCEWithLogitsLoss()

    def forward(self, x):
        """Delegate to the wrapped model."""
        return self.model(x)

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler_config])`` for the trainer."""
        adam = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
            adam, mode="min", patience=3, threshold=0.00001, verbose=True
        )
        scheduler_config = {
            "scheduler": plateau,
            "interval": "epoch",
            "monitor": "valid_loss",
        }
        return ([adam], [scheduler_config])

    def _batch_loss(self, batch):
        """Run the model on ``batch["x"]`` and score it against ``batch["y"]``."""
        inputs, targets = batch["x"], batch["y"]
        return self.criterion(self(inputs), targets)

    @staticmethod
    def _epoch_mean_loss(outputs):
        """Average the ``"loss"`` entries collected over one epoch."""
        per_step = [step_output["loss"] for step_output in outputs]
        return torch.stack(per_step).mean()

    def training_step(self, batch, batch_idx):
        """Compute the training loss for one batch."""
        loss = self._batch_loss(batch)
        logs = {"training_loss": loss}
        return {"loss": loss, "log": logs, "progress_bar": logs}

    def training_epoch_end(self, outputs):
        """Report the mean training loss of the epoch as ``train_loss``."""
        logs = {"train_loss": self._epoch_mean_loss(outputs)}
        return {"log": logs, "progress_bar": logs}

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss for one batch."""
        loss = self._batch_loss(batch)
        logs = {"valid_loss": loss}
        return {"loss": loss, "log": logs, "progress_bar": logs}

    def validation_epoch_end(self, outputs):
        """Report the mean validation loss of the epoch as ``valid_loss``."""
        logs = {"valid_loss": self._epoch_mean_loss(outputs)}
        return {"log": logs, "progress_bar": logs}

---
## Inference From Here

In [4]:
# Load the raw test features and one-hot encode the categorical columns.
df_te = pd.read_csv("../input/lish-moa/test_features.csv")
df_te = process_data(df_te)
# Every column except the row identifier is a model input.
features = df_te.drop("sig_id", axis=1).columns
# Request a float array explicitly: depending on the pandas version the
# indicator columns are bool, and a mixed float/bool frame would otherwise
# become an object array that torch.tensor cannot convert.
x_te = df_te[features].to_numpy(dtype=np.float64)
dataset_te = TestMoaDataset(dataset=x_te)
# shuffle=False keeps predictions in the same row order as the CSV.
loader_te = torch.utils.data.DataLoader(
    dataset_te, batch_size=1024, num_workers=0, shuffle=False,
)

In [5]:
# Rebuild the network with the same architecture settings as the checkpoint.
# The model emits 207 outputs; the inference loop below drops the last one to
# fit the 206-column prediction array.
net = BaseLine2(
    num_features=x_te.shape[1],
    num_targets=207, 
    num_layers=params["num_layers"],
    hidden_size=params["hidden_size"],
    dropout=params["dropout"])
# Inference below runs on CPU, so map the stored tensors to CPU; without this
# a checkpoint saved from a GPU fails to load on a CPU-only machine.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
net.load_state_dict(torch.load(WEIGHT_PATH, map_location="cpu"))
model = LitMoA(hparams={}, model=net)

In [6]:
# Pre-allocate one row per test sample and 206 prediction columns.
n_test_rows = x_te.shape[0]
predictions = np.zeros(shape=(n_test_rows, 206))
# Inference uses the plain torch module held by the Lightning wrapper.
inference_model = model.model

In [7]:
# eval() disables Dropout and makes BatchNorm use its running statistics.
inference_model.eval()
# Track the write position explicitly so the loop does not depend on the
# DataLoader's batch size.
row_start = 0
with torch.no_grad():  # no gradients are needed for prediction
    for batch in loader_te:
        p = torch.sigmoid(inference_model(batch["x"])).cpu().numpy()
        row_stop = row_start + p.shape[0]
        # The last model output is not stored in the prediction array.
        predictions[row_start:row_stop] = p[:, :-1]
        row_start = row_stop

In [8]:
# Use the sample submission only for its ids and target column names.
sub = pd.read_csv('../input/lish-moa/sample_submission.csv')
target_cols = sub.columns[1:]
# Build the float frame in one step instead of inserting integer placeholder
# columns one by one and overwriting them afterwards.
s = pd.DataFrame(predictions, columns=target_cols)
# NOTE(review): assumes sample_submission rows are in the same order as
# test_features.csv, since predictions follow the latter -- confirm.
s.insert(0, "sig_id", sub["sig_id"].values)

In [9]:
# Write the submission file without the DataFrame index.
SUBMISSION_PATH = "../submission/submission.csv"
s.to_csv(SUBMISSION_PATH, index=False)