In [1]:
import os
import sys
from os.path import exists

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn

# Make the project-local modules under ../script importable.
sys.path.append('../script/')
import utils
import models
# NOTE(review): neither constant is referenced by the cells visible in this
# notebook (the Trainer below is built with gpus=1 and max_epochs=5) --
# confirm whether they are still needed or should be wired into the Trainer.
DEVICE = "cuda"
EPOCHS = 100



## Add fold numbers for cross-validation (CV)

In [2]:
# Build the cross-validation fold assignment once and cache it on disk.
# When the CSV already exists it is left untouched, so later runs reuse the
# same split.
path_fold = "../input/folds/train_folds.csv"
if not exists(path_fold):
    # Imported lazily: the stratifier is only needed when the file is missing.
    from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

    df = pd.read_csv("../input/lish-moa/train_targets_scored.csv")
    # Shuffle with a fixed seed so regenerating the file starts from the same
    # row order.
    # NOTE(review): the splitter below is left unseeded, as in the original;
    # confirm whether it also needs a seed for exact reproducibility.
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Extract the label matrix BEFORE adding the "kfold" bookkeeping column.
    # Previously the constant -1 column was part of `targets` and was handed
    # to the stratifier as if it were a label.
    targets = df.drop("sig_id", axis=1).values
    df.loc[:, "kfold"] = -1

    mskf = MultilabelStratifiedKFold(n_splits=5)
    for fold_, (_train_idx, valid_idx) in enumerate(mskf.split(X=df, y=targets)):
        # Each row is tagged with the fold in which it is a validation row.
        df.loc[valid_idx, "kfold"] = fold_

    # Create the output directory first; to_csv does not create it.
    os.makedirs(os.path.dirname(path_fold), exist_ok=True)
    df.to_csv(path_fold, index=False)

## Params for training function `run_training`

In [3]:
# Arguments matching the (fold, params, save_model) signature of the training
# function prototyped below.
fold = 0
save_model = True
# Optuna is not used here, so these hyperparameter values are arbitrary.
params = dict(
    num_layers=3,
    hidden_size=16,
    dropout=0.3,
    learning_rate=1e-3,
)

---
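## Prototyping the training process starts here
The cells below prototype the body of the training function. It is called `run_training` in the heading above but `run_pl_training` in the signature below (**TODO:** settle on one name):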
```python
def run_pl_training(fold, params, save_model=False):
```

In [4]:
# Load the raw competition tables: feature matrices first, then the target
# tables, then the sample submission file.
X_train, X_test = (
    pd.read_csv("../input/lish-moa/train_features.csv"),
    pd.read_csv("../input/lish-moa/test_features.csv"),
)
train_targets_scored, train_targets_nonscored = (
    pd.read_csv("../input/lish-moa/train_targets_scored.csv"),
    pd.read_csv("../input/lish-moa/train_targets_nonscored.csv"),
)
sample_submission = pd.read_csv("../input/lish-moa/sample_submission.csv")

In [5]:
# One-hot encode the three categorical columns. `get_dummies(columns=...)`
# removes the listed source columns and appends `<column>_<value>` indicator
# columns after the remaining ones, in the order the columns are listed.
# NOTE(review): train and test are encoded independently, so their columns only
# line up if both files contain the same category values -- confirm.
categorical_cols = ["cp_type", "cp_time", "cp_dose"]
X_train = pd.get_dummies(X_train, columns=categorical_cols)
X_test = pd.get_dummies(X_test, columns=categorical_cols)

In [6]:
# Dimensions handed to the network constructor.
# 206 matches the width of the prediction array filled in at inference time.
# 879 is presumably the number of input features after one-hot encoding --
# TODO confirm against the data module.
n_inputs = 879
n_outputs = 206

trainer = pl.Trainer(
    gpus=1,
    max_epochs=5,
    weights_summary="full",
)
network = models.BaseLine2(n_inputs, n_outputs)
model = utils.LitMoA(hparams={}, model=network)
dm = utils.MoADataModule(
    hparams={},
    data=X_train,
    targets=train_targets_scored,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]


In [7]:
# Train the Lightning module; the data module is passed by keyword.
trainer.fit(model, datamodule=dm)


   | Name          | Type              | Params
-----------------------------------------------------
0  | model         | BaseLine2         | 18 K  
1  | model.model   | Sequential        | 18 K  
2  | model.model.0 | Linear            | 14 K  
3  | model.model.1 | BatchNorm1d       | 32    
4  | model.model.2 | Dropout           | 0     
5  | model.model.3 | Linear            | 272   
6  | model.model.4 | BatchNorm1d       | 32    
7  | model.model.5 | Dropout           | 0     
8  | model.model.6 | Linear            | 272   
9  | model.model.7 | BatchNorm1d       | 32    
10 | model.model.8 | Dropout           | 0     
11 | model.model.9 | Linear            | 3 K   
12 | criterion     | BCEWithLogitsLoss | 0     


Epoch 0:  88%|███████████████████████████▏   | 21/24 [00:00<00:00, 33.01it/s, loss=0.727, v_num=7, training_loss=0.716]
Epoch 0: 100%|█████████████| 24/24 [00:00<00:00, 34.88it/s, loss=0.727, v_num=7, training_loss=0.716, valid_loss=0.697]
Epoch 1:  88%|▉| 21/24 [00:00<00:00, 32.96it/s, loss=0.703, v_num=7, training_loss=0.692, valid_loss=0.697, train_loss=
Epoch 1: 100%|█| 24/24 [00:00<00:00, 34.83it/s, loss=0.703, v_num=7, training_loss=0.692, valid_loss=0.677, train_loss=
Epoch 2:  88%|▉| 21/24 [00:00<00:00, 33.06it/s, loss=0.680, v_num=7, training_loss=0.67, valid_loss=0.677, train_loss=0
Epoch 2: 100%|█| 24/24 [00:00<00:00, 34.93it/s, loss=0.680, v_num=7, training_loss=0.67, valid_loss=0.655, train_loss=0
Epoch 3:  88%|▉| 21/24 [00:00<00:00, 31.15it/s, loss=0.653, v_num=7, training_loss=0.634, valid_loss=0.655, train_loss=
Epoch 3: 100%|█| 24/24 [00:00<00:00, 33.00it/s, loss=0.653, v_num=7, training_loss=0.634, valid_loss=0.624, train_loss=
Epoch 4:  88%|▉| 21/24 [00:00<00:00, 33.

Saving latest checkpoint..


Epoch 4: 100%|█| 24/24 [00:00<00:00, 34.77it/s, loss=0.616, v_num=7, training_loss=0.596, valid_loss=0.58, train_loss=0


1

In [8]:
# Everything except the first column (selected by position) goes to the test
# dataset.
test_matrix = X_test.iloc[:, 1:].values
dataset_te = utils.TestMoaDataset(dataset=test_matrix)

# No shuffling: predictions are later written back to rows by position.
loader_te = torch.utils.data.DataLoader(
    dataset=dataset_te,
    batch_size=1024,
    shuffle=False,
    num_workers=0,
)

In [9]:
# Run the trained network over the test loader and collect the sigmoid
# probabilities into a (n_test_rows, 206) array.
inference_model = model.model
inference_model.eval()  # put the Dropout / BatchNorm1d layers into inference mode

# Feed the inputs on whatever device the weights currently live on, so the
# cell works whether the model is on the CPU or the GPU.
model_device = next(inference_model.parameters()).device

predictions = np.zeros((X_test.shape[0], 206))
row_start = 0
# Gradients are not needed for inference; skip building the autograd graph.
with torch.no_grad():
    for batch in loader_te:
        logits = inference_model(batch["x"].to(model_device))
        p = torch.sigmoid(logits).cpu().numpy()
        # Advance by the actual batch length instead of assuming a batch size
        # of 1024, so this stays correct if the loader's batch size changes.
        row_end = row_start + len(p)
        predictions[row_start:row_end] = p
        row_start = row_end

In [10]:
# Build the submission frame: `sig_id` followed by one float column per scored
# target, filled with the predictions.
# The features file is re-read because `cp_type` (needed for post-processing)
# has been dropped from X_test by the one-hot encoding step.
test_features1 = pd.read_csv("../input/lish-moa/test_features.csv")
target_cols = train_targets_scored.columns[1:].tolist()

# Construct all target columns in one go. This avoids adding integer columns
# one at a time and then overwriting them with floats via `.loc`.
# copy=True keeps `s` independent of the `predictions` array, so later edits
# to `s` do not modify `predictions`.
s = pd.DataFrame(predictions, columns=target_cols, copy=True)
s.insert(0, "sig_id", test_features1["sig_id"].values)

In [11]:
# Postprocess: ctl_vehicle samples have no labels, so every target prediction
# for those rows is set to 0.
is_ctl = test_features1["cp_type"] == "ctl_vehicle"
ctl_ids = test_features1.loc[is_ctl, "sig_id"]
label_columns = train_targets_scored.columns[1:]
s.loc[s["sig_id"].isin(ctl_ids), label_columns] = 0

In [12]:
# Write the submission CSV and the state dict of the wrapped network
# (`model.model`).
submission_path = "../submission/submission.csv"
weight_path = "../weight/model.pt"

# Neither writer creates missing parent directories, so make them up front.
for out_path in (submission_path, weight_path):
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

s.to_csv(submission_path, index=False)
torch.save(model.model.state_dict(), weight_path)