In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from model import DenseNet, Model

from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

%load_ext autoreload
%autoreload 2

# Notebook-wide configuration.
# SEED is shared by NumPy and PyTorch so both libraries start from the same
# fixed state; data_dir is a plain string with a trailing slash because file
# names are appended to it by string concatenation.
SEED = 123
data_dir = './DATA/lish-moa/'

np.random.seed(SEED)
torch.manual_seed(SEED)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<torch._C.Generator at 0x1a24b17c50>

In [3]:
# Read the feature table (X) and the scored target table (y) from data_dir.
# Both frames are indexed by the 'sig_id' column so rows can be selected by id.
X = pd.read_csv(f'{data_dir}train_features.csv', index_col='sig_id')
y = pd.read_csv(f'{data_dir}train_targets_scored.csv', index_col='sig_id')

In [5]:
# Binary-encode cp_type and cp_dose as floats (1.0 / 0.0).
# The encoded columns are assigned back into `X` as a new frame instead of
# calling `X[col].replace(..., inplace=True)`: that chained in-place form
# operates on a temporary Series and does not write back into `X` under pandas
# Copy-on-Write. `replace` leaves values that are already encoded untouched,
# so re-running this cell is safe. `astype(float)` fails loudly if a category
# outside the mapping is present.
X = X.assign(
    cp_type=X['cp_type'].replace({'trt_cp': 1., 'ctl_vehicle': 0.}).astype(float),
    cp_dose=X['cp_dose'].replace({'D1': 1., 'D2': 0.}).astype(float),
)

# Split the sample ids into training, validation and test sets.
# The shuffle uses a generator seeded locally from SEED so the split is the
# same no matter how often, or in which order, cells have been executed.
ids = X.index.values.copy()
np.random.RandomState(SEED).shuffle(ids)

# NOTE(review): `val_perc` was never defined in the original cell, which made
# it fail with NameError on a fresh kernel. 0.8 / 0.1 / 0.1 is an assumed
# split -- adjust if a different validation share was intended.
train_perc, val_perc, test_perc = 0.8, 0.1, 0.1
assert abs(train_perc + val_perc + test_perc - 1.0) < 1e-9, \
    'split fractions must sum to 1'

# Compute each boundary once so adjacent slices share the exact same index.
train_end = round(len(ids) * train_perc)
val_end = round(len(ids) * (train_perc + val_perc))

train_id = ids[:train_end]
val_id = ids[train_end:val_end]
test_id = ids[val_end:]
assert len(train_id) + len(val_id) + len(test_id) == len(ids)

X_train, X_val, X_test = X.loc[train_id], X.loc[val_id], X.loc[test_id]
y_train, y_val, y_test = y.loc[train_id], y.loc[val_id], y.loc[test_id]


def _to_scaled_frame(fitted_or_new_scaler, frame, fit=False):
    """Scale `frame` and return the result as a DataFrame.

    Parameters
    ----------
    fitted_or_new_scaler : scaler object exposing fit_transform / transform.
    frame : pandas.DataFrame to scale.
    fit : when True the scaler is fitted on `frame` before transforming;
        otherwise the scaler's existing statistics are applied.

    Returns
    -------
    pandas.DataFrame carrying the index and column labels of `frame`.
    """
    if fit:
        values = fitted_or_new_scaler.fit_transform(frame)
    else:
        values = fitted_or_new_scaler.transform(frame)
    return pd.DataFrame(values, index=frame.index, columns=frame.columns)


# Normalize the data. The scaler is fitted on the training split only and the
# same statistics are then applied to the validation and test splits.
scaler = StandardScaler()
X_train_norm = _to_scaled_frame(scaler, X_train, fit=True)
X_val_norm = _to_scaled_frame(scaler, X_val)
X_test_norm = _to_scaled_frame(scaler, X_test)

In [10]:
# Keyword arguments forwarded to DenseNet: the input width is the number of
# feature columns and the output width is the number of target columns.
params_net = dict(
    input_size=X_train_norm.shape[1],
    hidden_size=[512, 512, 256, 128, 256, 512, 1024],
    output_size=y_train.shape[1],
    dropout=0.01,
)

# Keyword arguments forwarded to Model.fit.
# NOTE(review): 'L2' is presumably a weight-decay strength -- confirm in model.py.
params_fit = dict(
    X=X_train_norm,
    y=y_train,
    epoch=5,
    lr=1e-4,
    batch_size=128,
    L2=0.05,
    verbose=True,
)

# Build the network, wrap it in the project's Model class and train it.
net = DenseNet(**params_net)
model = Model(net)
model.fit(**params_fit)

Epoch [1, 0] : loss 0.7340331673622131
Epoch [1, 3840] : loss 0.718819260597229
Epoch [1, 7680] : loss 0.7089676856994629
Epoch [1, 11520] : loss 0.7009713649749756
Epoch [1, 15360] : loss 0.6952911615371704
Epoch [2, 0] : loss 0.6936304569244385
Epoch [2, 3840] : loss 0.689566969871521
Epoch [2, 7680] : loss 0.6867527365684509
Epoch [2, 11520] : loss 0.6844825148582458
Epoch [2, 15360] : loss 0.6827470064163208
Epoch [3, 0] : loss 0.6820682883262634
Epoch [3, 3840] : loss 0.6810248494148254
Epoch [3, 7680] : loss 0.6797767281532288
Epoch [3, 11520] : loss 0.6788884401321411
Epoch [3, 15360] : loss 0.6780291795730591
Epoch [4, 0] : loss 0.6775532960891724
Epoch [4, 3840] : loss 0.6772419214248657
Epoch [4, 7680] : loss 0.6762481927871704
Epoch [4, 11520] : loss 0.6756942272186279
Epoch [4, 15360] : loss 0.6751059889793396
Epoch [5, 0] : loss 0.6745458245277405
Epoch [5, 3840] : loss 0.6746001839637756
Epoch [5, 7680] : loss 0.6736698150634766
Epoch [5, 11520] : loss 0.6733357906341553


In [None]:
# Run the fitted model on the scaled training and test features, in that order.
y_train_pred, y_test_pred = (
    model.predict_proba(features) for features in (X_train_norm, X_test_norm)
)