In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from model import DenseNet, Model, DenseBlock

from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

%load_ext autoreload
%autoreload 2

# Notebook-wide settings. A single seed is handed to both NumPy and PyTorch so
# that the random draws made through either library start from a fixed state.
SEED = 123
data_dir = './DATA/lish-moa/'

np.random.seed(SEED)
torch.manual_seed(SEED)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<torch._C.Generator at 0x1a24dc4590>

## Load data set

In [2]:
# Features and scored targets live in two CSVs under data_dir; both are keyed
# by the sig_id column, which becomes the index of each frame.
features_csv = data_dir + 'train_features.csv'
targets_csv = data_dir + 'train_targets_scored.csv'

X = pd.read_csv(features_csv, index_col='sig_id')
y = pd.read_csv(targets_csv, index_col='sig_id')

## Preprocess

In [3]:
# Binary-encode the two categorical columns (one 0/1 column each, not a one-hot
# expansion): cp_type trt_cp -> 1, ctl_vehicle -> 0; cp_dose D1 -> 1, D2 -> 0.
#
# The result is assigned back to the column rather than calling
# X[col].replace(..., inplace=True): that chained form operates on a temporary
# Series, which pandas warns about and which leaves X unmodified once
# Copy-on-Write is enabled.
#
# replace() leaves already-encoded values untouched, so re-running this cell is
# harmless; astype(float) raises if an unexpected category is still present
# instead of letting a string column reach the scaler.
X['cp_type'] = X['cp_type'].replace({'trt_cp': 1., 'ctl_vehicle': 0.}).astype(float)
X['cp_dose'] = X['cp_dose'].replace({'D1': 1., 'D2': 0.}).astype(float)

# Random 80/20 hold-out split: shuffle a copy of the sample ids in place, then
# cut the shuffled array once at the training boundary.
ids = X.index.values.copy()
np.random.shuffle(ids)

train_perc, test_perc = 0.8, 0.2
n_train = round(len(ids) * train_perc)
train_id, test_id = ids[:n_train], ids[n_train:]

# Features and targets are selected with the same id arrays, so their rows
# stay aligned within each split.
X_train, X_test = X.loc[train_id], X.loc[test_id]
y_train, y_test = y.loc[train_id], y.loc[test_id]

# Standardise the features. The scaler is fitted on the training rows only and
# that same fitted scaler is then applied to the held-out rows. The scaled
# values are wrapped back into frames carrying the original labels.
scaler = StandardScaler()

X_train_norm = pd.DataFrame(
    scaler.fit_transform(X_train),
    index=X_train.index,
    columns=X_train.columns,
)

X_test_norm = pd.DataFrame(
    scaler.transform(X_test),
    index=X_test.index,
    columns=X_test.columns,
)

In [27]:
# Layer sizes at the two ends of the network are read off the prepared frames:
# one input per feature column, one output per target column.
n_features = X_train_norm.shape[1]
n_targets = y_train.shape[1]

params_net = {
    'input_size': n_features,
    'hidden_size': [1024, 512, 1024],
    'output_size': n_targets,
    'dropout': 0.01,
}

# Keyword arguments forwarded to Model.fit. Model comes from the local `model`
# module, so the exact meaning of each option is not visible in this notebook.
params_fit = {
    'X': X_train_norm,
    'y': y_train,
    'epoch': 5,
    'lr': 1e-4,
    'batch_size': 128,
    'L1': 1e-6,
    'L2': 1e-6,
    'pos_weight': 1,
    'verbose': True,
}

net = DenseNet(**params_net)
model = Model(net)
model.fit(**params_fit)

Epoch [1, 0] : loss 1.0131765604019165
Epoch [1, 3840] : loss 0.9746513366699219
Epoch [1, 7680] : loss 0.8821326494216919
Epoch [1, 11520] : loss 0.8356621861457825
Epoch [1, 15360] : loss 0.8235074281692505


In [None]:
# Run the model's predict_proba on both splits, training rows first and then
# the held-out rows.
y_train_pred, y_test_pred = [
    model.predict_proba(features)
    for features in (X_train_norm, X_test_norm)
]

In [10]:
# Scratch DenseBlock used by the following cells to inspect its parameters.
# Note that this rebinds `net`, the name the training cell used for the DenseNet.
# NOTE(review): the positional arguments look like (input features, output
# features, dropout) given the 2x3 weight printed further down; confirm in model.py.
net = DenseBlock(3, 2, 0.02)

In [12]:
# Print every parameter tensor of the block's `linear` submodule, one at a time.
for param in net.linear.parameters():
    print(param)

Parameter containing:
tensor([[-0.4545, -0.4456, -0.5394],
        [ 0.2368,  0.1636, -0.5732]], requires_grad=True)
Parameter containing:
tensor([-0.2033, -0.5588], requires_grad=True)


In [14]:
# Flatten each parameter of net.linear to 1-D, then join them into one vector.
flat_parts = [p.view(-1) for p in net.linear.parameters()]
torch.cat(flat_parts)

tensor([-0.4545, -0.4456, -0.5394,  0.2368,  0.1636, -0.5732, -0.2033, -0.5588],
       grad_fn=<CatBackward>)

In [15]:
# Seed the accumulator with an empty tensor and append the flattened
# parameters of net.linear to it.
linear_flat = torch.cat([p.view(-1) for p in net.linear.parameters()])
a = torch.cat([torch.tensor([]), linear_flat])

In [16]:
# Display the accumulated flat parameter vector built in the previous cell.
a

tensor([-0.4545, -0.4456, -0.5394,  0.2368,  0.1636, -0.5732, -0.2033, -0.5588],
       grad_fn=<CatBackward>)

In [17]:
# Append the flattened net.linear parameters to `a` a second time, so the
# vector appears twice back to back in `b`, then display it.
linear_again = torch.cat([p.view(-1) for p in net.linear.parameters()])
b = torch.cat([a, linear_again])
b

tensor([-0.4545, -0.4456, -0.5394,  0.2368,  0.1636, -0.5732, -0.2033, -0.5588,
        -0.4545, -0.4456, -0.5394,  0.2368,  0.1636, -0.5732, -0.2033, -0.5588],
       grad_fn=<CatBackward>)

In [23]:
# Build a fresh, untrained DenseNet from the same params_net and evaluate its
# regular_loss method; the result is displayed as the cell output.
# NOTE(review): the two positional arguments are presumably the L1 and L2
# coefficients (cf. the 'L1'/'L2' keys in params_fit); confirm in model.py.
net2 = DenseNet(**params_net)
net2.regular_loss(0.001, 0.001)

tensor(38.3275, grad_fn=<AddBackward0>)