In [None]:
# Run-level configuration read by the cells below.
server = "SBM"  # label appended to the dated output folder name
GPU = 0  # value written to CUDA_VISIBLE_DEVICES
sel_cv_idx = 8  # selects which pair of outer folds this run covers
num_workers = 4  # number of DataLoader worker processes
seed = 0  # seed handed to seed_everything

In [None]:
import numpy as np
# Every run covers two consecutive outer folds: 2 * sel_cv_idx and 2 * sel_cv_idx + 1.
outer_cv_part = 2 * sel_cv_idx + np.arange(2)
print("Selected Fold: {}".format(outer_cv_part))

Selected Fold: [16 17]


In [None]:
from sklearn.model_selection import ParameterGrid

# Candidate values for each hyper-parameter; a single-element list fixes that parameter.

# Hidden-layer widths for the extractor / predictor / discriminator candidates.
extr_cand = [1024]
pred_cand = [1024]
disc_cand = [1024]

# Optimisation and regularisation settings.
dropout_cand = [0.7]
dropout_reg_cand = [0.95]
batch_size_cand = [32]
lr_cand = [5e-05]
epochs_cand = [150]

# Target sparsity levels; only the predictor and discriminator targets vary here.
hsp_extr_cand = [0.975]
hsp_pred_cand = [0.1, 0.5, 0.9]
hsp_disc_cand = [0.1, 0.5, 0.9]

lambda_cand = [0.02]
l2_param_cand = [5e-03]

# NOTE(review): the numeric key prefixes ("1_", "2_", "3_") presumably control the
# ordering when this mapping is expanded into a grid -- confirm against the consumer.
param_cand = {
    "1_extr": extr_cand, "2_pred": pred_cand, "3_disc": disc_cand, 
    "dropout": dropout_cand, "dropout_reg": dropout_reg_cand,
    "batch_size": batch_size_cand, "lr": lr_cand, "epochs": epochs_cand,
    "lambda_": lambda_cand, "l2_param": l2_param_cand,
    "1_hsp_extr": hsp_extr_cand, "2_hsp_pred": hsp_pred_cand, "3_hsp_disc": hsp_disc_cand
}

In [None]:
import os
import gc
import time
import pickle
import random
import itertools
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from decimal import Decimal
from datetime import datetime as dt
from pytz import timezone

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable, Function
import torch.optim as optim
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from torch.optim.swa_utils import AveragedModel, SWALR
from torch.optim.lr_scheduler import ReduceLROnPlateau 
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

In [None]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from NumPy / PyTorch.
warnings.filterwarnings('ignore')

In [None]:
# Order devices by PCI bus ID and expose only the GPU chosen in the config cell.
# NOTE(review): presumably this has to run before the first CUDA call to take effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU)

In [None]:
def seed_everything(seed=seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices),
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed. Defaults to the notebook-level ``seed`` as it was
            when this function was defined.
    """
    random.seed(seed)
    # Recorded for child processes; the hash seed of the running interpreter
    # is fixed at start-up and is not changed by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible CUDA device, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuned kernel selection can differ between runs, so turn it off.
    torch.backends.cudnn.benchmark = False
seed_everything(seed)

In [None]:
# Current time in Asia/Seoul, split into zero-padded string components.
nowtime = dt.now(timezone("Asia/Seoul"))
year = nowtime.strftime("%y")    # two-digit year
month = nowtime.strftime("%m")
day = nowtime.strftime("%d")
hour = nowtime.strftime("%H")
minute = nowtime.strftime("%M")
sec = nowtime.strftime("%S")
# "%f" is always six digits, so the slice keeps leading zeros
# (str(nowtime.microsecond)[:2] turned 5000 microseconds into "50").
msec = nowtime.strftime("%f")[:2]

In [None]:
# Results for this run go to <save_path>/<yymmdd>_<server>.
save_path = "/users/hjw/data/Nested_CV/test"
output_folder = "{}/{}{}{}_{}".format(save_path, year, month, day, server)
# exist_ok avoids the check-then-create race and matches how the fold
# directories are created later in the notebook.
os.makedirs(output_folder, exist_ok=True)
print(output_folder)

/users/hjw/data/Nested_CV/test/210802_SBM


In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only point this at trusted files.
data = np.load("/users/hjw/data/ABCD/npz_files/rsfc_p_site_scanner_si_ge.npz", allow_pickle=True)
# Standardise along axis=1, i.e. each row of X is z-scored across its own columns.
X = stats.zscore(data["X"], axis=1)
y = data["y"]
print(X.shape, y.shape)

(6905, 61776) (6905, 3)


In [None]:
# Column positions inside y (later named "p-factor", "site", "scanner" in y_df).
p_factor_idx = 0
site_idx = 1
scanner_idx = 2

In [None]:
# Store all labels as floats, with the site and scanner columns truncated to
# whole numbers. The builtin float / int replace the np.float / np.int aliases,
# which were removed from NumPy and raise AttributeError on current releases.
y = np.array(y, dtype=float)
y[:, site_idx] = y[:, site_idx].astype(int)
y[:, scanner_idx] = y[:, scanner_idx].astype(int)

In [None]:
# Split subject indices for nested leave-one-site-out cross-validation.
# Outer loop: each unique site is held out once as the test set.
# Inner loop: n_inner_repeat validation sets, each made of one site with scanner
# label 0 plus one site with scanner label 1, drawn from the outer-training sites.
y_df = pd.DataFrame(y, columns=["p-factor", "site", "scanner"])
site_unq = np.unique(y[:, site_idx])
data_idx = np.arange(y.shape[0])

outer_train_folds_idx = []
outer_test_folds_idx = []
inner_folds_idx = []

split_seed = 0
n_outer_repeat = len(site_unq)
n_inner_repeat = 3

# Outer loop
for n_outer, outer_test_site in enumerate(site_unq):
    outer_train_idx = np.where(y[:, site_idx] != outer_test_site)[0]
    outer_test_idx = np.where(y[:, site_idx] == outer_test_site)[0]
    outer_train_folds_idx.append(outer_train_idx)
    outer_test_folds_idx.append(outer_test_idx)
    
    # y_df keeps the default integer index, so the .index values used below
    # are row positions into X / y.
    outer_test_df = y_df.iloc[outer_test_idx]
    outer_train_df = y_df.iloc[outer_train_idx]
    valid_0_df = outer_train_df[outer_train_df["scanner"] == 0]
    valid_1_df = outer_train_df[outer_train_df["scanner"] == 1]
    valid_0_site_unq = pd.Series(np.unique(valid_0_df["site"]))
    valid_1_site_unq = pd.Series(np.unique(valid_1_df["site"]))

    inner_train_folds_idx = []
    inner_valid_folds_idx = []
    
    # Draw the validation sites without replacement; the same random_state is
    # reused for every outer fold and for both scanner groups.
    inner_valid_site_0_list = valid_0_site_unq.sample(
        n=n_inner_repeat, replace=False, random_state=split_seed)
    inner_valid_site_1_list = valid_1_site_unq.sample(
        n=n_inner_repeat, replace=False, random_state=split_seed)
    
    # Inner loop
    for n_inner in range(n_inner_repeat):
        inner_valid_site_0 = inner_valid_site_0_list.values[n_inner]
        inner_valid_site_1 = inner_valid_site_1_list.values[n_inner]
        inner_valid_site = [inner_valid_site_0, inner_valid_site_1]
        inner_valid_cond_0 = (outer_train_df["site"] == inner_valid_site_0)
        inner_valid_cond_1 = (outer_train_df["site"] == inner_valid_site_1)
        inner_valid_df = outer_train_df[inner_valid_cond_0 | inner_valid_cond_1]

        # Inner training set = outer training subjects minus the two validation sites.
        inner_train_idx = np.setdiff1d(
            outer_train_df.index.values, inner_valid_df.index.values)
        inner_valid_idx = inner_valid_df.index.values
        print("[{}/{}] inner fold: train: {}, valid: {}".
              format(n_inner + 1, n_inner_repeat, len(inner_train_idx), len(inner_valid_idx)),
              end=", ")
        print("valid site: {}, {}".format(int(inner_valid_site[0]), int(inner_valid_site[1])))
        inner_train_folds_idx.append(inner_train_idx)
        inner_valid_folds_idx.append(inner_valid_idx)
        
    inner_folds_idx.append([inner_train_folds_idx, inner_valid_folds_idx])
    
    # NOTE(review): these three label arrays are not used in the visible code.
    outer_test_scnr_label = np.unique(outer_test_df["scanner"])
    outer_train_scnr_label = np.unique(outer_train_df["scanner"])
    inner_valid_scnr_label = np.unique(inner_valid_df["scanner"])
    
    print("[{}/{}] outer fold: train: {}, test: {}"
          .format(n_outer + 1, len(site_unq), len(outer_train_idx), len(outer_test_idx)), 
          end=" --> ")
    print("outer test site: {}\n".format(int(outer_test_site)))

# NOTE(review): after the loop, inner_train_folds_idx / inner_valid_folds_idx still
# hold the splits of the LAST outer fold; run_inner_fold reads these globals, so
# confirm they are re-bound from inner_folds_idx before it is called.

[1/3] inner fold: train: 6054, valid: 563, valid site: 10, 12
[2/3] inner fold: train: 5631, valid: 986, valid site: 4, 21
[3/3] inner fold: train: 5998, valid: 619, valid site: 8, 9
[1/18] outer fold: train: 6617, test: 288 --> outer test site: 2

[1/3] inner fold: train: 5801, valid: 563, valid site: 10, 12
[2/3] inner fold: train: 5378, valid: 986, valid site: 4, 21
[3/3] inner fold: train: 5745, valid: 619, valid site: 8, 9
[2/18] outer fold: train: 6364, test: 541 --> outer test site: 3

[1/3] inner fold: train: 5613, valid: 788, valid site: 13, 11
[2/3] inner fold: train: 5753, valid: 648, valid site: 18, 20
[3/3] inner fold: train: 5735, valid: 666, valid site: 10, 7
[3/18] outer fold: train: 6401, test: 504 --> outer test site: 4

[1/3] inner fold: train: 6045, valid: 563, valid site: 10, 12
[2/3] inner fold: train: 5622, valid: 986, valid site: 4, 21
[3/3] inner fold: train: 5989, valid: 619, valid site: 8, 9
[4/18] outer fold: train: 6608, test: 297 --> outer test site: 5

[1

In [None]:
# Learning-rate scheduler settings, passed to ReduceLROnPlateau as
# mode / patience / min_lr / factor.
mode = "max"
lr_patience = 5
min_lr = 1e-08
lr_factor = 0.25

swa_lr = 5e-03  # NOTE(review): not referenced in the visible part of the notebook
momentum = 0.90  # momentum for the SGD variants built by get_optimizer
l1_param = 0  # L1 coefficient for layers whose wsc_flag entry is 0 (see l1_penalty)
early_stopping_patience = 150  # NOTE(review): not referenced in the visible part of the notebook

input_dim = 61776  # equals X.shape[1] as printed when the data was loaded
n_classes = len(np.unique(y[:, scanner_idx]))
output_reg_dim = 1
output_clf_dim = n_classes

# Per-layer weight sparsity control settings, consumed by l1_penalty:
# a non-zero wsc_flag entry enables sparsity control for that layer, and
# beta_lr / max_beta are passed to calc_hsp as the beta step size and bound.
wsc_flag = [1, 1, 1]
beta_lr = [1e-04, 1e-03, 1e-03]
max_beta = [1e-02, 5e-02, 5e-02]
n_wsc = wsc_flag.count(1)

outer_n_splits = n_outer_repeat
inner_n_splits = n_inner_repeat

In [None]:
# Training dataset
class train_dataset(Dataset): 
    """Serve (features, labels) rows from two indexable arrays as float32 tensors."""

    def __init__(self, X_train, y_train):
        self.X_train, self.y_train = X_train, y_train

    def __len__(self):
        # Dataset size is the number of feature rows.
        return len(self.X_train)

    def __getitem__(self, idx):
        features = torch.from_numpy(self.X_train[idx]).float()
        labels = torch.from_numpy(self.y_train[idx]).float()
        return features, labels

In [None]:
# Validation dataset
class valid_dataset(Dataset): 
    """Serve (features, labels) rows from two indexable arrays as float32 tensors."""

    def __init__(self, X_valid, y_valid):
        self.X_valid, self.y_valid = X_valid, y_valid

    def __len__(self):
        # Dataset size is the number of feature rows.
        return len(self.X_valid)

    def __getitem__(self, idx):
        features = torch.from_numpy(self.X_valid[idx]).float()
        labels = torch.from_numpy(self.y_valid[idx]).float()
        return features, labels

In [None]:
# Test dataset
class test_dataset(Dataset): 
    """Serve (features, labels) rows from two indexable arrays as float32 tensors."""

    def __init__(self, X_test, y_test):
        self.X_test, self.y_test = X_test, y_test

    def __len__(self):
        # Dataset size is the number of feature rows.
        return len(self.X_test)

    def __getitem__(self, idx):
        features = torch.from_numpy(self.X_test[idx]).float()
        labels = torch.from_numpy(self.y_test[idx]).float()
        return features, labels

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda") if USE_CUDA else torch.device("cpu")

In [None]:
class GradientReversalFunction(Function):
    """
    Gradient reversal layer, after Ganin & Lempitsky (2015),
    "Unsupervised Domain Adaptation by Backpropagation".

    The forward pass returns a copy of the input unchanged; the backward pass
    multiplies the incoming gradient by -lambda. No gradient is returned for
    lambda itself.
    """

    @staticmethod
    def forward(ctx, x, lambda_):
        # Keep the scale on the context so backward can read it.
        ctx.lambda_ = lambda_
        return x.clone()

    @staticmethod
    def backward(ctx, grads):
        # Build the scale with the dtype/device of the incoming gradient.
        scale = grads.new_tensor(ctx.lambda_)
        reversed_grads = grads.mul(scale).neg()
        return reversed_grads, None

In [None]:
class GradientReversal(torch.nn.Module):
    """Module wrapper that applies GradientReversalFunction with a fixed lambda."""

    def __init__(self, lambda_=0.0):
        super().__init__()
        # Scale passed to the reversal function on every forward call.
        self.lambda_ = lambda_

    def forward(self, x):
        reversed_x = GradientReversalFunction.apply(x, self.lambda_)
        return reversed_x

In [None]:
class DNN(nn.Module):
    """Shared feature extractor feeding a regression head and a classifier head.

    The classifier head receives the extracted features through a gradient
    reversal layer. ``forward`` returns ``(x_reg, x_clf)``. Layer widths come
    from the constructor arguments; the input and output sizes are read from
    the module-level ``input_dim``, ``output_reg_dim`` and ``output_clf_dim``.
    """

    def __init__(self, extr_hidden, disc_hidden, pred_hidden, 
                 dropout_rate, dropout_reg, lambda_, act_func_name):
        super().__init__()

        # Feature extractor.
        self.ext_1 = nn.Linear(input_dim, extr_hidden)
        self.ext_bn_1 = nn.BatchNorm1d(extr_hidden)

        # Regression head.
        self.reg_1 = nn.Linear(extr_hidden, pred_hidden)
        self.reg_bn_1 = nn.BatchNorm1d(pred_hidden)
        self.reg_2 = nn.Linear(pred_hidden, output_reg_dim)

        # Classifier head (fed through gradient reversal in forward).
        self.clf_1 = nn.Linear(extr_hidden, disc_hidden)
        self.clf_bn_1 = nn.BatchNorm1d(disc_hidden)
        self.clf_2 = nn.Linear(disc_hidden, output_clf_dim)

        self.GradientReversal = GradientReversal(lambda_)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.dropout_reg = nn.Dropout(p=dropout_reg)
        self.act_func = get_activation_function(act_func_name)
        self.weights_init()

    def forward(self, x):
        # Linear -> batch norm -> activation -> dropout.
        feature = self.dropout(self.act_func(self.ext_bn_1(self.ext_1(x))))

        # Regression branch uses its own dropout rate before the output layer.
        reg_hidden = self.act_func(self.reg_bn_1(self.reg_1(feature)))
        x_reg = self.reg_2(self.dropout_reg(reg_hidden))

        # Classifier branch: no dropout is applied to its hidden layer.
        clf_hidden = self.clf_bn_1(self.clf_1(self.GradientReversal(feature)))
        x_clf = self.clf_2(self.act_func(clf_hidden))

        return x_reg, x_clf

    def weights_init(self):
        """Re-initialise every Linear layer: Kaiming-normal weights, small normal biases."""
        linear_layers = (m for m in self.modules() if isinstance(m, nn.Linear))
        for layer in linear_layers:
            nn.init.kaiming_normal_(layer.weight, mode="fan_in", nonlinearity="relu")
            nn.init.normal_(layer.bias, std=0.01)

In [None]:
def get_optimizer(model, opt_name, learning_rate=None, l2_param=None):
    """Build the optimizer named by ``opt_name`` for ``model``'s parameters.

    Args:
        model: Module whose ``parameters()`` are optimised.
        opt_name: One of ``'momentum'``, ``'nag'`` or ``'adam'`` (case-insensitive).
        learning_rate: Learning rate passed as ``lr``.
        l2_param: Weight-decay coefficient passed as ``weight_decay``.

    Returns:
        ``optim.SGD`` for ``'momentum'`` / ``'nag'`` (the latter with Nesterov
        momentum), using the module-level ``momentum``, or ``optim.Adam``.

    Raises:
        ValueError: If ``opt_name`` is not a supported optimizer name.
    """
    lower_opt_name = opt_name.lower()
    if lower_opt_name == 'momentum':
        return optim.SGD(model.parameters(), lr=learning_rate, 
                         momentum=momentum, weight_decay=l2_param)
    if lower_opt_name == 'nag':
        return optim.SGD(model.parameters(), lr=learning_rate, 
                         momentum=momentum, weight_decay=l2_param, nesterov=True)
    if lower_opt_name == 'adam':
        return optim.Adam(model.parameters(), lr=learning_rate, 
                          weight_decay=l2_param)
    # The previous sys.exit call relied on a `sys` import that the notebook
    # does not have, so a bad name surfaced as a NameError.
    raise ValueError("Illegal argument for optimizer type: {!r}".format(opt_name))

In [None]:
def get_activation_function(act_func_name):
    """Return a new activation module for ``act_func_name``.

    Args:
        act_func_name: One of ``'relu'``, ``'prelu'``, ``'elu'``, ``'silu'``,
            ``'leakyrelu'`` or ``'tanh'`` (case-insensitive).

    Returns:
        A freshly constructed ``torch.nn`` activation module with default arguments.

    Raises:
        ValueError: If the name is not one of the supported activations.
    """
    # Map the lower-cased name to the torch.nn class name; the class is looked
    # up lazily so only the requested activation has to exist.
    class_names = {
        'relu': 'ReLU',
        'prelu': 'PReLU',
        'elu': 'ELU',
        'silu': 'SiLU',
        'leakyrelu': 'LeakyReLU',
        'tanh': 'Tanh',
    }
    key = act_func_name.lower()
    if key not in class_names:
        # The previous sys.exit call relied on a `sys` import that the notebook
        # does not have, so a bad name surfaced as a NameError.
        raise ValueError(
            "Illegal argument for activation function type: {!r}".format(act_func_name))
    return getattr(nn, class_names[key])()

In [None]:
def init_hsp(n_wsc, epochs):
    """Allocate zeroed buffers for sparsity tracking.

    Returns ``(hsp_val, beta_val, hsp_list, beta_list)``: two length-``n_wsc``
    vectors holding the current values and two ``(n_wsc, epochs)`` matrices
    holding the per-epoch history. All four are independent arrays.
    """
    current_shape = (n_wsc,)
    history_shape = (n_wsc, epochs)

    hsp_val = np.zeros(current_shape)
    beta_val = np.zeros(current_shape)
    hsp_list = np.zeros(history_shape)
    beta_list = np.zeros(history_shape)

    return hsp_val, beta_val, hsp_list, beta_list

In [None]:
# Weight sparsity control with Hoyer's sparseness (layer-wise)
def calc_hsp(w, beta, max_beta, beta_lr, tg_hsp):
    """Measure the Hoyer sparseness of ``w`` and step ``beta`` towards the target.

    Args:
        w: 2-D weight tensor.
        beta: Current L1 coefficient for this layer.
        max_beta: Bound; the updated beta is kept within ``[-max_beta, max_beta]``.
        beta_lr: Step size of the beta update.
        tg_hsp: Target sparseness level.

    Returns:
        ``[hsp, beta]``: the measured sparseness and the updated coefficient.
    """
    n_rows, n_cols = w.shape
    sqrt_n = np.sqrt(n_rows * n_cols)

    # Hoyer sparseness: (sqrt(n) - L1 / L2) / (sqrt(n) - 1).
    l1_over_l2 = (torch.norm(w, 1) / torch.norm(w, 2)).item()
    hsp = (sqrt_n - l1_over_l2) / (sqrt_n - 1)

    # Fixed-size step: raise beta when below the target, lower it when above.
    step_direction = np.sign(tg_hsp - hsp)
    beta = beta + beta_lr * step_direction

    # Keep beta inside [-max_beta, max_beta].
    if beta < -max_beta:
        beta = -max_beta
    if beta > max_beta:
        beta = max_beta

    return [hsp, beta]

In [None]:
def l1_penalty(model, epoch, hsp_val, beta_val, hsp_list, beta_list, tg_hsp):
    """Sum the L1 penalties of the regularised weight matrices of ``model``.

    A parameter is penalised when its name contains ``"weight"``, does not
    contain ``"bn"``, and contains ``"ext"``, ``"reg_1"`` or ``"clf_1"``.
    For layers whose ``wsc_flag`` entry is non-zero, the coefficient is the
    beta returned by ``calc_hsp``; the current sparseness and beta are written
    into ``hsp_val`` / ``beta_val`` and into column ``epoch - 1`` of
    ``hsp_list`` / ``beta_list`` (all four arrays are modified in place).
    Other layers use the fixed module-level ``l1_param``.

    Args:
        model: Module whose named parameters are scanned.
        epoch: 1-based epoch number, used as the history column index.
        hsp_val, beta_val: Current per-layer sparseness and beta values.
        hsp_list, beta_list: Per-epoch history arrays.
        tg_hsp: Per-layer target sparseness values.

    Returns:
        The summed penalty, or ``None`` when no parameter matched.
    """
    l1_reg = None
    layer_idx = 0  # index over all penalised layers (indexes wsc_flag)
    wsc_idx = 0    # index over sparsity-controlled layers only

    for name, param in model.named_parameters():
        if "weight" not in name or "bn" in name:
            continue
        if not ("ext" in name or "reg_1" in name or "clf_1" in name):
            continue

        if wsc_flag[layer_idx] != 0:
            hsp_val[wsc_idx], beta_val[wsc_idx] = calc_hsp(
                param, beta_val[wsc_idx], max_beta[wsc_idx], 
                beta_lr[wsc_idx], tg_hsp[wsc_idx]
            )
            hsp_list[wsc_idx, epoch - 1] = hsp_val[wsc_idx]
            beta_list[wsc_idx, epoch - 1] = beta_val[wsc_idx]
            layer_reg = torch.norm(param, 1) * beta_val[wsc_idx]
            wsc_idx += 1
        else:
            # Keep the norm as a tensor: calling .item() here detached the
            # penalty from the graph, so it never produced a gradient.
            layer_reg = torch.norm(param, 1) * l1_param

        l1_reg = layer_reg if l1_reg is None else l1_reg + layer_reg
        layer_idx += 1

    return l1_reg

In [None]:
def pearsonr(x, y):
    """Pearson correlation coefficient between two 1-D tensors, returned as a tensor."""
    x_centered = x - torch.mean(x)
    y_centered = y - torch.mean(y)
    covariance = torch.dot(x_centered, y_centered)
    scale = torch.norm(x_centered, 2) * torch.norm(y_centered, 2)
    return covariance / scale

In [None]:
def train(model, epoch, train_loader, optimizer, criterion_clf, criterion_reg, 
          hsp_val, beta_val, hsp_list, beta_list, tg_hsp):
    """Run one training epoch and return its summary metrics.

    Each batch is optimised on classifier loss + regression loss + the
    sparsity-controlled L1 penalty from ``l1_penalty``.

    Args:
        model: Network returning ``(output_reg, output_clf)``.
        epoch: 1-based epoch number (forwarded to ``l1_penalty``).
        train_loader: Iterable of ``(input, target)`` batches; the target
            columns are selected with ``p_factor_idx`` and ``scanner_idx``.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion_clf, criterion_reg: Loss functions for the two heads.
        hsp_val, beta_val, hsp_list, beta_list, tg_hsp: Sparsity-control
            state, forwarded to ``l1_penalty`` (updated in place there).

    Returns:
        ``(clf_loss, reg_loss, clf_acc, train_corr)``: the summed batch losses
        divided by the number of samples, the classifier accuracy in percent,
        and the Pearson correlation between targets and regression outputs.
    """
    model.train()
    reg_loss = 0
    clf_loss = 0
    total = 0
    correct = 0
    y_train_true = []
    y_train_pred = []

    for inputs, target in train_loader:
        optimizer.zero_grad()
        inputs, target = inputs.to(DEVICE), target.to(DEVICE)
        output_reg, output_clf = model(inputs)
        target_clf = target[:, scanner_idx].long().view(-1)
        target_reg = target[:, p_factor_idx].view(-1, 1)
        running_clf_loss = criterion_clf(output_clf, target_clf)
        running_reg_loss = criterion_reg(output_reg, target_reg)
        l1_term = l1_penalty(model, epoch, hsp_val, beta_val, hsp_list, beta_list, tg_hsp)
        cost = running_clf_loss + running_reg_loss + l1_term
        cost.backward()
        optimizer.step()

        clf_loss += running_clf_loss.item()
        reg_loss += running_reg_loss.item()
        total += output_reg.size(0)

        # Compare predictions with the scanner labels only. The earlier
        # comparison against the whole target matrix also counted matches
        # with the other target columns.
        _, pred = torch.max(output_clf.data, 1)
        correct += (pred == target_clf).sum().item()

        y_train_true.append(torch.flatten(target_reg.detach()))
        y_train_pred.append(torch.flatten(output_reg.detach()))

    # NOTE(review): the accumulated values are per-batch losses, yet they are
    # divided by the sample count; kept as-is so logged curves stay comparable.
    reg_loss /= total
    clf_loss /= total
    clf_acc = 100 * correct / total
    # cat (unlike stack) also works when the last batch is smaller.
    y_train_true = torch.cat(y_train_true)
    y_train_pred = torch.cat(y_train_pred)
    train_corr = pearsonr(y_train_true, y_train_pred)
    return clf_loss, reg_loss, clf_acc, train_corr

In [None]:
def valid(model, epoch, valid_loader, criterion_clf, criterion_reg):
    """Evaluate ``model`` on a validation loader without gradient tracking.

    Args:
        model: Network returning ``(output_reg, output_clf)``.
        epoch: Current epoch number (unused here; kept for a uniform signature).
        valid_loader: Iterable of ``(input, target)`` batches; the target
            columns are selected with ``p_factor_idx`` and ``scanner_idx``.
        criterion_clf, criterion_reg: Loss functions for the two heads.

    Returns:
        ``(clf_loss, reg_loss, clf_acc, valid_corr)``: the summed batch losses
        (not normalised), the classifier accuracy in percent, and the Pearson
        correlation between targets and regression outputs.
    """
    model.eval()
    reg_loss = 0
    clf_loss = 0
    correct = 0
    total = 0
    y_valid_true = []
    y_valid_pred = []

    with torch.no_grad():
        for inputs, target in valid_loader:
            inputs, target = inputs.to(DEVICE), target.to(DEVICE)
            output_reg, output_clf = model(inputs)
            target_clf = target[:, scanner_idx].long().view(-1)
            target_reg = target[:, p_factor_idx].view(-1, 1)
            clf_loss += criterion_clf(output_clf, target_clf).item()
            reg_loss += criterion_reg(output_reg, target_reg).item()
            total += output_reg.size(0)

            # Compare predictions with the scanner labels only. The earlier
            # comparison against the whole target matrix also counted matches
            # with the other target columns.
            _, pred = torch.max(output_clf.data, 1)
            correct += (pred == target_clf).sum().item()

            y_valid_true.append(torch.flatten(target_reg.detach()))
            y_valid_pred.append(torch.flatten(output_reg.detach()))

    clf_acc = 100 * correct / total
    # cat (unlike stack) also works when batches differ in size.
    y_valid_true = torch.cat(y_valid_true)
    y_valid_pred = torch.cat(y_valid_pred)
    valid_corr = pearsonr(y_valid_true, y_valid_pred)
    return clf_loss, reg_loss, clf_acc, valid_corr

In [None]:
def test(model, epoch, test_loader, criterion_clf, criterion_reg):
    """Evaluate the regression head of ``model`` on a test loader.

    Args:
        model: Network returning ``(output_reg, output_clf)``.
        epoch: Current epoch number (unused here; kept for a uniform signature).
        test_loader: Iterable of ``(input, target)`` batches; the regression
            target is the column selected by ``p_factor_idx``.
        criterion_clf: Unused; kept for a uniform signature.
        criterion_reg: Regression loss function.

    Returns:
        ``(reg_loss, test_corr)``: the summed batch regression losses and the
        Pearson correlation between targets and regression outputs.
    """
    model.eval()
    reg_loss = 0
    y_test_true = []
    y_test_pred = []

    with torch.no_grad():
        for inputs, target in test_loader:
            inputs, target = inputs.to(DEVICE), target.to(DEVICE)
            output_reg, _ = model(inputs)
            target_reg = target[:, p_factor_idx].view(-1, 1)
            reg_loss += criterion_reg(output_reg, target_reg).item()
            y_test_true.append(torch.flatten(target_reg.detach()))
            y_test_pred.append(torch.flatten(output_reg.detach()))

    # cat (unlike stack) also works when batches differ in size.
    y_test_true = torch.cat(y_test_true)
    y_test_pred = torch.cat(y_test_pred)
    test_corr = pearsonr(y_test_true, y_test_pred)
    return reg_loss, test_corr

In [None]:
class early_stopping_func:
    """Track the best validation correlation and signal when to stop training.

    Call the instance once per epoch. An epoch counts as an improvement when
    it is the first one seen, or when ``valid_corr`` is not below
    ``best_corr + delta``. After ``patience`` consecutive epochs without
    improvement, ``early_stop`` is set to True.

    Attributes:
        best_corr: Best validation correlation so far (None before the first call).
        best_corr_list: ``[train_corr, valid_corr, test_corr]`` at the best
            epoch (None before the first call).
        best_epoch: Epoch at which the best value was recorded.
        counter: Consecutive epochs without improvement.
        early_stop: True once patience has run out.
    """

    def __init__(self, patience=5, verbose=False, delta=0, path=None):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_epoch = 0
        # None marks "nothing recorded yet". Starting from 0 meant a run whose
        # correlations were all negative never recorded a best epoch and then
        # failed on the missing best_corr_list when patience ran out.
        self.best_corr = None
        self.best_corr_list = None
        self.early_stop = False
        self.valid_corr_max = -np.inf
        self.delta = delta
        self.path = path
    
    def __call__(self, valid_loss, model, epoch, train_corr, valid_corr, test_corr):
        """Update the tracker with this epoch's metrics.

        ``valid_loss`` is accepted for signature compatibility but is not used.
        """
        first_call = self.best_corr is None
        if not first_call and valid_corr < self.best_corr + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
                print("Early Stopping! Best Model at Epoch {}"
                      .format(self.best_epoch), end=", ")
                print("valid corr: {:.4f}, test corr: {:.4f}"
                      .format(self.best_corr_list[1], self.best_corr_list[2]))
            return

        self.best_corr = valid_corr
        self.best_corr_list = [train_corr, valid_corr, test_corr]
        self.save_checkpoint(self.best_corr, model, epoch)
        self.counter = 0

    def save_checkpoint(self, best_corr, model, epoch):
        """Record the new best correlation and epoch (model saving is disabled)."""
        if self.verbose:
            print("Validation Corr Increased! ({:.4f} --> {:.4f}), Saving the Model!"
                  .format(self.valid_corr_max, best_corr))
        # torch.save(model.state_dict(), self.path + "/early_stopped_model.pt")
        self.valid_corr_max = best_corr
        self.best_epoch = epoch

In [None]:
def plot_learning_curves(
    save_dir, epochs, train_loss, valid_loss,  
    train_corr, valid_corr, train_acc, valid_acc, lr,
    plot_hsp_list, plot_beta_list, tg_hsp):
    """Draw a 2x3 grid of learning curves and save it as Learning_curves.png.

    Panels: discriminator loss, predictor loss, learning rate, correlation,
    per-layer sparseness and per-layer beta. ``train_acc`` and ``valid_acc``
    are accepted but not plotted. Layer labels come from the module-level
    ``indices``. The figure is written to ``save_dir`` and then closed.
    """
    sns.set(style="dark", font_scale=2)
    fig, axes = plt.subplots(2, 3, figsize=(28, 10))
    disc_ax, pred_ax, lr_ax, corr_ax, hsp_ax, beta_ax = axes.flat
    line_width = 3.5
    n_shown = epochs

    # Loss histories are lists of [classifier loss, regression loss] pairs.
    train_loss = np.array(train_loss)
    valid_loss = np.array(valid_loss)

    disc_ax.plot(train_loss[:n_shown, 0], label='train disc loss', lw=line_width, color="r")
    disc_ax.legend()
    disc_ax.set_title("Discriminator Loss Plot", pad=20)

    pred_ax.plot(train_loss[:n_shown, 1], label='train pred loss', lw=line_width, color="r")
    pred_ax.plot(valid_loss[:n_shown, 1], label='valid pred loss', lw=line_width, color="g")
    pred_ax.legend()
    pred_ax.set_title("Predictor Loss Plot", pad=20)

    lr_ax.plot(lr[:n_shown], label='learning rate', lw=line_width, color="k")
    lr_ax.legend()
    lr_ax.set_title("Learning Rate Plot", pad=20)

    corr_ax.plot(train_corr[:n_shown], label='train corr', lw=line_width, color="r")
    corr_ax.plot(valid_corr[:n_shown], label='valid corr', lw=line_width, color="g")
    corr_ax.legend()
    corr_ax.set_title("Correlation Plot ($r$={:.4f})".format(valid_corr[-1]), pad=20)

    # Transpose so that each row is the history of one layer.
    plot_hsp_list = np.array(plot_hsp_list).T
    plot_beta_list = np.array(plot_beta_list).T

    for idx, n_layer in enumerate(indices):
        layer_label = 'layer{}'.format(n_layer)
        hsp_ax.plot(plot_hsp_list[idx], label=layer_label, lw=line_width)
        beta_ax.plot(plot_beta_list[idx], label=layer_label, lw=line_width)
        hsp_ax.legend()
        beta_ax.legend()
        # The title reports the first layer's latest value against its target.
        hsp_ax.set_title("HSP plot [{:.3f}/{:.3f}]"
                         .format(plot_hsp_list[0, -1], tg_hsp[0][0]), pad=20)
        beta_ax.set_title("Beta plot", pad=20)

    fig.tight_layout()
    fig.savefig("{}/Learning_curves.png".format(save_dir))

    plt.close(fig)

In [None]:
print_epoch = 50  # a progress line is printed every print_epoch epochs

In [None]:
def run_inner_fold(output_save_dir=None, cur_tg_hsp=None):
    """Train and validate a fresh model on every inner fold.

    For each of the ``inner_n_splits`` folds this builds the data loaders,
    a new DNN, optimizer and LR scheduler, trains for ``epochs`` epochs, and
    saves learning-curve plots under ``<output_save_dir>/Inner_fold_<k>``.
    The final-epoch train/valid correlations of all folds are written to
    ``<output_save_dir>/inner_cv.csv``.

    Besides its arguments, the function reads many module-level names, among
    them ``X``, ``y``, ``inner_train_folds_idx``, ``inner_valid_folds_idx``,
    ``batch_size``, ``epochs``, ``learning_rate``, ``l2_param``,
    ``optimizer_name``, ``act_func_name`` and the layer-size / dropout settings.

    Args:
        output_save_dir: Directory that receives the per-fold folders and the CSV.
        cur_tg_hsp: Target sparseness values, forwarded to ``train`` and
            ``plot_learning_curves``.

    Returns:
        ``(avg_train_corr, avg_valid_corr)``: means of the final-epoch
        correlations over the inner folds.
    """
    inner_cv = []
    
    for n_inner_cv in range(inner_n_splits):
        
        print("\n===================================", end=" ")
        print("Inner Fold [{}/{}]".format(n_inner_cv + 1, inner_n_splits), end=" ")
        print("===================================")

        inner_start_fold_time = time.time()
        inner_save_dir = "{}/Inner_fold_{}".format(output_save_dir, n_inner_cv + 1)
        os.makedirs(inner_save_dir, exist_ok=True)

        # NOTE(review): these are module-level lists; confirm the caller has
        # bound them to the inner splits of the current outer fold.
        inner_train_idx = inner_train_folds_idx[n_inner_cv]
        inner_valid_idx = inner_valid_folds_idx[n_inner_cv]

        X_train, y_train = X[inner_train_idx], y[inner_train_idx]
        X_valid, y_valid = X[inner_valid_idx], y[inner_valid_idx]

        # NOTE(review): the validation split is wrapped in train_dataset rather
        # than valid_dataset.
        inner_train_dataset = train_dataset(X_train, y_train)
        inner_valid_dataset = train_dataset(X_valid, y_valid)

        inner_train_loader = DataLoader(
            inner_train_dataset, batch_size=batch_size, pin_memory=True,
            shuffle=True, num_workers=num_workers, drop_last=True)
        # The whole validation split is served as a single batch.
        inner_valid_loader = DataLoader(
            inner_valid_dataset, batch_size=len(y_valid), pin_memory=True,
            shuffle=True, num_workers=num_workers, drop_last=True)

        # Assign model
        model = DNN(
            extr_hidden, disc_hidden, pred_hidden, dropout_rate, dropout_reg, lambda_, act_func_name
        ).to(DEVICE)
        optimizer = get_optimizer(model, optimizer_name, learning_rate, l2_param)
        scheduler = ReduceLROnPlateau(
            optimizer, mode=mode, patience=lr_patience, min_lr=min_lr, factor=lr_factor
        )
        criterion_clf = nn.CrossEntropyLoss()
        criterion_reg = nn.MSELoss(reduction="mean")

        # list to save learning parameters
        inner_train_loss = []
        inner_valid_loss = []
        inner_train_corr = []
        inner_valid_corr = []
        inner_train_acc = []
        inner_valid_acc = []
        inner_lr = []
        inner_hsp_list = []
        inner_beta_list = []

        hsp_val, beta_val, hsp_list, beta_list = init_hsp(n_wsc, epochs)

        for epoch in range(1, epochs + 1):
            train_clf_loss, train_reg_loss, train_acc, train_corr = train(
                model, epoch, inner_train_loader, 
                optimizer, criterion_clf, criterion_reg, 
                hsp_val, beta_val, hsp_list, beta_list, cur_tg_hsp
            )
            valid_clf_loss, valid_reg_loss, valid_acc, valid_corr = valid(
                model, epoch, inner_valid_loader, criterion_clf, criterion_reg
            )

            # NOTE(review): the scheduler is driven by the first layer's sparseness
            # value rather than by a validation metric -- confirm this is intended.
            scheduler.step(hsp_val[0])
            lr = optimizer.param_groups[0]['lr']

            inner_train_loss.append([train_clf_loss, train_reg_loss])
            inner_train_corr.append(train_corr)
            inner_train_acc.append(train_acc)
            inner_valid_loss.append([valid_clf_loss, valid_reg_loss])
            inner_valid_corr.append(valid_corr)
            inner_valid_acc.append(valid_acc)
            inner_lr.append(lr)
            inner_hsp_list.append(list(hsp_val))
            inner_beta_list.append(list(beta_val))

            if epoch % print_epoch == 0:
                print("\nEpoch [{:d}/{:d}]".format(epoch, epochs), end=" ")
                print("Train corr: {:.4f}, Valid corr: {:.4f}".format(train_corr, valid_corr))
                # NOTE(review): hsp_val has n_wsc entries but is indexed here by the
                # position in wsc_flag; the two only line up when every flag is set.
                for i in range(len(wsc_flag)):
                    if wsc_flag[i] != 0:
                        print("Layer {:d}: [{:.4f}/{:.4f}]".
                              format(i + 1, hsp_val[i], cur_tg_hsp[i][0]), end=" ")
                # print("\nCurrent learning rate: {:.2e}".format(Decimal(str(lr))))

            # NOTE(review): the figure is redrawn and saved after every epoch.
            plot_learning_curves(
                inner_save_dir, epochs, inner_train_loss, inner_valid_loss,
                inner_train_corr, inner_valid_corr, 
                inner_train_acc, inner_valid_acc, 
                inner_lr, inner_hsp_list, inner_beta_list, cur_tg_hsp
            )
        print("\nInner Fold [{}/{}] train corr: {:.4f}, valid corr: {:.4f}"
              .format(n_inner_cv + 1, inner_n_splits, train_corr, valid_corr))
        """
        torch.save(model.state_dict(),
                   inner_save_dir + "/model_fold_" + str(n_outer_cv + 1) + ".pt")
        """
        torch.cuda.empty_cache()
        gc.collect()
        inner_tot_time = (time.time() - inner_start_fold_time) / 60
        print("Execution Time for Fold: {:.2f} mins".format(inner_tot_time))
        # Only the final epoch's correlations are kept for each fold.
        inner_cv.append([train_corr.detach().cpu().numpy(), valid_corr.detach().cpu().numpy()])
            
    inner_cv_df = pd.DataFrame(np.array(inner_cv), columns=["train", "valid"])
    avg_train_corr = inner_cv_df["train"].mean()
    avg_valid_corr = inner_cv_df["valid"].mean()
    inner_cv_df.to_csv("{}/inner_cv.csv".format(output_save_dir))

    return avg_train_corr, avg_valid_corr

In [None]:
def run_outer_fold(n_outer_cv=0, outer_save_dir=None, sel_tg_hsp=None):
    """Train a fresh model on one outer training fold and score it on the outer test fold.

    The function relies on notebook-level globals for the data (``X``, ``y``,
    ``outer_train_folds_idx``, ``outer_test_folds_idx``), the model/optimiser
    configuration (``extr_hidden``, ``disc_hidden``, ``pred_hidden``,
    ``dropout_rate``, ``dropout_reg``, ``lambda_``, ``act_func_name``,
    ``optimizer_name``, ``learning_rate``, ``l2_param``, ``batch_size``,
    ``epochs``), the scheduler settings (``mode``, ``lr_patience``, ``min_lr``,
    ``lr_factor``) and the sparsity-control state (``n_wsc``, ``wsc_flag``).

    Parameters
    ----------
    n_outer_cv : int
        Zero-based index of the outer fold to run.
    outer_save_dir : str
        Directory that receives the learning-curve plots and the trained
        model weights (``model_fold_<n_outer_cv + 1>.pt``).
    sel_tg_hsp : list of list
        Per-layer target sparsity values selected by the inner search; each
        entry is indexed as ``sel_tg_hsp[i][0]`` when progress is printed.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_corr, test_corr)`` from the final epoch, detached from the
        graph and moved to the CPU.

    Raises
    ------
    ValueError
        If ``outer_save_dir`` is ``None``.
    """
    # Fail fast: without this check a missing directory is only detected when
    # the path is built for torch.save, i.e. after the whole training run.
    if outer_save_dir is None:
        raise ValueError("outer_save_dir must be a directory path, got None")

    # Outer fold
    print("\n===================================", end=" ")
    print("Outer Fold [{}/{}]".format(n_outer_cv + 1, outer_n_splits), end=" ")
    print("===================================")

    outer_start_fold_time = time.time()
    outer_train_idx = outer_train_folds_idx[n_outer_cv]
    outer_test_idx = outer_test_folds_idx[n_outer_cv]

    X_train, y_train = X[outer_train_idx], y[outer_train_idx]
    X_test, y_test = X[outer_test_idx], y[outer_test_idx]

    outer_train_dataset = train_dataset(X_train, y_train)
    outer_test_dataset = test_dataset(X_test, y_test)

    outer_train_loader = DataLoader(
        outer_train_dataset, batch_size=batch_size, pin_memory=True,
        shuffle=True, num_workers=num_workers, drop_last=True)
    # Evaluation loader: the whole test fold is served as a single batch.
    # It must be deterministic and must never discard samples, so shuffling
    # and drop_last are disabled (they were previously enabled).
    outer_test_loader = DataLoader(
        outer_test_dataset, batch_size=len(y_test), pin_memory=True,
        shuffle=False, num_workers=num_workers, drop_last=False)

    # Assign model
    model = DNN(
        extr_hidden, disc_hidden, pred_hidden, dropout_rate, dropout_reg, lambda_, act_func_name
    ).to(DEVICE)
    optimizer = get_optimizer(model, optimizer_name, learning_rate, l2_param)
    scheduler = ReduceLROnPlateau(
        optimizer, mode=mode, patience=lr_patience, min_lr=min_lr, factor=lr_factor
    )
    criterion_clf = nn.CrossEntropyLoss()
    criterion_reg = nn.MSELoss(reduction="mean")

    # Per-epoch history consumed by plot_learning_curves.
    outer_train_loss = []
    outer_test_loss = []
    outer_train_corr = []
    outer_test_corr = []
    outer_train_acc = []
    outer_test_acc = []
    outer_lr = []
    outer_hsp_list = []
    outer_beta_list = []

    hsp_val, beta_val, hsp_list, beta_list = init_hsp(n_wsc, epochs)

    for epoch in range(1, epochs + 1):
        train_clf_loss, train_reg_loss, train_acc, train_corr = train(
            model, epoch, outer_train_loader,
            optimizer, criterion_clf, criterion_reg,
            hsp_val, beta_val, hsp_list, beta_list, sel_tg_hsp
        )
        test_reg_loss, test_corr = test(
            model, epoch, outer_test_loader, criterion_clf, criterion_reg
        )

        # The plateau scheduler is driven by the first layer's current
        # sparsity value rather than by a loss.
        # NOTE(review): presumably intentional (LR decays once sparsity
        # stops changing) - confirm against the inner-fold loop.
        scheduler.step(hsp_val[0])
        lr = optimizer.param_groups[0]['lr']

        outer_train_loss.append([train_clf_loss, train_reg_loss])
        outer_train_corr.append(train_corr)
        outer_train_acc.append(train_acc)
        # test() returns no classification loss or accuracy, so empty lists
        # are stored as placeholders to keep the history layout aligned with
        # the training history.
        outer_test_loss.append([[], test_reg_loss])
        outer_test_corr.append(test_corr)
        outer_test_acc.append([])
        outer_lr.append(lr)
        # Copy the values so each epoch keeps its own snapshot.
        outer_hsp_list.append(list(hsp_val))
        outer_beta_list.append(list(beta_val))

        if epoch % print_epoch == 0:
            print("\nEpoch [{:d}/{:d}]".format(epoch, epochs), end=" ")
            print("Train corr: {:.4f}, Test corr: {:.4f}"
                  .format(train_corr, test_corr))
            # Report current/target sparsity for every sparsity-controlled layer.
            for i in range(len(wsc_flag)):
                if wsc_flag[i] != 0:
                    print("Layer {:d}: [{:.4f}/{:.4f}]".
                          format(i + 1, hsp_val[i], sel_tg_hsp[i][0]), end=" ")

        # Curves are redrawn after every epoch so progress can be inspected
        # while the fold is still training.
        plot_learning_curves(
            outer_save_dir, epochs, outer_train_loss, outer_test_loss,
            outer_train_corr, outer_test_corr,
            outer_train_acc, outer_test_acc,
            outer_lr, outer_hsp_list, outer_beta_list, sel_tg_hsp
        )

    torch.save(model.state_dict(),
               outer_save_dir + "/model_fold_" + str(n_outer_cv + 1) + ".pt")

    torch.cuda.empty_cache()
    gc.collect()

    # Final-epoch correlations, converted for use outside of torch.
    train_corr = train_corr.detach().cpu().numpy()
    test_corr = test_corr.detach().cpu().numpy()

    outer_tot_time = time.time() - outer_start_fold_time
    print("\nExecution Time for Fold: {:.2f} mins".format(outer_tot_time / 60))

    return train_corr, test_corr

In [None]:
# Expand the candidate dictionary into the full list of hyperparameter combinations.
param_grid = list(ParameterGrid(param_cand))

# Settings that are fixed for the whole run and are not part of the grid.
act_func_name, optimizer_name = "elu", "nag"

# Architecture and optimisation settings are read once, from the first grid
# entry; the search loop re-reads the sparsity (hsp) targets per candidate.
temp_param = param_grid[0]

# Hidden-layer sizes of the extractor, predictor and discriminator.
extr_hidden, pred_hidden, disc_hidden = (
    temp_param[key] for key in ("1_extr", "2_pred", "3_disc")
)

dropout_rate, dropout_reg = temp_param["dropout"], temp_param["dropout_reg"]

batch_size, learning_rate, epochs = (
    temp_param[key] for key in ("batch_size", "lr", "epochs")
)

l2_param = temp_param["l2_param"]

In [None]:
# Wall-clock reference point; subtracted from time.time() after training to report the total run time.
code_start_time = time.time()

In [None]:
# Nested cross-validation driver.
# For every outer fold assigned to this run (outer_cv_part):
#   1. evaluate each hyperparameter candidate with run_inner_fold,
#   2. keep the candidate with the highest mean inner validation correlation,
#   3. retrain with that candidate via run_outer_fold and score the test fold.
print(output_folder)

# One entry per outer fold: [selected hsp values, train corr, test corr].
outer_cv = []

for n_outer_cv in outer_cv_part:
    print("\n===================================", end=" ")
    print("Outer Fold [{}/{}]".format(n_outer_cv + 1, outer_n_splits), end=" ")
    print("===================================")

    outer_save_dir = "{}/Outer_fold_{}".format(output_folder, n_outer_cv + 1)
    os.makedirs(outer_save_dir, exist_ok=True)

    # Inner train/validation splits that belong to this outer fold.
    # NOTE(review): presumably read as globals by run_inner_fold - confirm.
    inner_train_folds_idx = inner_folds_idx[n_outer_cv][0]
    inner_valid_folds_idx = inner_folds_idx[n_outer_cv][1]

    # One entry per candidate: [mean train corr, mean valid corr].
    inner_cv = []

    # Inner Fold
    for param_idx, cur_param in enumerate(param_grid):
        print("\n===================================", end=" ")
        print("Param Cand [{}/{}]".format(param_idx + 1, len(param_grid)), end=" ")
        print("===================================")

        hsp_cand_1 = [cur_param["1_hsp_extr"]]
        hsp_cand_2 = [cur_param["2_hsp_pred"]]
        hsp_cand_3 = [cur_param["3_hsp_disc"]]

        indices = [i + 1 for i, x in enumerate(wsc_flag) if x == 1]
        hsp_cand_list = list(itertools.product(hsp_cand_1, hsp_cand_2, hsp_cand_3))
        hsp_cand_list = [list(i) for i in hsp_cand_list]
        hsp_cand = [hsp_cand_1, hsp_cand_2, hsp_cand_3]
        cur_tg_hsp = hsp_cand

        # Use the candidate's own lambda_. Reading it from temp_param (always
        # param_grid[0]) would silently ignore any other lambda_ value in the
        # grid during the inner search.
        lambda_ = cur_param["lambda_"]
        print("Param:", end=" ")
        for param in cur_param:
            if "hsp" in param or "lambda" in param:
                print("{}: {}".format(param, cur_param[param]), end=" ")
        print("")

        # NOTE(review): the directory name encodes only the hsp targets, so
        # candidates that differ only in lambda_ would share a directory.
        cur_param_name = "hsp_{}_{}_{}".format(
            cur_tg_hsp[0][0], cur_tg_hsp[1][0], cur_tg_hsp[2][0]
        )
        param_save_dir = "{}/{}".format(outer_save_dir, cur_param_name)
        os.makedirs(param_save_dir, exist_ok=True)

        inner_train_corr, inner_valid_corr = run_inner_fold(param_save_dir, cur_tg_hsp)
        inner_cv.append([inner_train_corr, inner_valid_corr])

        print("\nParam Cand: [{}/{}] train corr: {:.4f}, valid corr: {:.4f}"
              .format(param_idx + 1, len(param_grid), inner_train_corr, inner_valid_corr))

    # Selecting hyperparameter: highest mean validation correlation wins.
    inner_valid_cv = np.array(inner_cv)[:, 1]
    sel_idx = np.argmax(inner_valid_cv)
    sel_param = param_grid[sel_idx]
    sel_hsp = []
    print("Selected param:", end=" ")
    for x in sel_param:
        if "hsp" in x:
            print("{}".format(sel_param[x]), end=" ")
            sel_hsp.append(sel_param[x])

    # Outer Fold
    hsp_cand_1 = [sel_param["1_hsp_extr"]]
    hsp_cand_2 = [sel_param["2_hsp_pred"]]
    hsp_cand_3 = [sel_param["3_hsp_disc"]]

    indices = [i + 1 for i, x in enumerate(wsc_flag) if x == 1]
    hsp_cand_list = list(itertools.product(hsp_cand_1, hsp_cand_2, hsp_cand_3))
    hsp_cand_list = [list(i) for i in hsp_cand_list]
    hsp_cand = [hsp_cand_1, hsp_cand_2, hsp_cand_3]
    sel_tg_hsp = hsp_cand

    lambda_ = sel_param["lambda_"]

    outer_train_corr, outer_test_corr = run_outer_fold(n_outer_cv, outer_save_dir, sel_tg_hsp)
    outer_cv.append([sel_hsp, outer_train_corr, outer_test_corr])

    # The second value is the held-out test correlation of the outer fold.
    print("\nOuter Fold [{}/{}]: train corr: {:.4f}, test corr: {:.4f}"
          .format(n_outer_cv + 1, outer_n_splits, outer_train_corr, outer_test_corr))

/users/hjw/data/Nested_CV/test/210802_SBM


Param: 1_hsp_extr: 0.975 2_hsp_pred: 0.1 3_hsp_disc: 0.1 lambda_: 0.02 


Epoch [50/150] Train corr: 0.1624, Valid corr: 0.1467
Layer 1: [0.8872/0.9750] Layer 2: [0.1000/0.1000] Layer 3: [0.1000/0.1000] 
Epoch [100/150] Train corr: 0.3362, Valid corr: 0.1434
Layer 1: [0.9748/0.9750] Layer 2: [0.1000/0.1000] Layer 3: [0.1000/0.1000] 
Epoch [150/150] Train corr: 0.3498, Valid corr: 0.1452
Layer 1: [0.9749/0.9750] Layer 2: [0.1000/0.1000] Layer 3: [0.1000/0.1000] 
Inner Fold [1/3] train corr: 0.3498, valid corr: 0.1452
Execution Time for Fold: 21.70 mins


Epoch [50/150] Train corr: 0.1587, Valid corr: 0.1287
Layer 1: [0.9147/0.9750] Layer 2: [0.1000/0.1000] Layer 3: [0.1000/0.1000] 
Epoch [100/150] Train corr: 0.2529, Valid corr: 0.1364
Layer 1: [0.9747/0.9750] Layer 2: [0.1000/0.1000] Layer 3: [0.1000/0.1000] 
Epoch [150/150] Train corr: 0.3009, Valid corr: 0.1323
Layer 1: [0.9752/0.9750] Layer 2: [0.1000/0.1000] Layer 3: [0.1000/0.1000] 
Inne

OSError: [Errno 12] Cannot allocate memory

In [None]:
# Total wall-clock time elapsed since code_start_time was recorded.
code_tot_time = time.time() - code_start_time
elapsed_hours = code_tot_time / 60 / 60
print("Execution Time for the training: {:.2f} hours".format(elapsed_hours))

# Rough estimate: a full run may take about 40 hours, i.e. roughly two days.