# HSC transfer learning code
### This is transfer learning code based on pytorch

## Set environment

In [1]:
import os
import os.path as pth
import torch
import gc
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import datasets, transforms, models
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import time
import copy
from torchsummary import summary
from datetime import datetime as dt
from pytz import timezone
from tqdm.notebook import tqdm
from sklearn.utils import shuffle
from sklearn.model_selection import KFold,StratifiedKFold,GroupKFold
from sklearn.model_selection import GroupShuffleSplit
from sklearn.utils.validation import check_array, check_random_state, _deprecate_positional_args
from sklearn.model_selection._split import _BaseKFold, _RepeatedSplits

from IPython.display import clear_output
import random
import matplotlib.patches as mpatches
import seaborn as sns

tz = timezone('Asia/Seoul')

plt.rcParams.update({'font.size': 16})



## Set parameter

In [2]:
config = {
    ### Setting gpu number to use for training
    'gpu_num': 2,
    
    ### Target task to classify
    'target_task': 4,
    'task_list': ['WM', 'MOTOR', 'EMOTION', 'RELATIONAL', 'SOCIAL', 'LANGUAGE', 'GAMBLING'],
    'classes_for_task' : [8,5,2,2,2,2,2],
    'output_size' : 0, #Output size will be designated accordingly later
    
    ### Number of subjects to train
    ### If you want to use total subject, just set 2000
    'train_n_subject': 20,
    'test_n_subject': 20,


    ### Set max beta, beta learning rate, etc.
    'hsc':{
        'type':'layer',
        'max_b': [1e-3, 1e-3],
        'b_lr': [1e-4, 1e-4], #3*1e-5
        'l2': 5*1e-4, 
        'l1': None,
        'patience':1,
    },
    
    ### Set hidden node combination
    'comb':{
        'comb_num': (5000,1000,),
    },


    ### Set activation function. Default to ReLU for all layers
    'activation':'relu',
    'activation2':'relu',

    ### Set dropout ratio, batch size, Normalization method
    'dropout_rate': 0.5,
    'batch_size': 512,
    'norm':{
        'type':'batch',
        'first': True,
        'second':True,
    },
    
    ### Set epochs to learn. This model has earlystopping method, so default setting to 2000.
    'num_epoch': 5,
    
    ### Set learning rate, momentum
    'learning_rate': 1e-3,
    'momentum' : 0.9,

    ### For cross-validation, set repeated group Kfold criteria.
    'num_fold': 5, # The number of folds in one validation,
    'num_repeat_of_fold': 10, # The number of trials of group Kfold

    ### Set early stopping criteria
    'es_patience': 200, # Model stops learning if there is no increasing in performance for this epochs on validation data
    'lr_patience':30, # Model starts annealing if there is no increasing in performance for this epochs on validation data
    'anneal_factor':0.3, # Annealing factor for ReduceLROnPlateau
    
    ### Set random seed for reproducibility
    'random_state': 7777,
}

In [17]:
### Target task setting
target_task_list = [config['task_list'][config['target_task']]] # 'WM', 'MOTOR', 'EMOTION', 'RELATIONAL', 'SOCIAL', 'LANGUAGE', 'GAMBLING'
task_idx_dict = {task:i for i, task in enumerate(config['task_list'])}
target_task_list = [task_idx_dict[target_task] for target_task in target_task_list]

### Set output class size accordingly
config['output_size'] = config['classes_for_task'][config['target_task']]    

# Target hoyer sparsity list
target_h_list = [

#     [0.8, 0.3],
    [0.8, 0.5],
#     [0.8, 0.8],
]

# Set [1,1] if want to control hoyer sparsity for both layers.
wsc_flag = [1,1]


### Set pretrained model
### 'RandomInit', 'FinetuneAE', 'FixAE', 'FinetuneRBM', 'FixRBM'
### [Model, pretrained 1st layer sparsity, denoising level]

pretrain_type_list = [

    
#     ['RandomInit', None, 0], 
    
#     ['FixRBM', 0.5, 0], 
#     ['FixAE', 0.5, 0], 
#     ['FixRBM', 0.8, 0], 
#     ['FixAE', 0.8, 0], 
#     ['FixRBM', 0.9, 0], 
#     ['FixAE', 0.9, 0], 

    
#     ['FinetuneAE', 0.5, 0], 
#     ['FinetuneRBM', 0.5, 0],
    ['FinetuneAE', 0.8, 0], 
#     ['FinetuneRBM', 0.8, 0],
#     ['FinetuneAE', 0.9, 0], 
#     ['FinetuneRBM', 0.9, 0],
]

#Randomseed for all
random_seed = config['random_state']
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
random.seed(random_seed)

In [18]:
os.environ["CUDA_VISIBLE_DEVICES"] = str(config["gpu_num"])

print('Available devices:', torch.cuda.device_count())
print('Current cuda device:', torch.cuda.current_device())
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")

Available devices: 1
Current cuda device: 0


## Load data

In [19]:
from TL4fMRI_load_sbj import load_sbj

load = load_sbj(pretrain_type_list)
is_subject_valid = load.is_subject_valid
get_sbj_task_data = load.get_sbj_task_data
load_data = load.load_data

In [20]:
### Bring data according to training settings

# Load n data
train_samples, train_labels, train_subjects, test_samples, test_labels, test_subjects, task_str = load_data(config=config)


train_x = train_samples
train_y = train_labels if config['output_size'] ==2 else train_labels.reshape(-1,1)

test_x = test_samples
test_y = test_labels if config['output_size'] ==2 else test_labels.reshape(-1,1)

del(train_samples)
del(train_labels)
del(test_samples)
del(test_labels)

gc.collect()
        
print(f"Bring {task_str} task data")
print(f"Training data shape : input-{train_x.shape}, label-{train_y.shape}")
print(f"Training data shape : input-{test_x.shape}, label-{test_y.shape}")

100%|██████████| 20/20 [00:00<00:00, 75.88it/s]
 56%|█████▌    | 5/9 [00:00<00:00, 44.29it/s]

Loaded 20 training subjects


100%|██████████| 9/9 [00:00<00:00, 43.83it/s]


Loaded 9 test subjects
Bring SOCIAL task data
Training data shape : input-(5360, 5000), label-(5360, 2)
Training data shape : input-(4824, 5000), label-(4824, 2)


In [21]:
# Training dataset
class train_dataset(Dataset): 
    def __init__(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
        
    def __len__(self):
        return len(self.X_train)
    
    def __getitem__(self, idx): 
        X_train = torch.from_numpy(self.X_train[idx]).type(torch.FloatTensor)
        y_train = torch.from_numpy(self.y_train[idx]).type(torch.LongTensor) if config['output_size'] >2 else torch.from_numpy(self.y_train[idx]).type(torch.FloatTensor)

        return X_train, y_train
    
# Test dataset
class test_dataset(Dataset): 
    def __init__(self, X_test, y_test):
        self.X_test = X_test
        self.y_test = y_test
        
    def __len__(self):
        return len(self.X_test)
    
    def __getitem__(self, idx): 
        X_test = torch.from_numpy(self.X_test[idx]).type(torch.FloatTensor)
        y_test = torch.from_numpy(self.y_test[idx]).type(torch.LongTensor) if config['output_size'] >2 else torch.from_numpy(self.y_test[idx]).type(torch.FloatTensor)
        
        return X_test, y_test

## Early stopping

In [22]:
class EarlyStopping_acc:
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_acc_max = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, val_acc, model):

        score = val_acc

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_acc, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_acc, model)
            self.counter = 0

    def save_checkpoint(self, val_acc, model):
        if self.verbose:
            print(f'Validation Acc increased ({self.val_acc_max:.6f} --> {val_acc:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_acc_max = val_acc

## Repeated group kfold

In [23]:
class CustomGroupKFold(_BaseKFold):
    def __init__(self, n_splits=5, *, shuffle=False, random_state=None):
        super().__init__(n_splits=n_splits, shuffle=shuffle,
                         random_state=random_state)

    def _iter_test_indices(self, X, y, groups):
        if groups is None:
            raise ValueError("The 'groups' parameter should not be None.")
        groups = check_array(groups, ensure_2d=False, dtype=None)
        
        unique_groups, groups = np.unique(groups, return_inverse=True)
        n_groups = len(unique_groups)

        if self.n_splits > n_groups:
            raise ValueError("Cannot have number of splits n_splits=%d greater"
                             " than the number of groups: %d."
                             % (self.n_splits, n_groups))

        # Weight groups by their number of occurrences
        n_samples_per_group = np.bincount(groups)

        # Distribute the most frequent groups first
        indices = np.argsort(n_samples_per_group)[::-1]
        
        if self.shuffle:
            rng = check_random_state(self.random_state)
            for n_sample in np.unique(n_samples_per_group):
                same_n_indices_index = np.where(n_samples_per_group == n_sample)[0]
                target_chunk = indices[same_n_indices_index]
                rng.shuffle(target_chunk)
                indices[same_n_indices_index] = target_chunk

        n_samples_per_group = n_samples_per_group[indices]

        # Total weight of each fold
        n_samples_per_fold = np.zeros(self.n_splits)

        # Mapping from group index to fold index
        group_to_fold = np.zeros(len(unique_groups))

        # Distribute samples by adding the largest weight to the lightest fold
        for group_index, weight in enumerate(n_samples_per_group):
            lightest_fold = np.argmin(n_samples_per_fold)
            n_samples_per_fold[lightest_fold] += weight
            group_to_fold[indices[group_index]] = lightest_fold

        indices = group_to_fold[groups]

        for f in range(self.n_splits):
            yield np.where(indices == f)[0]

    def split(self, X, y=None, groups=None):
        return super().split(X, y, groups)
    

class RepeatedGroupKFold(_RepeatedSplits):
    @_deprecate_positional_args
    def __init__(self, *, n_splits=5, n_repeats=10, random_state=None):
        super().__init__(
            CustomGroupKFold, n_repeats=n_repeats,
            random_state=random_state, n_splits=n_splits)
        
    def split(self, X, y=None, groups=None):
        n_repeats = self.n_repeats
        rng = check_random_state(self.random_state)

        for idx in range(n_repeats):
            cv = self.cv(random_state=rng, shuffle=True,
                         **self.cvargs)
            for train_index, test_index in cv.split(X, y, groups):
                yield train_index, test_index

## Model

In [24]:
def set_activation_func():
    if config['activation'] == None:
        activ= False
    if config['activation'] == 'tanh':
        activ= nn.Tanh()
    if config['activation'] == 'relu':
        activ= nn.ReLU()
    if config['activation'] == 'lkrelu':
        activ= nn.LeakyReLU()
    if config['activation'] == 'elu':
        activ= nn.ELU()
    if config['activation'] == 'prelu':
        activ= nn.PReLU()
        
    if config['activation2'] == None:
        activ2= False
    if config['activation2'] == 'tanh':
        activ2= nn.Tanh()
    if config['activation2'] == 'relu':
        activ2= nn.ReLU()
    if config['activation2'] == 'lkrelu':
        activ2= nn.LeakyReLU()
    if config['activation2'] == 'elu':
        activ2= nn.ELU()
    if config['activation2'] == 'prelu':
        activ2= nn.PReLU()
        
    return activ, activ2

In [25]:
# Finetune / RandomInit model
class HSCmodel(nn.Module):
    def __init__(self):
        super(HSCmodel, self).__init__()
        self.fc1 = nn.Linear(52470, config['comb']['comb_num'][0])
        self.fc2 = nn.Linear(config['comb']['comb_num'][0], config['comb']['comb_num'][1])
        self.fc3 = nn.Linear(config['comb']['comb_num'][1], config['output_size'])
        self.activ,self.activ2 = set_activation_func()
        if config['norm']['type'] == 'batch' and config['norm']['first']==True:
            self.bn1 = nn.BatchNorm1d(config['comb']['comb_num'][0])
        if config['norm']['type'] == 'layer' and config['norm']['first']==True:
            self.ln1 = nn.LayerNorm(config['comb']['comb_num'][0])
        if config['norm']['type'] == 'batch' and config['norm']['second']==True:
            self.bn2 = nn.BatchNorm1d(config['comb']['comb_num'][1])
        if config['norm']['type'] == 'layer' and config['norm']['second']==True:
            self.ln2 = nn.LayerNorm(config['comb']['comb_num'][1])
        self.dropout = nn.Dropout(config['dropout_rate'])
        self.weights_init()

        
    def forward(self, x):
        x = self.fc1(x)
        if config['norm']['type'] == 'batch' and config['norm']['first']==True:
            x = self.bn1(x)
        if config['norm']['type'] == 'layer' and config['norm']['first']==True:
            x = self.ln1(x)
        if config['activation'] != None:
            x = self.activ(x)
        x = self.fc2(x)
        if config['norm']['type'] == 'batch' and config['norm']['second']==True:
            x = self.bn2(x)
        if config['norm']['type'] == 'layer' and config['norm']['second']==True:
            x = self.ln2(x)
        if config['activation2'] != None:
            x = self.activ2(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x
    
    def weights_init(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                # He initialization for ReLU
                nn.init.kaiming_uniform_(m.weight)
                # Bias initialization
                nn.init.normal_(m.bias, std=0.01)

In [27]:
def build_model_per_pretrained(p_model):
    
    #Init DNN model
    print('======================')
    print('Model building...')
    
    model = HSCmodel()
    
    if 'AE' in p_model[0]:
        hsp_1st_layer = str(p_model[1]).replace('.','') #1st layer sparsity
        pretrained_model = sio.loadmat(os.getcwd()+"/AE/AE_model/AE_hsp{}_dns0.mat".format(hsp_1st_layer))
        pretrained_w = pretrained_model['encoder.weight']
        pretrained_b = pretrained_model['encoder.bias']
        del pretrained_model
        model.fc1.state_dict()['weight'].data.copy_(torch.from_numpy(pretrained_w.T))
        model.fc1.state_dict()['bias'].data.copy_(torch.from_numpy(pretrained_b.reshape(config['comb']['comb_num'][0],)))

    if 'RBM' in p_model[0]:
        if p_model[1] == 0.5:
            pretrained_model = sio.loadmat(os.getcwd()+'/RBM/RBM_model/RBM_hsp05_dns0.mat')
            pretrained_w = pretrained_model['rbm_w']
            pretrained_b = pretrained_model['rbm_c']

        if p_model[1] == 0.8:
            pretrained_model = sio.loadmat(os.getcwd()+'/RBM/RBM_model/RBM_hsp08_dns0.mat')
            pretrained_w = pretrained_model['tmp_w']
            pretrained_b = pretrained_model['tmp_c']

        if p_model[1] == 0.9:
            pretrained_model = sio.loadmat(os.getcwd()+'/RBM/RBM_model/RBM_hsp09_dns0_w.mat')
            pretrained_w = pretrained_model['tmp_w']
            pretrained_b = sio.loadmat('/RBM/RBM_model/RBM_hsp09_dns0_bias')['c_gtd']        
        del pretrained_model

        model.fc1.state_dict()['weight'].data.copy_(torch.from_numpy(pretrained_w))
        model.fc1.state_dict()['bias'].data.copy_(torch.from_numpy(pretrained_b.reshape(5000,)))
        
    # pretrained weight,bias fixed / finetune
    model.fc1.weight.requires_grad = False if 'Fix' in p_model[0] else True
    model.fc1.bias.requires_grad = False if 'Fix' in p_model[0] else True
    
    print('Gradient update for 1st layer')
    print('Weight : ',model.fc1.weight.requires_grad)
    print('Bias : ',model.fc1.bias.requires_grad)
    print('======================')
    
    
    return model

In [28]:
### Save fold learning data & parameters
def save_fold(tgs_folder):

    np.savez_compressed(
        pth.join(tgs_folder, '{}fold_learning_curve_{}'.format(config['num_fold'],
                                                              fold+1)), 
        loss=outer_train_loss, 
        val_loss=outer_valid_loss,
        test_loss=outer_test_loss,
        acc=outer_train_acc, 
        val_acc=outer_valid_acc,  
        test_acc=outer_test_acc,
        lr = outer_lr_list,
        HSC_hsp1=layer_1_sp,
        HSC_beta1=layer_1_b,
        HSC_hsp2=layer_2_sp,
        HSC_beta2=layer_2_b,
        train_sbj = fold_train_sbj,
        val_sbj = fold_valid_sbj,
        model_epoch_idx = model_epoch,
        elapsed_time = elapsed_time
    )
    print('Learning curve saved at {}'.format(tgs_folder))
    return


In [29]:
# Import data save module
from TL4fMRI_save_data import save_data

sv = save_data(config)

save_file = sv.save_file
make_sp_folder = sv.make_sp_folder
save_parameters = sv.save_parameters
save_plot_during_training = sv.save_plot_during_training

# Import hoyer sparsity control module
from TL4fMRI_sparsity_control import HSP

hsp = HSP(config,wsc_flag,pretrain_type_list)

sparsity_control = hsp.sparsity_control
Hoyers_sparsity_control = hsp.Hoyers_sparsity_control
init_hsp = hsp.init_hsp

In [34]:
lr_curve_list = []
config['output_size'] = config['classes_for_task'][config['target_task']]  


#Target sparsity별
for tgs in target_h_list: #tgs[0]:1st layer, tgs[1]:2nd layer
    #pretrained 모델 별
    for p_model in pretrain_type_list: #p_model[0]:model name, p_model[1]:first layer hsp
        if tgs[0] != p_model[1]:
            if 'AE' in p_model[0] or 'RBM' in p_model[0]:
                continue
            elif 'Random' in p_model[0]:
                pass
            else:
                print('Invalid pretrain model')
                break

        print('Current pretrained model is [{}-{}]'.format(p_model[0],p_model[1]))
        print('Target sparsity is {}'.format(tgs))
        print('HSC type is {}'.format(config['hsc']['type']))
        
        #Make result folder
        output_folder = save_file(p_model,tgs)
        save_lr_path = make_sp_folder(p_model,tgs)
        save_parameters(p_model,save_lr_path)

        #Split in group K fold
        n_splits = config['num_fold']
        gkf = RepeatedGroupKFold(n_splits=n_splits,
                                 n_repeats=config['num_repeat_of_fold'],
                                 random_state=config['random_state'])


        for fold, (train_idx, valid_idx) in enumerate(gkf.split(train_x, train_y, groups=train_subjects)):
            print(f'-------------------Fold {fold+1}-------------------')
            fold_train_sbj = np.unique(train_subjects[train_idx])
            fold_valid_sbj = np.unique(train_subjects[valid_idx])
            fold_train_x = train_x[train_idx]
            fold_train_y = train_y[train_idx]
            fold_valid_x = train_x[valid_idx]
            fold_valid_y = train_y[valid_idx]

            #early stopping, learning rate custom annealing
            ES_switch = [False, 0]
            LR_switch = [False, False, 0]

            model_epoch = 0

            # Learning curve for 1 epochs
            outer_train_loss = []
            outer_valid_loss = []
            outer_test_loss = []
            outer_train_acc = []
            outer_valid_acc = []
            outer_test_acc = []
            outer_lr_list = []
            layer_1_sp = []
            layer_2_sp = []
            layer_1_b = []
            layer_2_b = []


            #Make train/valid/test dataset & dataloader accordingly
            fold_train_dataset = train_dataset(fold_train_x, fold_train_y)
            fold_valid_dataset = train_dataset(fold_valid_x, fold_valid_y)
            fold_test_dataset = test_dataset(test_x, test_y)

            train_loader = DataLoader(fold_train_dataset, batch_size=config['batch_size'], 
                                            shuffle=True, drop_last=True,num_workers=4)
            valid_loader = DataLoader(fold_valid_dataset, batch_size=config['batch_size'], 
                                            shuffle=True, drop_last=True,num_workers=4)
            test_loader = DataLoader(fold_test_dataset, batch_size=config['batch_size'], 
                                            shuffle=True, drop_last=True,num_workers=4)

            del fold_train_x
            del fold_train_y
            del fold_valid_x
            del fold_valid_y


    
            #Build DNN model
            model = build_model_per_pretrained(p_model)
            model.to(device)


            #Init optimizer & scheduler
            optimizer = optim.SGD(model.parameters(), lr=config['learning_rate'], 
                              weight_decay=config['hsc']['l2'], momentum=config['momentum'], nesterov=True)
            criterion =nn.BCEWithLogitsLoss() if config['output_size'] ==2 else nn.CrossEntropyLoss() #nn.BCEWithLogitsLoss() #nn.CrossEntropyLoss()
            scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = config['anneal_factor'], patience = config['lr_patience'])


            #Init HSP
            if 'Fix' in p_model[0]:
                hsp_val, beta_val, hsp_list, beta_list = 0,0,[],[]
            else:
                hsp_val, beta_val, hsp_list, beta_list = init_hsp()

            #EarlyStopping
            early_stopping = EarlyStopping_acc(patience = config['es_patience']
                                           , verbose = True
                                           , path = save_lr_path + '/{}fold.pt'.format(fold+1))

            fold_time = dt.now(tz)
            
            #Train begins here
            for epoch in range(0,config['num_epoch']):
                print(f'Starting epoch {epoch+1}')

                #Time calculate
                cur_time = dt.now(tz)

                #Train
                model.train()
                train_loss = 0
                total = 0
                train_acc = 0
                train_correct = 0
                count = 1


                if 'Finetune' in p_model[0]:
                    model.fc1.weight.requires_grad = True
                    model.fc1.bias.requires_grad = True

                current_lr = optimizer.param_groups[0]['lr']
                outer_lr_list.append(current_lr)
                print('Learning rate : ', current_lr)

                for input_, target in train_loader:
                    optimizer.zero_grad() #gradient reset
                    if config['output_size'] >2:
                        target = target.reshape(config['batch_size'],)
                    input_, target = input_.to(device), target.to(device)
                    output = model(input_) 
                    loss = criterion(output, target)

                    #Calculate L1 with hoyer sparsity control applied
                    l1_term,hsp_val,beta_val= sparsity_control(model, hsp_val, 
                                                 beta_val, hsp_list, beta_list, tgs)
                    cost = loss + l1_term #Calculate totla cost
                    train_loss += loss.item()
                    cost.backward() #cost backpropagate
                    optimizer.step() #update optimizer
                    total += output.size(0)
                    _, pred = torch.max(output.data, 1)
                    if config['output_size']==2:
                        _, target = torch.max(target,1)
                    train_correct += (pred == target).sum().item()
                    count+=1
                print("L1 regularization gradient updating status :", l1_term.requires_grad)
                if 'Fix' in p_model[0]:
                    if type(hsp_val) == float:
                        hsp_list.append(hsp_val)
                    if type(hsp_val) == torch.Tensor:
                        hsp_list.append(hsp_val.cpu().detach().numpy())
                    if type(beta_val) == float:
                        beta_list.append(beta_val)
                    if type(beta_val) == torch.Tensor:
                        beta_list.append(beta_val.cpu().detach().numpy())

                    print(f'Current layer2 sparsity is... {np.round(hsp_list[epoch],3)}')
                    print(f'Current layer2 beta is... {np.round(beta_list[epoch],6)}')
                else:
                    print(type(hsp_val[0]),type(hsp_val[1]),type(beta_val[0]),type(hsp_val[1]))
                    hsp_list[epoch][0] = hsp_val[0].cpu().detach().numpy()
                    beta_list[epoch][0] = beta_val[0]
                    hsp_list[epoch][1] = hsp_val[1].cpu().detach().numpy()

                    if type(beta_val[0]) == float:
                        beta_list[epoch][0] = beta_val[0] #.cpu().detach().numpy()
                    if type(beta_val[0]) == torch.Tensor:
                        beta_list[epoch][0] = beta_val[0].cpu().detach().numpy()
                    if type(beta_val[1]) == float:
                        beta_list[epoch][1] = beta_val[1] #.cpu().detach().numpy()
                    if type(beta_val[1]) == torch.Tensor:
                        beta_list[epoch][1] = beta_val[1].cpu().detach().numpy()
                
                #Print HSP,beta status
                print(f'Current layer1 sparsity is... {np.round(hsp_list[epoch][0],3)}')
                print(f'Current layer1 beta is... {np.round(beta_list[epoch][0],6)}')
                print(f'Current layer2 sparsity is... {np.round(hsp_list[epoch][1],3)}')
                print(f'Current layer2 beta is... {np.round(beta_list[epoch][1],6)}')
                train_loss /= count
                train_acc = 100 * train_correct / total

                #Save loss & acc for each epoch
                outer_train_acc.append(train_acc)
                outer_train_loss.append(train_loss)
                print(f'Train_acc : {round(train_acc,3)}')
                print(f'Train_loss : {round(train_loss,3)}')

                #Validation
                model.eval()
                valid_loss = 0
                valid_acc = 0
                valid_correct = 0
                total = 0
                count = 1
                with torch.no_grad():
                    for input_, target in valid_loader:
                        if config['output_size'] >2: 
                            target = target.reshape(config['batch_size'],)
                        input_, target = input_.to(device), target.to(device)
                        output = model(input_)
                        loss = criterion(output, target)
                        valid_loss += loss.item()
                        total += output.size(0)
                        _, pred = torch.max(output.data, 1)
                        if config['output_size']==2:
                            _, target = torch.max(target,1)
                        valid_correct += (pred == target).sum().item()
                        count +=1
                    valid_loss /= count
                    valid_acc = 100 * valid_correct / total
                    
                    #Save loss & acc for each epoch
                    outer_valid_loss.append(valid_loss)
                    outer_valid_acc.append(valid_acc)
                    print(f'Validation_acc : {round(valid_acc,3)}')
                    print(f'Validation_loss : {round(valid_loss,3)}')


                #Test
                model.eval()
                test_loss = 0
                test_acc = 0
                test_correct = 0
                total = 0
                count = 1
                with torch.no_grad():
                    for input_, target in test_loader:
                        if config['output_size'] >2:
                            target = target.reshape(config['batch_size'],)
                        input_, target = input_.to(device), target.to(device)
                        output = model(input_)
                        loss = criterion(output, target)
                        test_loss += loss.item()
                        total += output.size(0)
                        _, pred = torch.max(output.data, 1)
                        if config['output_size']==2:
                            _, target = torch.max(target,1)
                        test_correct += (pred == target).sum().item()
                        count +=1
                    test_loss /= count
                    test_acc = 100 * test_correct / total
                    
                    #Save loss & acc for each epoch
                    outer_test_loss.append(test_loss)
                    outer_test_acc.append(test_acc)
                    print(f'Test_acc : {round(test_acc,3)}')
                    print(f'Test_loss : {round(test_loss,3)}')          
                    
                    print("Time spent for this epoch : {}".format(dt.now(tz)-cur_time))

                #Start earlystopping after reaching target sparsity
                if np.mean(hsp_list[epoch][0]) >= tgs[0]-0.001 and np.mean(hsp_list[epoch][1])>=tgs[1]-0.001:
                    ES_switch[0] = True
                    ES_switch[1]+=1
                    if ES_switch[1]==1:
                        print('EarlyStopping count Start!')
                if ES_switch[0] == True and ES_switch[1] < config['es_patience']/10:
                    print("Wait for stabilize")
                if ES_switch[0] == True and ES_switch[1] >= config['es_patience']/10:
                    early_stopping(valid_acc, model)
                    scheduler.step(valid_loss)
                if early_stopping.early_stop:
                    print("Early stopping!!!")
                    model_epoch = epoch-config['es_patience']
                    break
                
                #Save figure for current learning status
                save_plot_during_training(p_model,tgs,save_lr_path,hsp_list,fold,epoch,
                                         outer_train_acc,outer_valid_acc,outer_test_acc,outer_lr_list,model_epoch)
                print('=======================')

            elapsed_time = dt.now(tz)-fold_time

            for i in range(0,epoch+1):
                layer_1_sp.append(hsp_list[i][0])
                layer_2_sp.append(hsp_list[i][1])
                layer_1_b.append(beta_list[i][0])
                layer_2_b.append(beta_list[i][1])

            clear_output()
            save_fold(save_lr_path)
        lr_curve_list.append(save_lr_path)

        #Clear memory after training
        del outer_train_loss
        del outer_valid_loss
        del outer_test_loss
        del outer_train_acc
        del outer_valid_acc
        del outer_test_acc
        del outer_lr_list
        del layer_1_sp
        del layer_2_sp
        del layer_1_b
        del layer_2_b

        gc.collect()

torch.cuda.empty_cache()
print(save_lr_path)

Current pretrained model is [FixRBM-0.8]
Target sparsity is [0.8, 0.5]
HSC type is layer
Save file path : /users/hjd/hsp_results/DNN_TL4fMRI/Transfer_learning/SOCIAL/FixRBM_0.8-0.5
Save file path : /users/hjd/hsp_results/DNN_TL4fMRI/Transfer_learning/SOCIAL/FixRBM_0.8-0.5
-------------------Fold 1-------------------
Model building...


RuntimeError: The size of tensor a (5000) must match the size of tensor b (52470) at non-singleton dimension 1