In [1]:
import torch, shap, glob, os
import numpy as np

In [2]:
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
# Configuration: which model / sequence encoding to train, where the input
# arrays live, and where the per-fold checkpoints are written.
model_type = 'mlp'
version_type = 'ver1'
# Maps the version tag to the encoding name embedded in data and checkpoint paths.
convet_index_dict = {'ver1': 'Integer', 'ver2': 'EIIP', 'ver3': 'Atomic', 'ver4': 'Voss'}
encoding_name = convet_index_dict[version_type]

if model_type == 'cnn':
    # NOTE(review): the data directory says perplexity=5 while the weight
    # directory says perplexity=50 -- confirm which one is intended.
    npy_path = f'./np_image_totalunit/tsne-binary-perplexity=5-pixel=100[{encoding_name}]/'
    save_weight_path = f'./model/weights_res18_tsne-binary-perplexity=50-pixel=100[{encoding_name}]-[10 times]/'
    # For the CNN the "data list" is the sorted list of per-sample .npy file paths.
    image_dir = os.path.join(npy_path, 'image_npy')
    npy_data_list = [os.path.join(image_dir, file_name) for file_name in sorted(os.listdir(image_dir))]
else:
    npy_path = f'./np_image_totalunit/mlp_{encoding_name}/'
    save_weight_path = f'./model/weights_mlp-binary-[{encoding_name}]-[10 times]/'
    # For the MLP the whole dataset is a single array stored next to the labels.
    npy_data_list = np.load(os.path.join(npy_path, 'gene.npy'))

# makedirs (not mkdir) so a missing parent such as ./model is created too, and
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(save_weight_path, exist_ok=True)

label_ = np.load(os.path.join(npy_path, 'label.npy'))
# Fail early with a clear message if samples and labels are out of sync.
assert len(npy_data_list) == len(label_), (
    f'sample/label count mismatch: {len(npy_data_list)} vs {len(label_)}'
)

In [3]:
# Stratified 75/25 hold-out split with a fixed seed. Only the 75% training
# portion (X, y) is kept here; the held-out parts are discarded into `_`.
X, _, y, _ = train_test_split(
    npy_data_list,
    label_,
    test_size=0.25,
    random_state=42,
    stratify=label_,
)

In [4]:
from train_val import train_val_function
from function import torch_dataset_func
from sklearn.model_selection import StratifiedKFold
# 10-fold stratified cross-validation splitter; shuffling is seeded with 123
# so the fold assignment is the same on every run.
skf = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=123,
)

In [5]:
import matplotlib.pyplot as plt  # imported once here instead of on every fold


def _save_history_plot(history, title, out_path):
    """Plot the 'train' and 'val' curves of `history` and save the figure to `out_path`.

    Uses an explicit figure that is closed after saving, so no figure stays
    open (or gets rendered empty) once the training loop has finished.
    """
    fig, ax = plt.subplots()
    ax.plot(history['train'])
    ax.plot(history['val'])
    ax.legend(['train', 'val'])
    ax.set_xlabel('epoch')
    ax.set_title(title)
    fig.savefig(out_path)
    plt.close(fig)


# train_test_split returns plain lists when given a list (the 'cnn' file-path
# case); convert so that fancy indexing with the fold index arrays works.
X_all, y_all = np.asarray(X), np.asarray(y)
# Only request CUDA when a device is actually present.
cuda_available = torch.cuda.is_available()

for times_, (train_index, valid_index) in enumerate(skf.split(X_all, y_all)):
    # Split this fold into train / validation subsets.
    X_train, X_val = X_all[train_index], X_all[valid_index]
    y_train, y_val = y_all[train_index], y_all[valid_index]
    print('train: ', len(X_train), '|| val: ', len(X_val))

    # Build the loaders; only the training loader is shuffled.
    train_ld = torch_dataset_func(model_type = model_type)
    train_ds = train_ld.get_TransferDataset(data_list= X_train, labels= y_train, batch_size = 32, shuffle = True)
    val_ld = torch_dataset_func(model_type = model_type)
    val_ds = val_ld.get_TransferDataset(data_list= X_val, labels= y_val, batch_size = 32, shuffle = False)
    print(len(train_ds), len(val_ds))

    # One checkpoint file per fold (fold number is 1-based in the file name).
    weights_name = f"weights_binaryclass_Covid19[-NACGT].final-[10 times]-{convet_index_dict[version_type]}-{times_+1}.pt"
    path2weights = os.path.join(save_weight_path,weights_name)
    params_train={
        "num_epochs": 100,
        "optimizer": 'Adam',
        "loss_func": 'BCE',
        "train_dl": train_ds,
        "val_dl": val_ds,
        "sanity_check": False,
        "lr_scheduler":  'ReduceLROnPlateau',
        "path2weights": path2weights,
        }

    start_train = train_val_function()
    # The non-CNN model is configured with the training array's shape; the CNN gets None.
    data_shape = X_train.shape if model_type != 'cnn' else None
    start_train.model_config(model_type = model_type, data_shape = data_shape, class_num = 1, use_cuda = cuda_available)
    loss_history, metric_history, auc_history = start_train.train_val_main(params_train)

    # Save the loss and AUC curves next to the weights; file names are the
    # checkpoint name without its '.pt' suffix plus the curve kind and fold number.
    weights_stem = weights_name[:-3]
    _save_history_plot(
        loss_history,
        f'[{model_type} {convet_index_dict[version_type]}] loss history',
        os.path.join(save_weight_path, f'{model_type}_{weights_stem} - loss_history-{times_+1}.jpg'),
    )
    _save_history_plot(
        auc_history,
        f'[{model_type} {convet_index_dict[version_type]}] auc history',
        os.path.join(save_weight_path, f'{model_type}_{weights_stem} - auc_history-{times_+1}.jpg'),
    )


train:  765 || val:  86
24 3
Epoch 0/99, current lr=0.003
Copied best model weights!
train loss: 0.696884, dev loss: 0.688143,  train accuracy: 39.33,valid accuracy: 38.73
train auc: 73.00,valid auc: 64.78
----------
Epoch 1/99, current lr=0.003
Copied best model weights!
train loss: 0.667954, dev loss: 0.682230,  train accuracy: 39.46,valid accuracy: 39.77
train auc: 81.45,valid auc: 58.61
----------
Epoch 2/99, current lr=0.003
train loss: 0.658160, dev loss: 0.690708,  train accuracy: 39.35,valid accuracy: 38.73
train auc: 81.09,valid auc: 74.74
----------
Epoch 3/99, current lr=0.003
train loss: 0.637790, dev loss: 0.785721,  train accuracy: 40.01,valid accuracy: 38.73
train auc: 85.45,valid auc: 63.09
----------
Epoch 4/99, current lr=0.003
train loss: 0.627603, dev loss: 0.905788,  train accuracy: 39.44,valid accuracy: 38.73
train auc: 89.27,valid auc: 79.57
----------
Epoch 5/99, current lr=0.003
Copied best model weights!
train loss: 0.618099, dev loss: 0.663516,  train accurac

<Figure size 432x288 with 0 Axes>