In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
import collections
import torchvision.transforms as transforms
from torch.optim import lr_scheduler
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.models as models
import pandas as pd
import torch.nn.functional as F
import random



In [2]:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
random_seed = 512

In [4]:
torch.manual_seed(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)

In [5]:
data_root_lv = 'X:\\jechoi\\numpy\\Radiogenomics+genomics_raiomics_LAUD+LSCC'

In [6]:
training_latent_variable_raw = np.load(data_root_lv + '\\training_latent_variable_noBatchNorm.npy')
training_label_bf = np.load(data_root_lv + '\\training_latent_variable_stage_noBatchNorm.npy')
validation_latent_variable_raw = np.load(data_root_lv + '\\validation_latent_variable_noBatchNorm.npy')
validation_label_bf = np.load(data_root_lv + '\\validation_latent_variable_stage_noBatchNorm.npy')

In [7]:
training_latent_variable_ft = training_latent_variable_raw.reshape(training_latent_variable_raw.shape[0], -1)
validation_latent_variable_ft = validation_latent_variable_raw.reshape(validation_latent_variable_raw.shape[0], -1)

In [8]:
training_latent_variable_max = training_latent_variable_raw.max()
training_latent_variable_min = training_latent_variable_raw.min()

In [9]:
training_latent_variable = (training_latent_variable_raw - training_latent_variable_min) / (training_latent_variable_max - training_latent_variable_min)
validation_latent_variable = (validation_latent_variable_raw - training_latent_variable_min) / (training_latent_variable_max - training_latent_variable_min)

In [10]:
print(training_latent_variable_ft.shape)
print(training_label_bf.shape)
print(validation_latent_variable_ft.shape)
print(training_label_bf.shape)

(98, 32768)
(98,)
(37, 32768)
(98,)


In [11]:
label_one = len(np.where(training_label_bf == 1)[0]) / len(training_label_bf)
label_two = len(np.where(training_label_bf == 2)[0]) / len(training_label_bf)
label_three = len(np.where(training_label_bf == 3)[0]) / len(training_label_bf)
print('one : ', label_one)
print('two : ', label_two)
print('three : ', label_three)

one :  0.6326530612244898
two :  0.22448979591836735
three :  0.11224489795918367


In [12]:
val_label_one = len(np.where(validation_label_bf == 1)[0]) / len(validation_label_bf)
val_label_two = len(np.where(validation_label_bf == 2)[0]) / len(validation_label_bf)
val_label_three = len(np.where(validation_label_bf == 3)[0]) / len(validation_label_bf)
print('one : ', val_label_one)
print('two : ', val_label_two)
print('three : ', val_label_three)

one :  0.5405405405405406
two :  0.40540540540540543
three :  0.05405405405405406


In [13]:
training_label = np.array([])
for i in training_label_bf:
    if i == 1:
        new_s = 0
    else:
        new_s = 1
    training_label = np.append(training_label, new_s)

In [14]:
validation_label = np.array([])
for i in validation_label_bf:
    if i == 1:
        new_s = 0
    else:
        new_s = 1
    validation_label = np.append(validation_label, new_s)

In [15]:
label_one = len(np.where(training_label == 0)[0]) / len(training_label)
label_three = len(np.where(training_label == 1)[0]) / len(training_label)
print('early_stage : ', label_one)
print('late_stage : ', label_three)

early_stage :  0.6326530612244898
late_stage :  0.3673469387755102


In [16]:
val_label_one = len(np.where(validation_label == 0)[0]) / len(validation_label)
val_label_three = len(np.where(validation_label == 1)[0]) / len(validation_label)
print('early_stage : ', val_label_one)
print('late_stage : ', val_label_three)

early_stage :  0.5405405405405406
late_stage :  0.4594594594594595


In [17]:
train_label = np.zeros((len(training_label), 2))
for i in range(len(training_label)):
    label = int(training_label[i] - 1)
    train_label[i, label] = 1

In [18]:
val_label = np.zeros((len(validation_label), 2))
for i in range(len(validation_label)):
    label = int(validation_label[i] - 1)
    val_label[i, label] = 1

In [19]:
train_x_torch = torch.tensor(training_latent_variable_ft, dtype=torch.float)
train_y_torch = torch.tensor(train_label, dtype=torch.long)

In [20]:
val_x_torch = torch.tensor(validation_latent_variable_ft, dtype=torch.float)
val_y_torch = torch.tensor(val_label, dtype=torch.long)

In [21]:
batch_size = 16
momentum = 0.9
learning_rate = 5e-6
num_epochs = 1000
num_classes = 2

In [22]:
class Dataset(Dataset):
    def __init__(self, images, label):
        self.labels = label
        self.images = images
        
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        X = self.images[index]
        y = self.labels[index]
        return X, y

In [23]:
training_set = Dataset(train_x_torch, train_y_torch)
train_loader = DataLoader(training_set, batch_size = batch_size, shuffle=True)
batch_len_train = len(train_loader)

In [24]:
validation_set = Dataset(val_x_torch, val_y_torch)
validation_loader = DataLoader(validation_set, batch_size = batch_size, shuffle=True)
batch_len_val = len(validation_loader)

In [25]:
'''
saved result model
'''


# class Model(nn.Module):
#     def __init__(self, in_channels, out_channels1, out_channels2, flatten_size, out_features1, 
#                  out_features2, out_features3, out_features4):
#         super(Model, self).__init__()
#         self.cnn1 = nn.Conv2d(in_channels, out_channels1, kernel_size= 3, padding = 1)
#         self.cnn2 = nn.Conv2d(out_channels1, out_channels2, kernel_size= 3, padding = 1)
#         self.linear1 = nn.Linear(flatten_size, out_features1)
#         self.linear2 = nn.Linear(out_features1, out_features2)
#         self.linear3 = nn.Linear(out_features2, out_features3)
#         self.linear4 = nn.Linear(out_features3, out_features4)
#         self.BatchNorm1 = nn.BatchNorm2d(out_channels1)
#         self.BatchNorm2 = nn.BatchNorm2d(out_channels2)
#         self.dropout = nn.Dropout2d(0.5)
#         self.dropout1 = nn.Dropout(0.5)
        
#     def forward(self, x):
#         output = self.cnn1(x)
#         output = self.BatchNorm1(output)
#         output = self.dropout(output)
#         output = self.cnn2(output)
#         output = output.view(output.size(0), -1)
#         output = self.dropout1(output)
#         output = self.linear1(output)
#         output = self.dropout1(output)
#         output = self.linear2(output)
#         output = self.dropout1(output)
#         output = self.linear3(output)
#         output = self.linear4(output)
#         output = F.log_softmax(output, dim = 1)
#         return output

'\nsaved result model\n'

In [26]:
class Model(nn.Module):
    def __init__(self, flatten_size, out_features1, out_features2, out_features3, out_features4, out_features5, 
                out_features6, out_features7):
        super(Model, self).__init__()
        self.linear1 = nn.Linear(flatten_size, out_features1)
        self.linear2 = nn.Linear(out_features1, out_features2)
        self.linear3 = nn.Linear(out_features2, out_features3)
        self.linear4 = nn.Linear(out_features3, out_features4)
        self.linear5 = nn.Linear(out_features4, out_features5)
        self.linear6 = nn.Linear(out_features5, out_features6)
        self.linear7 = nn.Linear(out_features6, out_features7)
        self.BatchNorm1 = nn.BatchNorm1d(out_features1)
        self.BatchNorm2 = nn.BatchNorm1d(out_features3)
        self.dropout1 = nn.Dropout(0.5)
        
    def forward(self, x):
        output = self.linear1(x)
        output = self.BatchNorm1(output)
        output = self.dropout1(output)
        output = self.linear2(output)
        output = self.dropout1(output)
        output = self.linear3(output)
        output = self.BatchNorm2(output)
        output = self.dropout1(output)
        output = self.linear4(output)
        output = self.dropout1(output)
        output = self.linear5(output)
#         output = self.dropout1(output)
        output = self.linear6(output)
        output = self.linear7(output)
        output = F.log_softmax(output, dim = 1)
        return output

In [27]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss

In [28]:
flatten_size = training_latent_variable_ft.shape[1]

In [29]:
my_model = Model(flatten_size, 128, 64, 32, 16, 16, 8, num_classes)
my_model.cuda()

Model(
  (linear1): Linear(in_features=32768, out_features=128, bias=True)
  (linear2): Linear(in_features=128, out_features=64, bias=True)
  (linear3): Linear(in_features=64, out_features=32, bias=True)
  (linear4): Linear(in_features=32, out_features=16, bias=True)
  (linear5): Linear(in_features=16, out_features=16, bias=True)
  (linear6): Linear(in_features=16, out_features=8, bias=True)
  (linear7): Linear(in_features=8, out_features=2, bias=True)
  (BatchNorm1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (BatchNorm2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout(p=0.5, inplace=False)
)

In [30]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate, weight_decay=1e-5)

In [31]:
result = np.array([]).reshape(0, 6)
for epoch in range(num_epochs):
    epoch_loss_train = 0.0
    epoch_train_acc = 0.0
    predicted_train_output = np.array([])
    train_real = np.array([])


    my_model.train()
    for train_x_batch, train_y_batch in train_loader:
        train_x = Variable(train_x_batch).cuda()
        train_y = Variable(train_y_batch).cuda()
        
        optimizer.zero_grad()
        
        train_output = my_model(train_x)
        train_epoch_loss = criterion(train_output, torch.max(train_y, 1)[1])

        train_epoch_loss.backward()
        optimizer.step()

        epoch_loss_train += (train_epoch_loss.data.item() * len(train_x_batch))

        pred = np.argmax(train_output.data.cpu().numpy(), axis = 1)
        true = np.argmax(train_y.data.cpu().numpy(), axis = 1)
        predicted_train_output = np.append(predicted_train_output, pred)
        train_real = np.append(train_real, true)
        correct = len(np.where(pred == true)[0])
        epoch_train_acc += (correct / len(pred))

    train_loss = epoch_loss_train / len(train_x_torch)
    train_acc = len(np.where(predicted_train_output == train_real)[0]) / len(predicted_train_output)


    with torch.no_grad():
        epoch_loss_val = 0.0
        epoch_acc_val = 0.0
        predicted_val_output = np.array([])
        val_real = np.array([])
        
        my_model.eval()

        for validation_x_batch, validation_y_batch in validation_loader:
            validation_x = Variable(validation_x_batch).cuda()
            validation_y = Variable(validation_y_batch).cuda()

            validation_output = my_model(validation_x)
            validation_epoch_loss = criterion(validation_output, torch.max(validation_y, 1)[1])

            epoch_loss_val += (validation_epoch_loss.data.item() * len(validation_x_batch))

            pred_val = np.argmax(validation_output.data.cpu().numpy(), axis = 1)
            true_val = np.argmax(validation_y.data.cpu().numpy(), axis = 1)
            correct_val = len(np.where(pred_val == true_val)[0])
            epoch_acc_val += (correct_val / len(pred_val))
            
            predicted_val_output = np.append(predicted_val_output, pred_val)
            val_real = np.append(val_real, true_val)
            

        val_loss = epoch_loss_val / len(val_x_torch)
        val_acc = len(np.where(predicted_val_output == val_real)[0]) / len(predicted_val_output)
        
        result_list = np.array([str(epoch+1), str(num_epochs), train_loss, train_acc, val_loss, val_acc]).reshape(1, 6)
        
        result = np.append(result, result_list, axis = 0)
        if (epoch + 1) == 1 :
            print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
                  format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))


        if (epoch + 1) % 10 == 0 :
            print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
                  format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))
            
        if  val_acc > 0.78 and train_acc > 0.90:
            print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
                  format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))       
            
#         if train_acc == 1.0:
#             print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
#                   format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))
#             break
                                                                                                                      

Epoch [1/1000], Train loss : 0.6885586, Train acc : 0.5612, Val loss : 0.6906, Val acc : 0.5405405
Epoch [10/1000], Train loss : 0.6616435, Train acc : 0.6122, Val loss : 0.6790, Val acc : 0.7297297
Epoch [20/1000], Train loss : 0.6413128, Train acc : 0.7449, Val loss : 0.6772, Val acc : 0.6486486
Epoch [30/1000], Train loss : 0.6334696, Train acc : 0.6939, Val loss : 0.6768, Val acc : 0.7837838
Epoch [40/1000], Train loss : 0.6222296, Train acc : 0.8571, Val loss : 0.6761, Val acc : 0.7297297
Epoch [50/1000], Train loss : 0.6079484, Train acc : 0.8367, Val loss : 0.6758, Val acc : 0.6756757
Epoch [60/1000], Train loss : 0.5999724, Train acc : 0.8469, Val loss : 0.6751, Val acc : 0.6756757
Epoch [70/1000], Train loss : 0.6182377, Train acc : 0.7755, Val loss : 0.6750, Val acc : 0.6486486
Epoch [80/1000], Train loss : 0.6130329, Train acc : 0.8265, Val loss : 0.6758, Val acc : 0.6756757
Epoch [90/1000], Train loss : 0.6219976, Train acc : 0.8163, Val loss : 0.6718, Val acc : 0.6486486
E

KeyboardInterrupt: 

In [None]:
result.shape

In [None]:
result_pd = pd.DataFrame(result, columns = ['num_epoch', 'Epoch', 'train loss', 'train acc', 
                                           'validation loss', 'validation acc'])

In [None]:
result_pd

In [None]:
result_pd.to_csv(data_root_lv + '\\deep_learning_result_ft.csv')

## validation loss 통으로 보기

In [None]:
# for epoch in range(num_epochs):
#     epoch_loss_train = 0.0
#     epoch_train_acc = 0.0
#     predicted_train_output = np.array([])
#     train_real = np.array([])
    
#     raw_train_output = torch.empty(0, 2).cuda()
#     raw_train_real = torch.empty(0, 2).type(torch.LongTensor).cuda()


#     my_model.train()
#     for train_x_batch, train_y_batch in train_loader:
#         train_x = Variable(train_x_batch).cuda()
#         train_y = Variable(train_y_batch).cuda()
        
#         optimizer.zero_grad()
        
#         train_output = my_model(train_x)
#         train_epoch_loss = criterion(train_output, torch.max(train_y, 1)[1])

#         train_epoch_loss.backward()
#         optimizer.step()

#         epoch_loss_train += (train_epoch_loss.data.item() * len(train_x_batch))

#         pred = np.argmax(train_output.data.cpu().numpy(), axis = 1)
#         true = np.argmax(train_y.data.cpu().numpy(), axis = 1)
#         predicted_train_output = np.append(predicted_train_output, pred)
#         train_real = np.append(train_real, true)
#         correct = len(np.where(pred == true)[0])
#         epoch_train_acc += (correct / len(pred))
        
#         raw_train_output = torch.cat((raw_train_output, train_output), dim = 0)
#         raw_train_real = torch.cat((raw_train_real, train_y), dim = 0)
        
#     train_loss = criterion(raw_train_output, torch.max(raw_train_real, 1)[1])
#     train_acc = len(np.where(predicted_train_output == train_real)[0]) / len(predicted_train_output)


#     with torch.no_grad():
#         epoch_loss_val = 0.0
#         epoch_acc_val = 0.0
#         predicted_val_output = np.array([])
#         val_real = np.array([])
        
#         raw_val_output = torch.empty(0, 2).cuda()
#         raw_val_real = torch.empty(0, 2).type(torch.LongTensor).cuda()
        
#         my_model.eval()

#         for validation_x_batch, validation_y_batch in validation_loader:
#             validation_x = Variable(validation_x_batch).cuda()
#             validation_y = Variable(validation_y_batch).cuda()

#             validation_output = my_model(validation_x)
#             validation_epoch_loss = criterion(validation_output, torch.max(validation_y, 1)[1])

#             epoch_loss_val += (validation_epoch_loss.data.item() * len(validation_x_batch))

#             pred_val = np.argmax(validation_output.data.cpu().numpy(), axis = 1)
#             true_val = np.argmax(validation_y.data.cpu().numpy(), axis = 1)
#             correct_val = len(np.where(pred_val == true_val)[0])
#             epoch_acc_val += (correct_val / len(pred_val))
            
#             predicted_val_output = np.append(predicted_val_output, pred_val)
#             val_real = np.append(val_real, true_val)
            
#             raw_val_output = torch.cat((raw_val_output, validation_output), dim = 0)
#             raw_val_real = torch.cat((raw_val_real, validation_y), dim = 0)

#         val_loss = criterion(raw_val_output, torch.max(raw_val_real, 1)[1])
#         val_acc = len(np.where(predicted_val_output == val_real)[0]) / len(predicted_val_output)


#         if (epoch + 1) == 1 :
#             print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
#                   format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))


#         if (epoch + 1) % 10 == 0 :
#             print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
#                   format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))
            
#         if  val_acc > 0.70 :
#             print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
#                   format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))       
        
                                                                                                                                              

In [None]:
# early_stopping = EarlyStopping(patience = 5, verbose = True)

In [None]:
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(my_model.parameters(), lr=learning_rate, weight_decay=1e-4)

In [None]:
# for epoch in range(num_epochs):
#     epoch_loss_train = 0.0
#     epoch_train_acc = 0.0
#     predicted_train_output = np.array([])
#     train_real = np.array([])
#     epoch_loss_train_re = 0.0
    
#     my_model.train()
#     for train_x_batch, train_y_batch in train_loader:
#         train_x = Variable(train_x_batch).cuda()
#         train_y = Variable(train_y_batch).cuda()
        
#         optimizer.zero_grad()
        
#         train_output = my_model(train_x)
#         train_epoch_loss = criterion(train_output, torch.max(train_y, dim = 1)[1])

#         train_epoch_loss.backward()
#         optimizer.step()
        
#         epoch_loss_train_re += (train_epoch_loss.data.item())*len(train_x_batch) 
        
#         pred = np.argmax(train_output.data.cpu().numpy(), axis = 1)
#         true = np.argmax(train_y.data.cpu().numpy(), axis = 1)
#         predicted_train_output = np.append(predicted_train_output, pred)
#         train_real = np.append(train_real, true)
#         correct = len(np.where(pred == true)[0])
#         epoch_train_acc += (correct / len(pred))


#     train_loss = epoch_loss_train_re / len(train_x_torch)
#     train_acc = len(np.where(predicted_train_output == train_real)[0]) / len(predicted_train_output)

#     with torch.no_grad():
#         epoch_loss_val = 0.0
#         epoch_acc_val = 0.0
#         predicted_val_output = np.array([])
#         val_real = np.array([])
#         epoch_loss_val_re = 0.0
        
#         my_model.eval()

#         for validation_x_batch, validation_y_batch in validation_loader:
#             validation_x = Variable(validation_x_batch).cuda()
#             validation_y = Variable(validation_y_batch).cuda()

#             validation_output = my_model(validation_x)
#             validation_epoch_loss = criterion(validation_output, torch.max(validation_y, dim = 1)[1])

#             epoch_loss_val_re += validation_epoch_loss.data.item() * len(validation_x_batch) 
            
#             pred_val = np.argmax(validation_output.data.cpu().numpy(), axis = 1)
#             true_val = np.argmax(validation_y.data.cpu().numpy(), axis = 1)
#             correct_val = len(np.where(pred_val == true_val)[0])
#             epoch_acc_val += (correct_val / len(pred_val))
            
#             predicted_val_output = np.append(predicted_val_output, pred_val)
#             val_real = np.append(val_real, true_val)
            

#         val_loss = epoch_loss_val_re / len(val_x_torch)
#         val_acc = len(np.where(predicted_val_output == val_real)[0]) / len(predicted_val_output)

#         early_stopping(val_loss, my_model)
        
#         if (epoch + 1) == 1 :
#             print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
#                   format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))


#         if (epoch + 1) % 10 == 0 :
#             print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
#                   format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))
        
#         if val_acc > 0.86 :
#             print('Epoch [{}/{}], Train loss : {:.7f}, Train acc : {:.4f}, Val loss : {:.4f}, Val acc : {:.7f}'.
#                   format(epoch+1, num_epochs, train_loss, train_acc, val_loss, val_acc))             
#             break