In [1]:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
import pickle
from torchmetrics import StructuralSimilarityIndexMeasure
from statistics import median, mean
from matplotlib import pyplot as plt
import numpy as np
from pytorch_msssim import ssim, ms_ssim, SSIM, MS_SSIM
from torchsummary import summary
import json
from tqdm import tqdm
from glob import glob
import sys
path = "/study3/mrphys/skunkworks/kk/mriUnet"
sys.path.insert(0,path)
import unet
from torchvision import transforms
from torch.utils.data import Dataset

allImages = sorted(glob("/study/mrphys/skunkworks/training_data//mover01/*/", recursive=True))

class h5DatasetIndividual(Dataset):
    def __init__(self, sample):
        self.orginalPathList = []
        self.accelPathList = []
        self.orginalFileList = []
        self.accelFileList = []
        # self.mid = int(256/2) - 3  ## minus three because we are taking the middle 8 slices

        folderName = allImages[sample]
        self.orginalPathList.append(folderName + 'processed_data/C.h5')
        self.accelPathList.append(folderName +'processed_data/acc_2min/C.h5')
        
        for orginalPath, accelPath in zip(self.orginalPathList, self.accelPathList):
            prefix = 'C_000_0'
            orginalImageNumpy_Stack = None
            accelImageNumpy_Stack = None
            with h5py.File(orginalPath,'r') as hf:
                for i in range(16):
                    n = prefix + str(i).zfill(2)
                    image = hf['Images'][n]
                
                    imageNumpy = image['real']
                    imageNumpy = imageNumpy-imageNumpy.min()
                    imageNumpy = imageNumpy * (1/(imageNumpy.max()))
                    orginalImageNumpy = np.array(imageNumpy + 0j*image['imag'])
                    if i == 0:
                        orginalImageNumpy_Stack = np.expand_dims(np.copy(orginalImageNumpy), axis=0)
                    else:
                        orginalImageNumpy_Stack = np.concatenate((orginalImageNumpy_Stack, np.expand_dims(orginalImageNumpy, axis=0)), axis=0)

            
            with h5py.File(accelPath,'r') as hf:
                for i in range(16):
                    n = prefix + str(i).zfill(2)
                    image = hf['Images'][n]
                
                    imageNumpy = image['real']
                    imageNumpy = imageNumpy-imageNumpy.min()
                    imageNumpy = imageNumpy * (1/(imageNumpy.max()))
                    accelImageNumpy = np.array(imageNumpy + 0j*image['imag'])
                    if i == 0:
                        accelImageNumpy_Stack = np.expand_dims(np.copy(accelImageNumpy), axis=0)
                    else:
                        accelImageNumpy_Stack = np.concatenate((accelImageNumpy_Stack, np.expand_dims(accelImageNumpy, axis=0)), axis=0)

            for i in range(256): ## train each slice for each subject
                for j in range(16):
                    if j == 0:
                        orginalStack = np.expand_dims(np.copy(orginalImageNumpy_Stack[j][i][32:224]), axis=0)
                        accelStack = np.expand_dims(np.copy(accelImageNumpy_Stack[j][i][32:224]), axis=0)
                    else:
                        orginalStack = np.concatenate((orginalStack, np.expand_dims(orginalImageNumpy_Stack[j][i][32:224], axis=0)), axis=0)
                        accelStack = np.concatenate((accelStack, np.expand_dims(accelImageNumpy_Stack[j][i][32:224], axis=0)), axis=0)
                self.orginalFileList.append(orginalStack)
                self.accelFileList.append(accelStack)
            
            print('Image ' + orginalPath + ' loaded')

    def __getitem__(self, index):
        return self.accelFileList[index], self.orginalFileList[index]

    def __len__(self):
        return len(self.accelFileList)

trainData  = []
testData = []

pbar = tqdm(range(len(allImages)), desc="loading datasets")

for i in pbar:
    with open(f'/scratch/mrphys/pickled/dataset_{i}.pickle', 'rb') as f:
        data = pickle.load(f)
        if i>= 55: # test
            testData.append(data)
        else: # train
            trainData.append(data)
        del data

transformIdentity = lambda x : x

class Trainer:
    
    def __init__(self, 
                 model, 
                 learningRate,
                 train_data, 
                 test_data,
                 norm_scale = 1,
                 model_name = 'mriUnet_features',
                 device = torch.device("cuda" if torch.cuda.is_available() else "cpu"),
                 transforms = transformIdentity
                ):
        
        self.lossCounter = {
            'train':[],
            'test':[],
        } #can unpack into pandas dataFrame later
        
        self.model = model.to(device)
        self.name = model_name
        self.trainLoader = train_data
        self.testLoader = test_data
        self.norm_scale = norm_scale
        self.device = device
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=learningRate)
        self.scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.5)
        
        self.transforms = transforms
        self.device = device

    def trainOneEpoch(self, curr_ep):
        
        self.model.to(self.device)
        
        self.model.train()
        
        pbar = tqdm(enumerate(self.trainLoader))
        total_batch = len(self.trainLoader)
        pbar.set_description(f"Training Epoch : {curr_ep}")
        
        meanLoss = 0
        counter = 0
        
        for batch, (X, y) in pbar:
            
            batch_size = X.size()[0]
            Batch = torch.cat([X, y], axis=0)
            Batch = self.transforms(Batch)
            X = Batch[:batch_size]
            y = Batch[batch_size:]
            
            X, y = X*self.norm_scale, y*self.norm_scale
            X, y = X.to(self.device), y.to(self.device)
            pred = torch.sigmoid(self.model(X))
            ssim_loss = (1-ms_ssim(pred.real, y.real, data_range=self.norm_scale, size_average=False)).mean()
            
            ## update loss counter
            counter += X.shape[0]
            meanLoss += ssim_loss.item()*X.shape[0]
            
            ## Backpropagation
            self.optimizer.zero_grad()
            ssim_loss.backward() 
            self.optimizer.step()
            
            ## Memory clear
            del X, y, pred
            
            pbar.set_description(f"Training Epoch : {curr_ep} [batch {batch+1}/{total_batch}] - loss = {round(meanLoss/counter,6)}")
            
        self.lossCounter['train'].append(meanLoss/counter)
        
        return meanLoss/counter  

    def testOneEpoch(self, curr_ep):
        
        self.model.to(self.device)
        
        self.model.eval()
        
        pbar = tqdm(enumerate(self.testLoader))
        total_batch = len(self.testLoader)
        pbar.set_description(f"Testing Epoch : {curr_ep}")
        
        meanLoss = 0
        counter = 0
        
        for batch, (X, y) in pbar:
            
            with torch.no_grad():
                
                batch_size = X.size()[0]
                Batch = torch.cat([X, y], axis=0)
                Batch = self.transforms(Batch)
                X = Batch[:batch_size]
                y = Batch[batch_size:]
            
                X, y = X*self.norm_scale, y*self.norm_scale
                X, y = X.to(self.device), y.to(self.device)
                pred = torch.sigmoid(self.model(X))
                ssim_loss = (1-ms_ssim(pred.real, y.real, data_range=self.norm_scale, size_average=False)).mean()
            
                #update loss counter
                counter += X.shape[0]
                meanLoss += ssim_loss.item()*X.shape[0]

                pbar.set_description(f"Testing Epoch : {curr_ep} [batch {batch+1}/{total_batch}] - loss = {round(meanLoss/counter,6)}")
                
                ## Memory clear
                del X, y, pred
            
        self.lossCounter['test'].append(meanLoss/counter)
        
        return meanLoss/counter
    
    def saveLossPLot(self):
        tr_loss = self.lossCounter['train']
        te_loss = self.lossCounter['test']
        plt.plot(tr_loss, label='train loss')
        plt.plot(te_loss, label='test loss')
        plt.legend()
        plt.savefig(f'/study/mrphys/skunkworks/kk/lossPlot/{self.name}_LossPlot.png')
        plt.close()
    
    def trainLoop(self, 
                  epochs,
                  es_patience = 20,
                  lr_patience = 10,
                  fromCheckpoint = False,
                 ):
        patienceCounter = 0,
        bestLoss = 1e9 #REALLY LARGE

        if fromCheckpoint:

            # load model's weight
            self.model.load_state_dict(torch.load(f'/study/mrphys/skunkworks/kk/weights/{self.name}_BEST.pth', map_location=self.device))

            # load training log
            with open(f'/study/mrphys/skunkworks/kk/logs/{self.name}_logs.json', 'r') as f:
                self.lossCounter = json.load(f)
                bestLoss = np.min(self.lossCounter['test'])
                patienceCounter = len(self.lossCounter['test'])-1-np.argmin(self.lossCounter['test'])

        
        for curr_ep in range(epochs):
            meanTrainLoss = self.trainOneEpoch(curr_ep)
            meanTestLoss = self.testOneEpoch(curr_ep)
            self.saveLossPLot()

            #EARLYSTOPPING
            if bestLoss > meanTestLoss:
                bestLoss = meanTestLoss
                patienceCounter = 0
                torch.save(self.model.state_dict(), f'/study/mrphys/skunkworks/kk/weights/{self.name}_BEST.pth')
                with open(f'/study/mrphys/skunkworks/kk/logs/{self.name}_logs.json', 'w') as f:
                    json.dump(self.lossCounter, f)
            else:
                patienceCounter += 1
                
            print(f'Early Stopping Counter = {patienceCounter}/20')

            if patienceCounter>=lr_patience:
                print('Loss stops improving for 10 epochs -> LR step by 0.5')
                self.scheduler.step()
                
            if patienceCounter>=es_patience:
                print('Loss stops improving for 20 epochs -> EARLY STOPPING')
                break

loading datasets: 100%|█████████████████████████████████████████████████████████████████| 65/65 [03:23<00:00,  3.13s/it]


In [None]:
transformSet = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
        ])

BATCHSIZE = 16

trainDataset = torch.utils.data.ConcatDataset(trainData)
testDataset = torch.utils.data.ConcatDataset(testData)

trainDataloader = DataLoader(dataset=trainDataset, batch_size=BATCHSIZE, shuffle=True)
testDataloader = DataLoader(dataset=testDataset, batch_size=BATCHSIZE, shuffle=False)

model = unet.UNet(16,
                16,
                f_maps=16,
                layer_order=['separable convolution', 'relu'],
                depth=3,
                layer_growth=2.0,
                residual=True,
                complex_input=True,
                complex_kernel=True,
                ndims=2,
                padding=1)


trainer = Trainer(model, 
    1e-3,
    trainDataloader, 
    testDataloader,
    norm_scale = 1,
    transforms=transformIdentity,
    model_name = 'mriUnet_features_dsep',
)
trainer.trainLoop(1000, fromCheckpoint = False)

Crop amount [(-4, -4, -4, -4), (-16, -16, -16, -16)]


Training Epoch : 0 [batch 880/880] - loss = 0.193466: : 880it [08:28,  1.73it/s]
Testing Epoch : 0 [batch 160/160] - loss = 0.178673: : 160it [01:02,  2.55it/s]


Early Stopping Counter = 0/20


Training Epoch : 1 [batch 880/880] - loss = 0.183324: : 880it [07:48,  1.88it/s]
Testing Epoch : 1 [batch 160/160] - loss = 0.176511: : 160it [00:57,  2.81it/s]


Early Stopping Counter = 0/20


Training Epoch : 2 [batch 880/880] - loss = 0.181952: : 880it [07:49,  1.88it/s]
Testing Epoch : 2 [batch 160/160] - loss = 0.175588: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 0/20


Training Epoch : 3 [batch 880/880] - loss = 0.181262: : 880it [07:48,  1.88it/s]
Testing Epoch : 3 [batch 160/160] - loss = 0.175384: : 160it [00:56,  2.82it/s]


Early Stopping Counter = 0/20


Training Epoch : 4 [batch 880/880] - loss = 0.180721: : 880it [07:53,  1.86it/s]
Testing Epoch : 4 [batch 160/160] - loss = 0.174762: : 160it [00:55,  2.88it/s]


Early Stopping Counter = 0/20


Training Epoch : 5 [batch 880/880] - loss = 0.180222: : 880it [07:59,  1.83it/s]
Testing Epoch : 5 [batch 160/160] - loss = 0.17434: : 160it [00:59,  2.69it/s] 


Early Stopping Counter = 0/20


Training Epoch : 6 [batch 880/880] - loss = 0.179838: : 880it [07:58,  1.84it/s]
Testing Epoch : 6 [batch 160/160] - loss = 0.174184: : 160it [00:59,  2.71it/s]


Early Stopping Counter = 0/20


Training Epoch : 7 [batch 880/880] - loss = 0.179534: : 880it [07:54,  1.85it/s]
Testing Epoch : 7 [batch 160/160] - loss = 0.173901: : 160it [00:59,  2.69it/s]


Early Stopping Counter = 0/20


Training Epoch : 8 [batch 880/880] - loss = 0.179243: : 880it [07:56,  1.85it/s]
Testing Epoch : 8 [batch 160/160] - loss = 0.173767: : 160it [00:58,  2.72it/s]


Early Stopping Counter = 0/20


Training Epoch : 9 [batch 880/880] - loss = 0.178989: : 880it [07:51,  1.87it/s]
Testing Epoch : 9 [batch 160/160] - loss = 0.173505: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 0/20


Training Epoch : 10 [batch 880/880] - loss = 0.178761: : 880it [07:50,  1.87it/s]
Testing Epoch : 10 [batch 160/160] - loss = 0.173284: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 0/20


Training Epoch : 11 [batch 880/880] - loss = 0.178591: : 880it [07:53,  1.86it/s]
Testing Epoch : 11 [batch 160/160] - loss = 0.173363: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 1/20


Training Epoch : 12 [batch 880/880] - loss = 0.178398: : 880it [07:50,  1.87it/s]
Testing Epoch : 12 [batch 160/160] - loss = 0.173066: : 160it [00:56,  2.81it/s]


Early Stopping Counter = 0/20


Training Epoch : 13 [batch 880/880] - loss = 0.178269: : 880it [07:48,  1.88it/s]
Testing Epoch : 13 [batch 160/160] - loss = 0.172966: : 160it [00:56,  2.83it/s]


Early Stopping Counter = 0/20


Training Epoch : 14 [batch 880/880] - loss = 0.178155: : 880it [07:47,  1.88it/s]
Testing Epoch : 14 [batch 160/160] - loss = 0.173021: : 160it [00:56,  2.82it/s]


Early Stopping Counter = 1/20


Training Epoch : 15 [batch 880/880] - loss = 0.178068: : 880it [07:53,  1.86it/s]
Testing Epoch : 15 [batch 160/160] - loss = 0.172855: : 160it [00:56,  2.81it/s]


Early Stopping Counter = 0/20


Training Epoch : 16 [batch 880/880] - loss = 0.17797: : 880it [07:47,  1.88it/s] 
Testing Epoch : 16 [batch 160/160] - loss = 0.172866: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 1/20


Training Epoch : 17 [batch 851/880] - loss = 0.177858: : 850it [07:33,  1.90it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Training Epoch : 21 [batch 880/880] - loss = 0.17746: : 880it [07:49,  1.88it/s] 
Testing Epoch : 21 [batch 160/160] - loss = 0.172421: : 160it [00:53,  3.00it/s]


Early Stopping Counter = 0/20


Training Epoch : 22 [batch 732/880] - loss = 0.177434: : 731it [06:27,  1.87it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Testing Epoch : 26 [batch 160/160] - loss = 0.172195: : 160it [00:56,  2.83it/s]


Early Stopping Counter = 0/20


Training Epoch : 27 [batch 880/880] - loss = 0.177066: : 880it [07:58,  1.84it/s]
Testing Epoch : 27 [batch 160/160] - loss = 0.17212: : 160it [00:58,  2.73it/s] 


Early Stopping Counter = 0/20


Training Epoch : 28 [batch 880/880] - loss = 0.177006: : 880it [07:47,  1.88it/s]
Testing Epoch : 28 [batch 160/160] - loss = 0.172296: : 160it [00:56,  2.82it/s]


Early Stopping Counter = 1/20


Training Epoch : 29 [batch 754/880] - loss = 0.176879: : 753it [06:41,  1.75it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Training Epoch : 33 [batch 880/880] - loss = 0.176801: : 880it [07:51,  1.86it/s]
Testing Epoch : 33 [batch 160/160] - loss = 0.17196: : 160it [00:58,  2.72it/s] 


Early Stopping Counter = 1/20


Training Epoch : 34 [batch 578/880] - loss = 0.176711: : 577it [05:09,  1.87it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Training Epoch : 38 [batch 880/880] - loss = 0.176627: : 880it [07:57,  1.84it/s]
Testing Epoch : 38 [batch 160/160] - loss = 0.171846: : 160it [00:56,  2.84it/s]


Early Stopping Counter = 1/20


Training Epoch : 39 [batch 723/880] - loss = 0.176438: : 723it [06:29,  1.79it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Testing Epoch : 43 [batch 160/160] - loss = 0.171723: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 0/20


Training Epoch : 44 [batch 880/880] - loss = 0.176451: : 880it [07:53,  1.86it/s]
Testing Epoch : 44 [batch 160/160] - loss = 0.171665: : 160it [00:56,  2.81it/s]


Early Stopping Counter = 0/20


Training Epoch : 45 [batch 349/880] - loss = 0.17692: : 349it [03:09,  1.92it/s] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Training Epoch : 49 [batch 880/880] - loss = 0.176348: : 880it [07:52,  1.86it/s]
Testing Epoch : 49 [batch 160/160] - loss = 0.171807: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 1/20


Training Epoch : 50 [batch 880/880] - loss = 0.176315: : 880it [07:52,  1.86it/s]
Testing Epoch : 50 [batch 160/160] - loss = 0.171798: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 2/20


Training Epoch : 51: : 0it [00:00, ?it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Training Epoch : 54 [batch 880/880] - loss = 0.176225: : 880it [07:55,  1.85it/s]
Testing Epoch : 54 [batch 160/160] - loss = 0.171656: : 160it [00:56,  2.83it/s]


Early Stopping Counter = 1/20


Training Epoch : 55 [batch 880/880] - loss = 0.176208: : 880it [07:51,  1.87it/s]
Testing Epoch : 55 [batch 160/160] - loss = 0.171536: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 0/20


Training Epoch : 56 [batch 880/880] - loss = 0.176197: : 880it [07:52,  1.86it/s]
Testing Epoch : 56 [batch 160/160] - loss = 0.171509: : 160it [00:56,  2.82it/s]


Early Stopping Counter = 0/20


Training Epoch : 57 [batch 880/880] - loss = 0.17617: : 880it [07:51,  1.87it/s] 
Testing Epoch : 57 [batch 160/160] - loss = 0.171514: : 160it [00:56,  2.82it/s]


Early Stopping Counter = 1/20


Training Epoch : 58 [batch 880/880] - loss = 0.176138: : 880it [07:53,  1.86it/s]
Testing Epoch : 58 [batch 160/160] - loss = 0.171568: : 160it [00:56,  2.83it/s]


Early Stopping Counter = 2/20


Training Epoch : 59 [batch 880/880] - loss = 0.176126: : 880it [07:53,  1.86it/s]
Testing Epoch : 59 [batch 160/160] - loss = 0.171491: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 0/20


Training Epoch : 60 [batch 880/880] - loss = 0.176099: : 880it [07:54,  1.86it/s]
Testing Epoch : 60 [batch 160/160] - loss = 0.171402: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 0/20


Training Epoch : 61 [batch 880/880] - loss = 0.17608: : 880it [07:57,  1.84it/s] 
Testing Epoch : 61 [batch 160/160] - loss = 0.171359: : 160it [00:56,  2.81it/s]


Early Stopping Counter = 0/20


Training Epoch : 62 [batch 880/880] - loss = 0.176068: : 880it [07:56,  1.85it/s]
Testing Epoch : 62 [batch 160/160] - loss = 0.171306: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 0/20


Training Epoch : 63 [batch 880/880] - loss = 0.176069: : 880it [07:56,  1.85it/s]
Testing Epoch : 63 [batch 160/160] - loss = 0.171626: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 1/20


Training Epoch : 64 [batch 880/880] - loss = 0.176015: : 880it [07:54,  1.86it/s]
Testing Epoch : 64 [batch 160/160] - loss = 0.171528: : 160it [00:57,  2.81it/s]


Early Stopping Counter = 2/20


Training Epoch : 65 [batch 880/880] - loss = 0.176: : 880it [07:59,  1.83it/s]   
Testing Epoch : 65 [batch 160/160] - loss = 0.171551: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 3/20


Training Epoch : 66 [batch 880/880] - loss = 0.175989: : 880it [07:52,  1.86it/s]
Testing Epoch : 66 [batch 160/160] - loss = 0.171312: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 4/20


Training Epoch : 67 [batch 880/880] - loss = 0.175967: : 880it [07:55,  1.85it/s]
Testing Epoch : 67 [batch 160/160] - loss = 0.1716: : 160it [00:57,  2.77it/s]  


Early Stopping Counter = 5/20


Training Epoch : 68 [batch 880/880] - loss = 0.17595: : 880it [07:54,  1.86it/s] 
Testing Epoch : 68 [batch 160/160] - loss = 0.171388: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 6/20


Training Epoch : 69 [batch 880/880] - loss = 0.175944: : 880it [07:51,  1.87it/s]
Testing Epoch : 69 [batch 160/160] - loss = 0.171351: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 7/20


Training Epoch : 70 [batch 880/880] - loss = 0.175909: : 880it [07:54,  1.86it/s]
Testing Epoch : 70 [batch 160/160] - loss = 0.171332: : 160it [00:56,  2.81it/s]


Early Stopping Counter = 8/20


Training Epoch : 71 [batch 880/880] - loss = 0.175907: : 880it [07:49,  1.88it/s]
Testing Epoch : 71 [batch 160/160] - loss = 0.171355: : 160it [00:56,  2.83it/s]


Early Stopping Counter = 9/20


Training Epoch : 72 [batch 880/880] - loss = 0.175886: : 880it [07:54,  1.85it/s]
Testing Epoch : 72 [batch 160/160] - loss = 0.171391: : 160it [00:56,  2.84it/s]


Early Stopping Counter = 10/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 73 [batch 880/880] - loss = 0.175724: : 880it [07:52,  1.86it/s]
Testing Epoch : 73 [batch 160/160] - loss = 0.171239: : 160it [00:56,  2.84it/s]


Early Stopping Counter = 0/20


Training Epoch : 74 [batch 880/880] - loss = 0.175721: : 880it [07:58,  1.84it/s]
Testing Epoch : 74 [batch 160/160] - loss = 0.171263: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 1/20


Training Epoch : 75 [batch 880/880] - loss = 0.175711: : 880it [07:53,  1.86it/s]
Testing Epoch : 75 [batch 160/160] - loss = 0.171186: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 0/20


Training Epoch : 76 [batch 880/880] - loss = 0.175708: : 880it [07:54,  1.85it/s]
Testing Epoch : 76 [batch 160/160] - loss = 0.171252: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 1/20


Training Epoch : 77 [batch 880/880] - loss = 0.175686: : 880it [07:59,  1.84it/s]
Testing Epoch : 77 [batch 160/160] - loss = 0.171219: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 2/20


Training Epoch : 78 [batch 880/880] - loss = 0.175686: : 880it [07:57,  1.84it/s]
Testing Epoch : 78 [batch 160/160] - loss = 0.171184: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 0/20


Training Epoch : 79 [batch 880/880] - loss = 0.175668: : 880it [07:49,  1.87it/s]
Testing Epoch : 79 [batch 160/160] - loss = 0.171259: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 1/20


Training Epoch : 80 [batch 880/880] - loss = 0.17566: : 880it [07:55,  1.85it/s] 
Testing Epoch : 80 [batch 160/160] - loss = 0.171148: : 160it [00:58,  2.74it/s]


Early Stopping Counter = 0/20


Training Epoch : 81 [batch 880/880] - loss = 0.175659: : 880it [07:55,  1.85it/s]
Testing Epoch : 81 [batch 160/160] - loss = 0.171156: : 160it [00:56,  2.83it/s]


Early Stopping Counter = 1/20


Training Epoch : 82 [batch 880/880] - loss = 0.175647: : 880it [07:52,  1.86it/s]
Testing Epoch : 82 [batch 160/160] - loss = 0.171173: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 2/20


Training Epoch : 83 [batch 880/880] - loss = 0.175641: : 880it [07:58,  1.84it/s]
Testing Epoch : 83 [batch 160/160] - loss = 0.171225: : 160it [00:58,  2.74it/s]


Early Stopping Counter = 3/20


Training Epoch : 84 [batch 880/880] - loss = 0.175624: : 880it [07:54,  1.86it/s]
Testing Epoch : 84 [batch 160/160] - loss = 0.171138: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 0/20


Training Epoch : 85 [batch 880/880] - loss = 0.175622: : 880it [07:55,  1.85it/s]
Testing Epoch : 85 [batch 160/160] - loss = 0.171208: : 160it [00:57,  2.81it/s]


Early Stopping Counter = 1/20


Training Epoch : 86 [batch 880/880] - loss = 0.175618: : 880it [07:51,  1.86it/s]
Testing Epoch : 86 [batch 160/160] - loss = 0.171121: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 0/20


Training Epoch : 87 [batch 880/880] - loss = 0.175609: : 880it [07:51,  1.87it/s]
Testing Epoch : 87 [batch 160/160] - loss = 0.171236: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 1/20


Training Epoch : 88 [batch 880/880] - loss = 0.175596: : 880it [07:54,  1.85it/s]
Testing Epoch : 88 [batch 160/160] - loss = 0.171183: : 160it [00:56,  2.85it/s]


Early Stopping Counter = 2/20


Training Epoch : 89 [batch 880/880] - loss = 0.175598: : 880it [07:57,  1.84it/s]
Testing Epoch : 89 [batch 160/160] - loss = 0.171122: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 3/20


Training Epoch : 90 [batch 880/880] - loss = 0.175576: : 880it [07:53,  1.86it/s]
Testing Epoch : 90 [batch 160/160] - loss = 0.171354: : 160it [00:59,  2.71it/s]


Early Stopping Counter = 4/20


Training Epoch : 91 [batch 880/880] - loss = 0.175582: : 880it [07:56,  1.85it/s]
Testing Epoch : 91 [batch 160/160] - loss = 0.171128: : 160it [00:58,  2.73it/s]


Early Stopping Counter = 5/20


Training Epoch : 92 [batch 880/880] - loss = 0.175569: : 880it [08:00,  1.83it/s]
Testing Epoch : 92 [batch 160/160] - loss = 0.17116: : 160it [00:56,  2.83it/s] 


Early Stopping Counter = 6/20


Training Epoch : 93 [batch 880/880] - loss = 0.175571: : 880it [07:54,  1.86it/s]
Testing Epoch : 93 [batch 160/160] - loss = 0.171458: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 7/20


Training Epoch : 94 [batch 880/880] - loss = 0.175566: : 880it [07:57,  1.84it/s]
Testing Epoch : 94 [batch 160/160] - loss = 0.171054: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 0/20


Training Epoch : 95 [batch 880/880] - loss = 0.17555: : 880it [07:59,  1.84it/s] 
Testing Epoch : 95 [batch 160/160] - loss = 0.171045: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 0/20


Training Epoch : 96 [batch 880/880] - loss = 0.175548: : 880it [07:52,  1.86it/s]
Testing Epoch : 96 [batch 160/160] - loss = 0.171249: : 160it [00:58,  2.76it/s]


Early Stopping Counter = 1/20


Training Epoch : 97 [batch 880/880] - loss = 0.175542: : 880it [07:55,  1.85it/s]
Testing Epoch : 97 [batch 160/160] - loss = 0.171158: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 2/20


Training Epoch : 98 [batch 880/880] - loss = 0.175538: : 880it [07:53,  1.86it/s]
Testing Epoch : 98 [batch 160/160] - loss = 0.171158: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 3/20


Training Epoch : 99 [batch 880/880] - loss = 0.175542: : 880it [07:53,  1.86it/s]
Testing Epoch : 99 [batch 160/160] - loss = 0.171017: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 0/20


Training Epoch : 100 [batch 880/880] - loss = 0.175523: : 880it [07:58,  1.84it/s]
Testing Epoch : 100 [batch 160/160] - loss = 0.171033: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 1/20


Training Epoch : 101 [batch 880/880] - loss = 0.175515: : 880it [07:51,  1.87it/s]
Testing Epoch : 101 [batch 160/160] - loss = 0.171229: : 160it [00:58,  2.74it/s]


Early Stopping Counter = 2/20


Training Epoch : 102 [batch 880/880] - loss = 0.175511: : 880it [07:58,  1.84it/s]
Testing Epoch : 102 [batch 160/160] - loss = 0.171166: : 160it [00:56,  2.84it/s]


Early Stopping Counter = 3/20


Training Epoch : 103 [batch 880/880] - loss = 0.175507: : 880it [07:54,  1.86it/s]
Testing Epoch : 103 [batch 160/160] - loss = 0.171194: : 160it [00:56,  2.81it/s]


Early Stopping Counter = 4/20


Training Epoch : 104 [batch 880/880] - loss = 0.175501: : 880it [07:57,  1.84it/s]
Testing Epoch : 104 [batch 160/160] - loss = 0.171033: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 5/20


Training Epoch : 105 [batch 880/880] - loss = 0.175497: : 880it [07:55,  1.85it/s]
Testing Epoch : 105 [batch 160/160] - loss = 0.171153: : 160it [00:56,  2.83it/s]


Early Stopping Counter = 6/20


Training Epoch : 106 [batch 880/880] - loss = 0.175489: : 880it [07:54,  1.85it/s]
Testing Epoch : 106 [batch 160/160] - loss = 0.171091: : 160it [00:57,  2.76it/s]


Early Stopping Counter = 7/20


Training Epoch : 107 [batch 880/880] - loss = 0.175499: : 880it [07:57,  1.84it/s]
Testing Epoch : 107 [batch 160/160] - loss = 0.171104: : 160it [00:56,  2.81it/s]


Early Stopping Counter = 8/20


Training Epoch : 108 [batch 880/880] - loss = 0.175477: : 880it [07:53,  1.86it/s]
Testing Epoch : 108 [batch 160/160] - loss = 0.171227: : 160it [00:58,  2.76it/s]


Early Stopping Counter = 9/20


Training Epoch : 109 [batch 880/880] - loss = 0.175471: : 880it [07:54,  1.85it/s]
Testing Epoch : 109 [batch 160/160] - loss = 0.171058: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 10/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 110 [batch 880/880] - loss = 0.175384: : 880it [07:57,  1.84it/s]
Testing Epoch : 110 [batch 160/160] - loss = 0.171063: : 160it [00:55,  2.86it/s]


Early Stopping Counter = 11/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 111 [batch 880/880] - loss = 0.175336: : 880it [07:52,  1.86it/s]
Testing Epoch : 111 [batch 160/160] - loss = 0.170997: : 160it [00:57,  2.80it/s]


Early Stopping Counter = 0/20


Training Epoch : 112 [batch 880/880] - loss = 0.175334: : 880it [07:56,  1.85it/s]
Testing Epoch : 112 [batch 160/160] - loss = 0.171074: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 1/20


Training Epoch : 113 [batch 880/880] - loss = 0.175332: : 880it [07:57,  1.84it/s]
Testing Epoch : 113 [batch 160/160] - loss = 0.170975: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 0/20


Training Epoch : 114 [batch 880/880] - loss = 0.175329: : 880it [08:00,  1.83it/s]
Testing Epoch : 114 [batch 160/160] - loss = 0.170965: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 0/20


Training Epoch : 115 [batch 880/880] - loss = 0.17533: : 880it [07:51,  1.87it/s] 
Testing Epoch : 115 [batch 160/160] - loss = 0.170985: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 1/20


Training Epoch : 116 [batch 880/880] - loss = 0.175325: : 880it [07:53,  1.86it/s]
Testing Epoch : 116 [batch 160/160] - loss = 0.171002: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 2/20


Training Epoch : 117 [batch 880/880] - loss = 0.175323: : 880it [07:56,  1.85it/s]
Testing Epoch : 117 [batch 160/160] - loss = 0.171025: : 160it [00:58,  2.73it/s]


Early Stopping Counter = 3/20


Training Epoch : 118 [batch 880/880] - loss = 0.175321: : 880it [07:52,  1.86it/s]
Testing Epoch : 118 [batch 160/160] - loss = 0.17106: : 160it [00:57,  2.80it/s] 


Early Stopping Counter = 4/20


Training Epoch : 119 [batch 880/880] - loss = 0.17532: : 880it [07:57,  1.84it/s] 
Testing Epoch : 119 [batch 160/160] - loss = 0.171089: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 5/20


Training Epoch : 120 [batch 880/880] - loss = 0.175317: : 880it [07:55,  1.85it/s]
Testing Epoch : 120 [batch 160/160] - loss = 0.171051: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 6/20


Training Epoch : 121 [batch 880/880] - loss = 0.175314: : 880it [07:55,  1.85it/s]
Testing Epoch : 121 [batch 160/160] - loss = 0.171042: : 160it [00:58,  2.74it/s]


Early Stopping Counter = 7/20


Training Epoch : 122 [batch 880/880] - loss = 0.17531: : 880it [07:53,  1.86it/s] 
Testing Epoch : 122 [batch 160/160] - loss = 0.171017: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 8/20


Training Epoch : 123 [batch 880/880] - loss = 0.17531: : 880it [07:56,  1.85it/s] 
Testing Epoch : 123 [batch 160/160] - loss = 0.171085: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 9/20


Training Epoch : 124 [batch 880/880] - loss = 0.175306: : 880it [07:54,  1.86it/s]
Testing Epoch : 124 [batch 160/160] - loss = 0.171025: : 160it [00:57,  2.79it/s]


Early Stopping Counter = 10/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 125 [batch 880/880] - loss = 0.17528: : 880it [07:57,  1.84it/s] 
Testing Epoch : 125 [batch 160/160] - loss = 0.170995: : 160it [00:58,  2.71it/s]


Early Stopping Counter = 11/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 126 [batch 880/880] - loss = 0.175266: : 880it [07:55,  1.85it/s]
Testing Epoch : 126 [batch 160/160] - loss = 0.170987: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 12/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 127 [batch 880/880] - loss = 0.175259: : 880it [08:00,  1.83it/s]
Testing Epoch : 127 [batch 160/160] - loss = 0.170989: : 160it [00:57,  2.77it/s]


Early Stopping Counter = 13/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 128 [batch 880/880] - loss = 0.175255: : 880it [07:48,  1.88it/s]
Testing Epoch : 128 [batch 160/160] - loss = 0.170979: : 160it [00:58,  2.75it/s]


Early Stopping Counter = 14/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 129 [batch 880/880] - loss = 0.175252: : 880it [07:54,  1.85it/s]
Testing Epoch : 129 [batch 160/160] - loss = 0.170988: : 160it [00:57,  2.78it/s]


Early Stopping Counter = 15/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 130 [batch 880/880] - loss = 0.175251: : 880it [07:53,  1.86it/s]
Testing Epoch : 130 [batch 160/160] - loss = 0.17098: : 160it [00:57,  2.77it/s] 


Early Stopping Counter = 16/20
Loss stops improving for 10 epochs -> LR step by 0.5


Training Epoch : 131 [batch 357/880] - loss = 0.174939: : 357it [03:13,  1.87it/s]