In [None]:
## Lines for Google Colab to import Drive repository and configure GitHub
from google.colab import drive
drive.mount('/content/gdrive')
%cd /content/gdrive/My Drive/capita_selecta_cvbm/notebooks
!pip install nilearn

In [None]:
## Lines for Google Colab to push to and pull from the GitHub repository
# %cd /content/gdrive/My Drive/capita_selecta_cvbm
# !git pull origin master

# !git remote rm origin
# !git remote add origin https://Beerend:XXXXX@github.com/Beerend/TReNDS.git

# !git pull origin master
# !git status
# !git add train_TReNDS.ipynb
# !git commit -m 'Added MAE loss'
# !git push origin master

In [3]:
import os
import time
import numpy as np
import pandas as pd
import torch
from scipy import stats
from torch.utils.data import DataLoader
from torch.nn import MSELoss, L1Loss
from datasets import TReNDS
from datasets.TReNDS import TReNDSDataset
from models import resnet, deeplight, resnet_4d
from google.colab import output
from importlib import reload

# reload(TReNDS)
# reload(resnet_4d)
# from datasets.TReNDS import TReNDSDataset
# from models import resnet_4d

# Detect Google Colab by checking whether the `google.colab` package has been
# imported into this kernel. Testing `os.name == 'posix'` is not sufficient:
# it is also true on Linux and macOS, so a local machine would be mistaken
# for Colab and the local paths would never be selected.
import sys
google_colab = 'google.colab' in sys.modules

In [5]:
# Select the data location and the project root depending on the environment
if google_colab:
    print('Working from a Google Colab environment')
    data_path = '/content/gdrive/My Drive/capita_selecta_cvbm'
    root = '/content/gdrive/My Drive/capita_selecta_cvbm'
else:
    data_path = '/Volumes/External Hard Drive/Documents/University of Twente/Computer Science/Capita Selecta'
    root = '../'

# Names that are accepted for `model_name`
available_models = ['deeplight',
                    'deeplight_tempframe_26',
                    'deeplight_resnet10',
                    'resnet10',
                    'resnet10_4d']

model_name = 'resnet10_4d'
fold_index = 1

# Options
opts = {
    'rand_seed'  : 1,     # Seed passed to torch.manual_seed below
    'no_cuda'    : False, # False: model and batches are moved to the GPU
    'temp_mean'  : False, # Passed on to the dataset
    'preprocess' : False, #Adds ±1.2s per data sample, per epoch on CPU
    'scale_norm' : True, #Adds no distinctive additional time to processing
    'lr'         : 1e-4,  # Learning rate of the Adam optimizer
    'train_bs'   : 1,     # Batch size of the train loader
    'test_bs'    : 1,     # Batch size of the test loader
    'epochs'     : 60,    # Epochs to train in this run (on top of the epochs of a resumed checkpoint)
    'fold_index' : fold_index,
    'n_splits'   : 5,     # Number of cross-validation splits, passed on to the dataset
    'model_name' : model_name,
    # NOTE(review): when resuming, epoch numbers continue past 60, so epochs
    # above 60 are not contained in the two lists below.
    'save_at_eps': list(range(1,61)), # Epochs after which a checkpoint is written
    'test_at_eps': list(range(1,61)), # Epochs after which the test set is evaluated
    'save_dir'   : os.path.join(root, 'results/%s/%s'%(model_name, str(fold_index))),
    'resume'     : os.path.join(root, 'results/resnet10_4d/1/epoch_3.pth.tar'), # Checkpoint to continue from (None to disable)
    'pretrain'   : None,  # Pre-trained weights; only used when 'resume' is not set
}

# exist_ok=True makes re-running this cell safe and avoids the race between
# checking for the directory and creating it.
os.makedirs(opts['save_dir'], exist_ok=True)

torch.manual_seed(opts['rand_seed'])
# Number of epochs already trained; overwritten when a checkpoint is loaded
earlier_epochs = 0

Working from a Google Colab environment


In [10]:
# Generate model
if model_name not in available_models:
    raise ValueError('Unknown model name: %s (available: %s)'%(model_name, ', '.join(available_models)))

if model_name=='deeplight':
    model = deeplight.original()
elif model_name=='deeplight_tempframe_26':
    model = deeplight.original(temp_frame=26)
elif model_name=='resnet10':
    model = resnet.resnet10(shortcut_type='B', no_cuda=opts['no_cuda'], num_class=1)
elif model_name=='resnet10_4d':
    model = resnet_4d.resnet10_4d(shortcut_type='B', no_cuda=opts['no_cuda'], num_class=1)
else:
    # A name can be listed in `available_models` without a constructor here
    # (e.g. 'deeplight_resnet10'). Fail loudly instead of leaving `model`
    # undefined or silently reusing a model from an earlier run of this cell.
    raise NotImplementedError('No constructor defined for model: %s'%model_name)

# Move the model to the GPU before the optimizer is constructed, as
# recommended by the torch.optim documentation.
if not opts['no_cuda']:
    model.cuda()

# Only parameters that require gradients are handed to the optimizer
optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=opts['lr']) #, betas=(.9,.999), eps=1e-08)
mse   = MSELoss()
mae   = L1Loss()

num_params    = sum(p.numel() for p in model.parameters())
num_tr_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Loaded %s (param: %d, trainable: %d, GPU: %s)'%(model_name, num_params, num_tr_params, not opts['no_cuda']))

Loaded resnet10_4d (param: 42799297, trainable: 42799297, GPU: True)


In [11]:
# Map stored tensors to the CPU when CUDA is disabled; None keeps the default
# behaviour of torch.load (tensors are restored to the device they were saved from).
map_location = torch.device('cpu') if opts['no_cuda'] else None

# Train from checkpoint: restores model weights, optimizer state and epoch counter
if opts['resume']:
    if os.path.isfile(opts['resume']):
        print('Loading checkpoint from:', opts['resume'])
        load_dict = torch.load(opts['resume'], map_location=map_location)
        model.load_state_dict(load_dict['state_dict'])
        optim.load_state_dict(load_dict['optim'])
        earlier_epochs = load_dict['epoch']
        print('Earlier epochs:', earlier_epochs)
    else:
        # Do not fall back to training from scratch without telling the user
        print('WARNING: checkpoint not found, nothing was loaded:', opts['resume'])

# Train from pre-trained model
elif opts['pretrain']:
    if os.path.isfile(opts['pretrain']):
        print('Loading pre-trained weights from:', opts['pretrain'])
        model_dict = model.state_dict()
        pretrain   = torch.load(opts['pretrain'], map_location=map_location)
        # Copy only weights whose key exists in this model, skipping every key that contains 'conv1'
        pretr_dict = {k:v for k,v in pretrain['state_dict'].items() if k in model_dict.keys() and 'conv1' not in k}
        model_dict.update(pretr_dict)
        model.load_state_dict(model_dict)
    else:
        print('WARNING: pre-trained weights not found, nothing was loaded:', opts['pretrain'])

Loading checkpoint from: /content/gdrive/My Drive/capita_selecta_cvbm/results/resnet10_4d/1/epoch_3.pth.tar
Earlier epochs: 3


In [12]:
# Get dataset
# Keyword arguments shared by the train and the test split of the selected fold
dataset_kwargs = dict(
    n_splits=opts['n_splits'],
    fold=fold_index,
    preprocess=opts['preprocess'],
    norm=opts['scale_norm'],
    temp_mean=opts['temp_mean'],
)

# Training data is reshuffled every epoch
train_set    = TReNDSDataset(data_path, 'train', **dataset_kwargs)
train_loader = DataLoader(train_set, shuffle=True, pin_memory=True, batch_size=opts['train_bs'])

# Test data keeps its order
test_set     = TReNDSDataset(data_path, 'test', **dataset_kwargs)
test_loader  = DataLoader(test_set, shuffle=False, pin_memory=True, batch_size=opts['test_bs'])

Loaded dataset with 4701 train samples in fold 1.
Loaded dataset with 1176 test samples in fold 1.


In [None]:
# Train model
# NOTE: this cell calls `evaluate_model`, which is defined in the cell below;
# that cell has to be executed before this one.
log_path   = os.path.join(opts['save_dir'], 'log.txt')
# Epoch numbering continues from a resumed checkpoint, so the last epoch of
# this run is offset by the number of epochs that were trained earlier.
last_epoch = opts['epochs']+earlier_epochs

# Start a fresh log when not resuming; when resuming, only write the header
# if there is no log file to append to yet.
if not opts['resume'] or not os.path.isfile(log_path):
    with open(log_path, 'w') as log_file:
        log_file.write('Epoch,set,time,MSE,MAE,Pearson r,p-value\n')

for epoch in range(1+earlier_epochs, last_epoch+1):
    model.train()
    # TODO: adjust learning rate
    start_time = time.time()
    batches    = len(train_loader)
    tot_mae    = 0.
    tot_mse    = 0.

    for batch_id, (imgs, lbls) in enumerate(train_loader, start=1):
        if not opts['no_cuda']:
            imgs = imgs.cuda()
            lbls = lbls.cuda()

        optim.zero_grad()
        preds    = model(imgs)
        mae_loss = mae(preds, lbls)
        mse_loss = mse(preds, lbls) # Only reported, not optimized
        mae_loss.backward() # Optimizing with MAE
        if model_name=='deeplight':
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optim.step()

        # Average time per batch so far and the estimated remaining time of this epoch in minutes
        avg_batch_time = (time.time()-start_time)/batch_id
        eta = (batches-batch_id)*avg_batch_time/60
        progress = 'Training model => Epoch: %d/%d - batch: %d/%d - loss: %.5f (MSE) %.5f (MAE) - time: %.1f (avg) %.1f (ETA in min.)'%(epoch,
            last_epoch, batch_id, batches, mse_loss.item(), mae_loss.item(), avg_batch_time, eta)
        if google_colab:
            # Replace the previous progress line in the Colab cell output
            output.clear('batch_inf')
            with output.use_tags('batch_inf'):
                print(progress)
        else:
            print(progress, end='\r')

        tot_mae+=mae_loss.item()
        tot_mse+=mse_loss.item()

    # Mean of the per-batch losses of this epoch
    avg_mae  = tot_mae/batches
    avg_mse  = tot_mse/batches
    tot_time = time.time()-start_time

    with open(log_path, 'a') as log_file:
        log_file.write('%d,train,%.1f,%.5f,%.5f\n'%(epoch, tot_time, avg_mse, avg_mae))

    # Always keep a checkpoint of the final epoch of this run, also when resuming
    if epoch==last_epoch or epoch in opts['save_at_eps']:
        filename = os.path.join(opts['save_dir'], 'epoch_%d.pth.tar'%(epoch))
        torch.save({'epoch':epoch, 'state_dict':model.state_dict(),
            'optim':optim.state_dict()}, filename)

    if epoch in opts['test_at_eps']:
        results = evaluate_model(test_loader, model, mae, mse, epoch, opts)

In [13]:
def evaluate_model(test_loader, model, mae, mse, epoch, opts):
    """Evaluate `model` on `test_loader`, store the predictions and log the metrics.

    Side effects: writes `preds_epoch_<epoch>.csv` (columns Pred, Label) to
    opts['save_dir'] and appends one 'test' line (epoch, time, MSE, MAE,
    Pearson r, p-value) to `log.txt` in the same directory. The model is
    switched to eval mode and left in that mode.

    Args:
        test_loader: iterable of (images, labels) batches that supports len().
        model: network to evaluate.
        mae: callable returning the absolute-error loss tensor for (preds, labels).
        mse: callable returning the squared-error loss tensor for (preds, labels).
        epoch: epoch number used in the output file name and in the log line.
        opts: options dict; the keys 'no_cuda' and 'save_dir' are read.

    Returns:
        pandas.DataFrame with the columns 'Pred' and 'Label', holding the
        flattened predictions and labels of all batches.

    Note:
        Reads the notebook-level `google_colab` flag and, on Colab only, the
        `output` module to show progress.
    """
    model.eval()
    start_time = time.time()
    batches    = len(test_loader)
    all_preds  = []
    all_labls  = []
    tot_mae    = 0.
    tot_mse    = 0.

    with torch.no_grad():
        for batch_id, (imgs, lbls) in enumerate(test_loader, start=1):
            if not opts['no_cuda']:
                imgs = imgs.cuda()
                lbls = lbls.cuda()

            preds    = model(imgs)
            mae_loss = mae(preds, lbls)
            mse_loss = mse(preds, lbls)

            # Average time per batch so far and the estimated remaining time in minutes
            avg_batch_time = (time.time()-start_time)/batch_id
            eta = (batches-batch_id)*avg_batch_time/60
            if google_colab:
                output.clear('batch_inf')
                with output.use_tags('batch_inf'):
                    print('Evaluating model => Batch: %d/%d - loss: %.5f (MSE) %.5f (MAE) - time: %.1f (avg) %.1f (ETA in min.)'%(batch_id,
                        batches, mse_loss.item(), mae_loss.item(), avg_batch_time, eta))

            tot_mae+=mae_loss.item()
            tot_mse+=mse_loss.item()
            all_preds.append(preds.detach().cpu().numpy().flatten())
            all_labls.append(lbls.detach().cpu().numpy().flatten())

    # Mean of the per-batch losses
    avg_mae  = tot_mae/batches
    avg_mse  = tot_mse/batches
    tot_time = time.time()-start_time

    all_preds = np.concatenate(all_preds, axis=0)
    all_labls = np.concatenate(all_labls, axis=0)
    filename  = os.path.join(opts['save_dir'], 'preds_epoch_%d.csv'%(epoch))
    results   = pd.DataFrame(data={'Pred':all_preds, 'Label':all_labls})
    results.to_csv(filename, index=False)

    r, p = stats.pearsonr(all_preds.tolist(), all_labls.tolist())

    with open(os.path.join(opts['save_dir'], 'log.txt'), 'a') as log_file:
        log_file.write('%d,test,%.1f,%.5f,%.5f,%.5f,%.7f\n'%(epoch, tot_time, avg_mse, avg_mae, r, p))

    # The Colab output helper is only used when running on Colab, like the progress line above
    if google_colab:
        output.clear('batch_inf')
    # The values are passed in the order in which the message labels them
    print('Average loss: %.3f (MSE) %.3f (MAE)'%(avg_mse, avg_mae))

    return results

**Perform an isolated evaluation and print 20 predictions**

In [None]:
# Epoch number under which this stand-alone evaluation is stored and logged
eval_epoch = 2
results = evaluate_model(test_loader=test_loader, model=model, mae=mae, mse=mse,
                         epoch=eval_epoch, opts=opts)
results.head(20)

**Calculate losses from predictions csv file**

In [None]:
results = pd.read_csv('../results/resnet10_4d/0/preds_epoch_2.csv')

all_preds = results['Pred'].to_numpy()
all_labls = results['Label'].to_numpy()

# Store the metrics under their own names: `mae` and `mse` are the loss
# modules used by the training and evaluation cells and must not be overwritten.
errors  = all_preds-all_labls
csv_mae = np.abs(errors).mean()
csv_mse = (errors**2).mean()
print(csv_mae, csv_mse)

10.847196312925162 181.8990364785956


**Calculate mean, minimum and maximum values of the training set**

In [None]:
# Running statistics over all batches of the train loader.
# The extremes get their own names so the builtins `min` and `max` are not
# shadowed for the rest of the notebook. They start at +/- infinity so they
# are correct for any value range.
assert opts['train_bs']==1 # presumably so that every batch holds exactly one image - TODO confirm

mean     = 0.0
data_min = float('inf')
data_max = float('-inf')

batches  = len(train_loader)
for batch_id, (img, lbls) in enumerate(train_loader, start=1):
    img_mean = torch.mean(img).item()
    img_min  = torch.min(img).item()
    img_max  = torch.max(img).item()

    mean+=img_mean

    if img_min<data_min:
        data_min = img_min
    if img_max>data_max:
        data_max = img_max

    output.clear('batch_inf')
    with output.use_tags('batch_inf'):
        print('Calculating => Batch: %d/%d'%(batch_id, batches))

# Average of the per-batch means
mean/=batches

output.clear('batch_inf')
with output.use_tags('batch_inf'):
    print('Mean:', mean, 'Min:', data_min, 'Max:', data_max, 'Fold:', fold_index)

Mean: 0.0513693805085198 Min: -26.703125 Max: 25.53125 Fold: 4


In [None]:
# Average of the per-batch image means over the train loader followed by the test loader
mean = 0.0

for loader in (train_loader, test_loader):
    batches = len(loader)
    for batch_id, (img, lbls) in enumerate(loader, start=1):
        # Accumulate the mean of this batch
        mean += torch.mean(img).item()

        # Replace the previous progress line in the cell output
        output.clear('batch_inf')
        with output.use_tags('batch_inf'):
            print('Calculating => Batch: %d/%d'%(batch_id, batches))

n_batches_total = len(train_loader)+len(test_loader)
mean /= n_batches_total
print('Mean:', mean)

Calculating => Batch: 1175/1175


Mean: 0.051363078512534084
