# Pytorch "Context" experiment

## The general idea

Any module is just a semi-defined function. We should be able to insert it into any "context" and define it via training the entire context. 

More concretely, a context is a wrapper of pretrained modules on either side with a loss function that we are trying to optimize on the other end.

## Goal:

Define a context object with general optimizer, learning-rate scheduler, and dataloader options. This is basically what my `train_model` function does, but with options.

## new general idea.

Modules are learnable functions. We just slide them in and out as needed and reuse them as desired.

# Libraries

In [14]:
import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable
from torchvision.utils import save_image
from torch.utils.data import  Dataset, DataLoader


import math
from sklearn.mixture import GaussianMixture
from torchvision import datasets, transforms
from collections import OrderedDict
from torch.optim import lr_scheduler
import time
import os
import copy



In [3]:
import nibabel as nib
import dask.dataframe as dd
import dask.array as da
from helperFunctions2 import get_experiment_data, extract_vector_features_from_matrix, extract_covariates_from_matrix
import numpy as np
import pandas as pd
from nilearn import image
from nilearn.image import mean_img
from plot_brain import plot_brain
import dask
from dask.delayed import delayed
from ipywidgets import FloatSlider, ColorPicker, VBox, jslink


# Old Code

In [2]:
class PacDataset(Dataset):
    """Pac dataset - taking one slice out of each image.

    Each item is a single 2-D slice (fixed index along the third axis) of
    the image named by a row's ``file_id``, flattened into a 1-D
    ``torch.Tensor``.
    """

    @staticmethod
    def _jload(file_id, file_template=None):
        """
        Load the data array of one image.

        Args:
            file_id: value substituted into the file template.
            file_template (string, optional): ``%``-style path template.
                Falls back to the class-level ``PacDataset.file_template``
                (set by the most recently constructed instance) when omitted.
        """
        if file_template is None:
            file_template = PacDataset.file_template
        full_path = file_template % file_id
        img = nib.load(full_path)
        # nibabel deprecated get_data(); np.asanyarray(img.dataobj) is the
        # documented drop-in replacement that returns the same array.
        return np.asanyarray(img.dataobj)

    def __init__(self, root_dir="./Pac Data/pac2018/", train=True, z_cut=60):
        """
        Args:
            root_dir (string): Directory with all the images.
            train (bool): use element 0 of ``get_experiment_data(0)`` when
                True, element 1 otherwise.
            z_cut (int): index along the third image axis of the slice
                returned by ``__getitem__``.
        """
        exp0_a = get_experiment_data(0)
        train_sel = 0 if train else 1
        self.train0_df = pd.DataFrame(
            exp0_a[train_sel],
            columns=['file_id', 'cond', 'age', 'gender', 'vol', 'site'])
        self.z_cut = z_cut
        # Keep the template on the instance so that several datasets with
        # different roots do not clobber each other and the path travels with
        # the object when it is pickled for DataLoader workers.
        self.file_template = root_dir + "%s.nii"
        # Still published on the class for code calling _jload(file_id) directly.
        PacDataset.file_template = self.file_template

    def __len__(self):
        """Number of rows (images) in this split."""
        return self.train0_df.shape[0]

    def __getitem__(self, idx):
        """Return the flattened slice of the image in row ``idx``."""
        file_id = self.train0_df.iloc[idx]['file_id']
        img_data = PacDataset._jload(file_id, self.file_template)
        return torch.Tensor(img_data[:, :, self.z_cut].flatten())

In [9]:

# Mean-squared-error loss shared by the training runs below.
criterion = torch.nn.MSELoss()

# One PacDataset per split; the 'val' split is constructed before 'train'.
datasets = dict(
    (split, PacDataset(train=(split == 'train')))
    for split in ('val', 'train')
)

# increasing num_workers sped things up considerably.
dataloaders = dict(
    (split, torch.utils.data.DataLoader(datasets[split],
                                        batch_size=10,
                                        num_workers=5))
    for split in ('train', 'val')
)

# Number of samples per split, used to average the running loss.
dataset_sizes = dict(
    (split, len(datasets[split])) for split in ('train', 'val')
)


In [16]:


use_gpu = use_cuda = torch.cuda.is_available()


def train_model(model,
                criterion,
                optimizer,
                scheduler,
                dataloaders,
                num_epochs=25,
                autoencoder=True,
                pretrained=None):
    """
    General model training function
    (could be a _fit method in a model class).

    Parameters:
    model : nn.Module to train; trained in place and moved to the GPU when
        the module-level ``use_gpu`` flag is set
    criterion : loss, called as ``criterion(outputs, labels)``
    optimizer : optimizer holding the parameters to update
    scheduler : learning-rate scheduler, stepped once per epoch after the
        training phase
    dataloaders : dict with 'train' and 'val' entries. Each yields input
        batches when ``autoencoder`` is True and ``(inputs, labels)``
        batches otherwise
    num_epochs : number of passes over both phases
    autoencoder : when True the (possibly transformed) inputs are also the
        targets and the best weights are chosen by lowest validation loss;
        when False they are chosen by highest validation accuracy
    pretrained : optional module applied to the inputs before ``model``
        (used to build stacked auto-encoders); it is run without gradient
        tracking and is moved to the same device as the inputs

    Returns: ``model`` with the best validation weights loaded
    """
    since = time.time()

    # Put everything on one device once, instead of once per batch.
    device = torch.device('cuda' if use_gpu else 'cpu')
    model.to(device)
    if pretrained is not None:
        pretrained.to(device)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = float('inf')

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # training mode / evaluate mode

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            # Iterate over data.
            for data in dataloaders[phase]:
                if autoencoder:
                    inputs = data.to(device)
                    labels = None
                else:
                    inputs, labels = data
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                if pretrained is not None:
                    # The pretrained stage only transforms the inputs; its
                    # output must not carry a graph, because in autoencoder
                    # mode it is also the regression target.
                    with torch.no_grad():
                        inputs = pretrained(inputs)
                if autoencoder:
                    labels = inputs

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                num_samples += batch_size
                running_loss += loss.item() * batch_size
                if not autoencoder:
                    _, preds = torch.max(outputs, 1)
                    running_corrects += torch.sum(preds == labels).item()

            if is_train:
                # Step the schedule after this epoch's optimizer updates.
                scheduler.step()

            # Average over the samples actually seen; guard an empty loader.
            denom = max(num_samples, 1)
            epoch_loss = running_loss / denom
            epoch_acc = running_corrects / denom

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val':
                if autoencoder:
                    # Accuracy is always 0 for an autoencoder, so selecting
                    # on it would restore the untrained initial weights.
                    improved = epoch_loss < best_loss
                else:
                    improved = epoch_acc > best_acc
                best_loss = min(best_loss, epoch_loss)
                best_acc = max(best_acc, epoch_acc)
                if improved:
                    best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    if autoencoder:
        print('Best val Loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

# New Code

In [41]:
def _set_requires_grad(mod, make_copy, requires_grad):
    """Set ``requires_grad`` on every parameter of ``mod`` (or of a deep copy)."""
    mod_ = copy.deepcopy(mod) if make_copy else mod
    for p in mod_.parameters():
        p.requires_grad = requires_grad
    return mod_


def freeze_module(mod, make_copy=False):
    """
    Parameters:
    mod : a nn.Module
    make_copy: boolean to determine if a copy is returned. When False the
        module is modified in place and ``mod`` itself is returned; when
        True ``mod`` is left untouched and a frozen deep copy is returned

    Returns: a module with all parameter's gradients turned off
    """
    return _set_requires_grad(mod, make_copy, False)


def thaw_module(mod, make_copy=False):
    """
    Parameters:
    mod : a nn.Module
    make_copy: boolean to determine if a copy is returned. When False the
        module is modified in place and ``mod`` itself is returned; when
        True ``mod`` is left untouched and a thawed deep copy is returned

    Returns: a module with all parameter's gradients turned on
    """
    return _set_requires_grad(mod, make_copy, True)


def _same_state(a, b):
    """True when two modules have identical state_dict keys and tensor values."""
    sa, sb = a.state_dict(), b.state_dict()
    return (list(sa.keys()) == list(sb.keys())
            and all(torch.equal(sa[k], sb[k]) for k in sa))


def test_freeze_module():
    m = nn.Linear(3, 5)
    mm = freeze_module(m, make_copy=True)
    assert isinstance(mm, nn.Module)
    # a copy was requested: distinct object, same weights, original untouched
    assert mm is not m
    assert _same_state(m, mm)
    assert all(not p.requires_grad for p in mm.parameters())
    assert all(p.requires_grad for p in m.parameters())
    # default: freeze in place
    assert freeze_module(m) is m
    assert all(not p.requires_grad for p in m.parameters())


def test_thaw_module():
    # start from a frozen module so that thawing is actually observable
    m = freeze_module(nn.Linear(3, 5))
    mm = thaw_module(m, make_copy=True)
    assert isinstance(mm, nn.Module)
    assert mm is not m
    assert _same_state(m, mm)
    assert all(p.requires_grad for p in mm.parameters())
    assert all(not p.requires_grad for p in m.parameters())
    # default: thaw in place
    assert thaw_module(m) is m
    assert all(p.requires_grad for p in m.parameters())

In [42]:
# Run the freeze/thaw checks; an AssertionError means one of them failed.
test_freeze_module()
test_thaw_module()

In [61]:
def make_ae(in_out_features, hidden_features):
    """
    Build a one-hidden-layer auto-encoder.

    Parameters:
    in_out_features : width of both the input and the reconstructed output
    hidden_features : width of the hidden layer

    Returns: nn.Sequential of Linear -> ReLU -> Linear
    """
    encoder = nn.Linear(in_out_features, hidden_features)
    decoder = nn.Linear(hidden_features, in_out_features)
    layers = [encoder, nn.ReLU(), decoder]
    return nn.Sequential(*layers)

def test_make_ae():
    """Check that make_ae(10, 5) yields a 10 -> 5 -> 10 nn.Sequential."""
    ae = make_ae(10, 5)
    assert isinstance(ae, nn.Sequential)
    encoder = ae[0]
    assert encoder.in_features == 10
    assert encoder.out_features == 5
    decoder = ae[2]
    assert decoder.in_features == 5
    assert decoder.out_features == 10

In [62]:
# Run the make_ae check; an AssertionError means it failed.
test_make_ae()

In [94]:
def stack_ae(hidden_features, stack_in=1, stack_ae_net=None):
    """
    Grow a stacked auto-encoder by one level.

    stack_ae_net is a sequence of two modules
    - the first ('stack') is the trained, frozen sequence (or None)
    - the second ('ae_net') is a freshly trained ae

    The encoder half of ae_net (its first Linear) plus a ReLU is appended to
    the stack, and a new ae is created whose input width is that encoder's
    output width.

    Parameters:
    hidden_features : hidden width of the new ae
    stack_in : input width of the very first ae; only used when
        stack_ae_net is None
    stack_ae_net : result of a previous call, or None to start a new stack.
        It is not modified: the layers placed in the returned stack are
        copies

    Returns: a nn.Sequential object of two parts ('stack' and 'ae_net'), the
    first trained and frozen, the latter to be trained
    """
    if stack_ae_net is None:
        # if no stack_ae_net, make one with empty stack and new ae
        new_ae = make_ae(stack_in, hidden_features)
        return nn.Sequential(
            OrderedDict([('stack', None), ('ae_net', new_ae)]))

    old_stack = stack_ae_net.stack
    encoder = stack_ae_net.ae_net[0]
    new_ae = make_ae(encoder.out_features, hidden_features)

    # Collect the existing stack layers under their current names.
    layers = OrderedDict()
    if old_stack is not None:
        layers.update(old_stack.named_children())

    # Suffix is the number of modules already in the stack, so successive
    # levels are named ae0/ReLU0, ae2/ReLU2, ...
    stack_depth = len(layers)
    layers[f'ae{stack_depth}'] = encoder
    layers[f'ReLU{stack_depth}'] = nn.ReLU()

    # Deep-copy before freezing so that the layers still owned by
    # stack_ae_net are never altered, whether or not freeze_module works
    # in place.
    new_stack = freeze_module(copy.deepcopy(nn.Sequential(layers)))

    return nn.Sequential(OrderedDict(
        [('stack', new_stack), ('ae_net', new_ae)]))
    
def test_stack_ae():
    """Stack three levels (5, 8, 4 hidden units) and check the resulting layout."""
    net = stack_ae(5, stack_in=3)
    print(net)
    assert isinstance(net, nn.Sequential)
    for width in (8, 4):
        net = stack_ae(width, stack_ae_net=net)
    print(net)
    assert isinstance(net, nn.Sequential)
    assert isinstance(net.stack, nn.Sequential)
    assert isinstance(net.ae_net, nn.Sequential)
    # the last Linear of the stack must feed the new auto-encoder
    last_encoder = net.stack[-2]
    assert last_encoder.out_features == net.ae_net[0].in_features
    
    

In [86]:
# Run the stack_ae check; it prints the network after the first and last stacking step.
test_stack_ae()

Sequential(
  (stack): None
  (ae_net): Sequential(
    (0): Linear(in_features=3, out_features=5, bias=True)
    (1): ReLU()
    (2): Linear(in_features=5, out_features=3, bias=True)
  )
)
Sequential(
  (stack): Sequential(
    (ae0): Linear(in_features=3, out_features=5, bias=True)
    (ReLU0): ReLU()
    (ae2): Linear(in_features=5, out_features=8, bias=True)
    (ReLU2): ReLU()
  )
  (ae_net): Sequential(
    (0): Linear(in_features=8, out_features=4, bias=True)
    (1): ReLU()
    (2): Linear(in_features=4, out_features=8, bias=True)
  )
)


In [96]:

# make initial full_stack having no stack and an ae_net (17545x500x17545)
full_stack = stack_ae(500, 121*145)
for num_features in [500, 2000, 10]:
    print(full_stack)
    # Only the newest auto-encoder is optimized.
    optimizer = torch.optim.SGD(full_stack.ae_net.parameters(), lr=0.01)
    # A scheduler only affects the optimizer it was built around, so create
    # one per round. With num_epochs=2 and step_size=7 the rate stays at 0.01.
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    train_model(full_stack.ae_net,
                criterion,
                optimizer,
                scheduler,
                dataloaders,
                num_epochs=2,
                autoencoder=True,
                pretrained=full_stack.stack)
    # Move the trained encoder into the stack and start a new auto-encoder.
    # NOTE(review): the auto-encoder created by the final iteration is never
    # passed to train_model - confirm that this is intended.
    full_stack = stack_ae(num_features, stack_ae_net=full_stack)

Sequential(
  (stack): None
  (ae_net): Sequential(
    (0): Linear(in_features=17545, out_features=2000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=2000, out_features=17545, bias=True)
  )
)
Epoch 0/1
----------
train Loss: 0.0995 Acc: 0.0000
val Loss: 0.0992 Acc: 0.0000

Epoch 1/1
----------
train Loss: 0.0961 Acc: 0.0000
val Loss: 0.0973 Acc: 0.0000

Training complete in 1m 40s
Best val Acc: 0.000000
Sequential(
  (stack): Sequential(
    (ae0): Linear(in_features=17545, out_features=2000, bias=True)
    (ReLU0): ReLU()
  )
  (ae_net): Sequential(
    (0): Linear(in_features=2000, out_features=500, bias=True)
    (1): ReLU()
    (2): Linear(in_features=500, out_features=2000, bias=True)
  )
)
Epoch 0/1
----------
train Loss: 0.0179 Acc: 0.0000
val Loss: 0.0181 Acc: 0.0000

Epoch 1/1
----------
train Loss: 0.0176 Acc: 0.0000
val Loss: 0.0178 Acc: 0.0000

Training complete in 0m 37s
Best val Acc: 0.000000
Sequential(
  (stack): Sequential(
    (ae0): Linear(in_features=175

In [97]:
# Show the layout of full_stack after the stacking loop.
print(full_stack)

Sequential(
  (stack): Sequential(
    (ae0): Linear(in_features=17545, out_features=2000, bias=True)
    (ReLU0): ReLU()
    (ae2): Linear(in_features=2000, out_features=500, bias=True)
    (ReLU2): ReLU()
    (ae4): Linear(in_features=500, out_features=500, bias=True)
    (ReLU4): ReLU()
  )
  (ae_net): Sequential(
    (0): Linear(in_features=500, out_features=10, bias=True)
    (1): ReLU()
    (2): Linear(in_features=10, out_features=500, bias=True)
  )
)


In [52]:
# Construct an empty nn.Sequential to inspect its repr.
nn.Sequential()

Sequential(
)

In [43]:
from IPython.display import SVG