# CNN

Use ResNet to train models that fit five travel-behavior variables from images. By default, only the black-and-white images are used with resnet18 to fit continuous outputs, but the code leaves the flexibility to use other models, image types, and output types. Several components are not in use yet: `bottleneck_resnet18`, `return_bottleneck_renset`, and `train_discrete_model`.

#### To be done: adjustment along many dimensions - hyperparameters, model choice, etc.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import util
import statsmodels.api as sm
from scipy import stats
import copy

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import log_loss
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Always choose the device first: use the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

## Helper Functions

In [3]:
def initialize_data(image_type, output_var, output_type, input_var, BE_var, num_categories, size):
    """Load the first `size` observations and split them 80/20 into train and test sets.

    Parameters
    ----------
    image_type : str
        'rgb', 'bw' or 'merge'. 'merge' concatenates the RGB and BW image arrays
        along axis 1 (RGB first).
    output_var : str
        Column of data_process/df_merged_tract_large.csv used as the target y.
    output_type : str
        'continuous' returns y as float32; 'discrete' bins y into `num_categories`
        quantile classes labelled 0..num_categories-1 and returns them as int.
    input_var : list of str
        Columns returned as x_train / x_test (the NHTS inputs).
    BE_var : list of str
        Columns returned as BE_train / BE_test.
    num_categories : int
        Number of quantile bins; only read when output_type == 'discrete'.
    size : int
        Number of leading rows (and images) to use. Must not exceed the data size.

    Returns
    -------
    tuple
        (y_train, y_test, BE_train, BE_test, x_train, x_test,
         x_train_images, x_test_images); everything except a discrete y is float32.

    Raises
    ------
    ValueError
        If `image_type` or `output_type` is not one of the supported values, or
        if `size` is larger than the number of available rows/images.
    """
    # Validate the switches before touching any file, so a typo fails immediately
    # with a clear message instead of a NameError further down.
    image_files = {
        'rgb': ["data_process/image_array_rgb_tract_large.npy"],
        'bw': ["data_process/image_array_bw_tract_large.npy"],
        'merge': ["data_process/image_array_rgb_tract_large.npy",
                  "data_process/image_array_bw_tract_large.npy"],
    }
    if image_type not in image_files:
        raise ValueError("image_type must be 'rgb', 'bw' or 'merge', got {!r}".format(image_type))
    if output_type not in ('continuous', 'discrete'):
        raise ValueError("output_type must be 'continuous' or 'discrete', got {!r}".format(output_type))

    ### read image array
    # mmap_mode='r' avoids reading the whole file; only the first `size` images are used.
    image_parts = [np.load(path, mmap_mode='r')[:size,] for path in image_files[image_type]]
    if len(image_parts) == 1:
        image_array = image_parts[0]
    else:
        image_array = np.concatenate(image_parts, axis=1)

    ### create output array
    df_ = pd.read_csv("data_process/df_merged_tract_large.csv")
    if size > len(df_) or size > image_array.shape[0]:
        raise ValueError("size={} exceeds the available data ({} rows, {} images)".format(
            size, len(df_), image_array.shape[0]))
    df = df_.iloc[:size,]
    y_ = df[output_var].values
    # cut y into categories for discrete variables
    if output_type == 'continuous':
        y = y_
        y_dtype = "float32"
    else:
        y = np.array(pd.qcut(y_, q = num_categories, labels=np.arange(num_categories)))
        y_dtype = "int"
    x = df[input_var].values
    BE = df[BE_var].values

    ### randomization
    shuffle_idx = np.arange(size)
    np.random.seed(0) # important: don't change the seed number, unless the seed number across scripts are all changed.
    np.random.shuffle(shuffle_idx)
    train_ratio = 0.8
    n_train = int(train_ratio*size)
    train_idx = shuffle_idx[:n_train]
    test_idx = shuffle_idx[n_train:]

    # y
    y_train = y[train_idx].astype(y_dtype)
    y_test = y[test_idx].astype(y_dtype)
    # BE
    BE_train = BE[train_idx].astype("float32")
    BE_test = BE[test_idx].astype("float32")
    # image array
    x_train_images = image_array[train_idx,].astype("float32")
    x_test_images = image_array[test_idx,].astype("float32")
    # NHTS
    x_train = x[train_idx].astype("float32")
    x_test = x[test_idx].astype("float32")

    return y_train,y_test,BE_train,BE_test,x_train,x_test,x_train_images,x_test_images

# # test 
# image_type = 'bw'
# output_var = 'HHFAMINC_mean'
# output_type = 'continuous'
# input_var=['R_AGE_IMP_mean', 'HHSIZE_mean', 'HHFAMINC_mean', 'HBHTNRNT_mean', 'HBPPOPDN_mean', 'HBRESDN_mean', 
#            'R_SEX_IMP_2_mean', 'EDUC_2_mean', 'HH_RACE_2_mean', 'HOMEOWN_1_mean', 'HOMEOWN_2_mean',
#            'HBHUR_R_mean', 'HBHUR_S_mean', 'HBHUR_T_mean','HBHUR_U_mean']
# BE_var = ['density', 'diversity', 'design']
# num_categories = 1 # (1) certain category values can cause errors. (2) when output_type = 'continuous', this value needs to be 1.
# size = 10000 # size needs to be smaller than the max
# # 
# y_train,y_test,BE_train,BE_test,x_train,x_test,x_train_images,x_test_images = \
#     initialize_data(image_type, output_var, output_type, input_var, BE_var, num_categories, size)

# print(y_train.shape)
# print(y_test.shape)
# print(x_train_images.shape)
# print(x_test_images.shape)
# plt.figure()
# plt.boxplot(y_train)
# plt.figure()
# plt.boxplot(y_test)

In [4]:
def _set_requires_grad(model, flag):
    # Switch gradient tracking on/off for every parameter currently in `model`.
    for param in model.parameters():
        param.requires_grad = flag


def initialize_model(model_name, num_categories, input_channels = 3, use_pretrained=True, full_training=False):
    """Build a torchvision CNN with a custom input-channel count and output size.

    Parameters
    ----------
    model_name : str
        One of 'resnet18', 'alexnet', 'vgg', 'squeezenet', 'densenet',
        'wide_resnet', 'mnasnet'.
    num_categories : int
        Number of output units. For continuous variables, use num_categories = 1.
    input_channels : int
        Number of image channels. When it differs from 3 the first convolution is
        replaced by a new, randomly initialised one.
    use_pretrained : bool
        Passed to torchvision as `pretrained=`.
    full_training : bool
        Value assigned to `requires_grad` on all parameters of the torchvision
        model. Layers replaced afterwards (first convolution, output layer) are
        new modules and therefore always trainable.

    Returns
    -------
    torch.nn.Module
        The configured model (not yet moved to any device).

    Raises
    ------
    ValueError
        If `model_name` is not supported.
    """
    if model_name == 'resnet18':
        model_ft = models.resnet18(pretrained=use_pretrained)
        _set_requires_grad(model_ft, full_training)
        if input_channels != 3:
            # Edit the input channels.
            model_ft.conv1 = nn.Conv2d(input_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_categories)

    elif model_name == 'alexnet':
        model_ft = models.alexnet(pretrained=use_pretrained)
        _set_requires_grad(model_ft, full_training)
        if input_channels != 3:
            model_ft.features[0] = nn.Conv2d(input_channels, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
        model_ft.classifier[6] = nn.Linear(model_ft.classifier[6].in_features, num_categories)

    elif model_name == 'vgg':
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        _set_requires_grad(model_ft, full_training)
        if input_channels != 3:
            model_ft.features[0] = nn.Conv2d(input_channels, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        model_ft.classifier[6] = nn.Linear(model_ft.classifier[6].in_features, num_categories)

    elif model_name == 'squeezenet':
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        _set_requires_grad(model_ft, full_training)
        if input_channels != 3:
            model_ft.features[0] = nn.Conv2d(input_channels, 96, kernel_size=(7, 7), stride=(2, 2))
        model_ft.classifier[1] = nn.Conv2d(512, num_categories, kernel_size=(1,1), stride=(1,1))
        # Keep the bookkeeping attribute in sync with the new head.
        # NOTE(review): some torchvision releases appear to read `num_classes`
        # when reshaping the output in forward() - confirm for the pinned version.
        model_ft.num_classes = num_categories

    elif model_name == 'densenet':
        model_ft = models.densenet121(pretrained=use_pretrained)
        _set_requires_grad(model_ft, full_training)
        if input_channels != 3:
            model_ft.features[0] = nn.Conv2d(input_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        model_ft.classifier = nn.Linear(model_ft.classifier.in_features, num_categories)

    elif model_name == 'wide_resnet':
        model_ft = models.wide_resnet50_2(pretrained=use_pretrained)
        _set_requires_grad(model_ft, full_training)
        if input_channels != 3:
            model_ft.conv1 = nn.Conv2d(input_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        model_ft.fc = nn.Linear(model_ft.fc.in_features, num_categories)

    elif model_name == 'mnasnet':
        model_ft = models.mnasnet1_0(pretrained=use_pretrained)
        _set_requires_grad(model_ft, full_training)
        if input_channels != 3:
            model_ft.layers[0] = nn.Conv2d(input_channels, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        model_ft.classifier[1] = nn.Linear(model_ft.classifier[1].in_features, num_categories)

    else:
        # Without this branch an unknown name reached `return model_ft` with the
        # variable unbound.
        raise ValueError(
            "unsupported model_name {!r}; expected one of 'resnet18', 'alexnet', 'vgg', "
            "'squeezenet', 'densenet', 'wide_resnet', 'mnasnet'".format(model_name))

    return model_ft

# # test 1. initialize model for continuous var
# model_name = 'resnet18'
# num_categories = 1 
# input_channels = 4
# use_pretrained = True
# full_training = True
# model = initialize_model(model_name, num_categories, input_channels, use_pretrained, full_training)
# model.to(device)

# # test 2. initialize model for discrete var
# model_name = 'resnet18'
# # num_categories = 1 
# input_channels = 4
# use_pretrained = True
# full_training = True
# model = initialize_model(model_name, num_categories, input_channels, use_pretrained, full_training)
# model.to(device)

# # test 3. initialize model for continuous var
# model_name = 'bottleneck_resnet18'
# num_categories = 1 
# input_channels = 4
# use_pretrained = True
# full_training = True
# model = initialize_model(model_name, num_categories, input_channels, use_pretrained, full_training)
# model.to(device)


In [5]:
class bottleneck_resnet18(nn.Module):
    """resnet18 with a narrow bottleneck inserted between layer2 and layer3.

    Goal: create a resnet architecture with a bottleneck in the middle that reduces
    the information into `num_bottleneck` nodes. The output of layer2 is averaged
    down to `num_bottleneck` values per image, expanded back to a 128-channel
    28x28 map, and then passed through layer3, layer4, avgpool and a new linear head.

    NOTE(review): the pooling kernels assume layer2 yields a 128x28x28 map, which
    is presumably the case for 224x224 inputs - confirm for other image sizes.
    This architecture is still experimental.

    Parameters
    ----------
    num_categories : int
        Number of output units of the final linear layer.
    num_bottleneck : int
        Number of bottleneck nodes; 1, 2 or 3.
    input_channels : int
        Number of image channels; when it differs from 3 the first convolution is
        replaced by a new one.
    use_pretrained : bool
        Passed to torchvision as `pretrained=`.
    full_training : bool
        Value assigned to `requires_grad` of the layers taken from resnet18.
        Newly created layers (upsample convolution, replaced first convolution,
        output layer) are always trainable.

    Raises
    ------
    ValueError
        If `num_bottleneck` is not 1, 2 or 3.
    """

    # AvgPool3d kernel for each supported bottleneck width. Channels and height are
    # averaged completely; the width axis is cut into `num_bottleneck` bands.
    _CONDENSE_KERNELS = {1: (128, 28, 28), 2: (128, 28, 14), 3: (128, 28, 9)}

    def __init__(self, num_categories, num_bottleneck, input_channels = 3, use_pretrained=True, full_training=False):
        super(bottleneck_resnet18, self).__init__()
        if num_bottleneck not in self._CONDENSE_KERNELS:
            # Previously an unsupported value left `self.condense` undefined and
            # the failure only showed up in forward().
            raise ValueError("num_bottleneck must be 1, 2 or 3, got {!r}".format(num_bottleneck))
        self.num_bottleneck = num_bottleneck

        ref = models.resnet18(pretrained=use_pretrained)
        # Only the layers borrowed from resnet18 follow `full_training`; freezing a
        # randomly initialised new layer would make it useless.
        for param in ref.parameters():
            param.requires_grad = full_training

        self.sequence1 = nn.Sequential(ref.conv1, ref.bn1, ref.relu, ref.maxpool, ref.layer1,
                                       ref.layer2)
        ### condense
        self.condense = nn.AvgPool3d(self._CONDENSE_KERNELS[num_bottleneck])

        ### upsampling
        self.upsample = nn.Sequential(nn.Conv2d(num_bottleneck, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False),
                                      nn.Upsample((28, 28)))
        self.sequence2 = nn.Sequential(ref.layer3, ref.layer4, ref.avgpool)

        ### edit parameters
        if input_channels != 3:
            self.sequence1[0] = nn.Conv2d(input_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        self.fc = nn.Linear(ref.fc.in_features, num_categories)

    def forward(self, x):
        x = self.sequence1(x)
        # Add an explicit channel axis so AvgPool3d always sees a batched 5-D
        # tensor: (N, 1, C, H, W) -> (N, 1, 1, 1, num_bottleneck).
        x = self.condense(x.unsqueeze(1))
        # The pooled values sit on the last axis, but the upsample convolution
        # expects them as channels: (N, num_bottleneck, 1, 1).
        x = x.reshape(x.size(0), self.num_bottleneck, 1, 1)
        x = self.upsample(x)
        x = self.sequence2(x)
        # Flatten everything except the batch axis. squeeze() would also drop the
        # batch axis for a batch of one image.
        x = torch.flatten(x, 1)
        out = self.fc(x)
        return out

In [6]:
def return_bottleneck_renset(model,device,x_train_images,x_test_images,y_train,y_test):
    """Collect the activations of the bottleneck layer for the train and test images.

    A forward hook is attached to `model.condense` when the model has such an
    attribute, otherwise to `model.layer3[1].conv1`. Every image is pushed through
    the model once, in order (no shuffling), in batches of 50.

    Parameters
    ----------
    model : torch.nn.Module
        Trained network; assumed to live on `device` already.
    device : torch.device
        Device the input batches are moved to.
    x_train_images, x_test_images : numpy.ndarray
        Image arrays; they are divided by 255 before being fed to the model.
    y_train, y_test : numpy.ndarray
        Targets; only used to build the datasets, never compared to predictions.

    Returns
    -------
    (list, list)
        Per-batch hooked outputs for the train and the test images, as detached
        CPU tensors in dataset order.
    """
    # The hook target must exist before any forward pass runs. The original code
    # registered the hook after the pass and overwrote the result list with the
    # hook handle.
    if hasattr(model, 'condense'):
        hooked_module = model.condense
    else:
        hooked_module = model.layer3[1].conv1

    batch_size = 50

    def collect(images, targets):
        dataset = TensorDataset(torch.from_numpy(images/255), torch.from_numpy(targets))
        loader = DataLoader(dataset, batch_size, shuffle = False) # important: NO SHUFFLE.
        captured = []

        def hook(module, inputs, outputs):
            captured.append(outputs.detach().cpu())

        handle = hooked_module.register_forward_hook(hook)
        try:
            for inputs, _ in loader:
                model(inputs.to(device))
        finally:
            # Never leave the hook attached; it would fire on every later forward pass.
            handle.remove()
        return captured

    was_training = model.training
    model.eval()  # inference behaviour for BatchNorm / Dropout layers
    try:
        # no_grad saves memory without permanently freezing the parameters.
        with torch.no_grad():
            bottleneck_train_list = collect(x_train_images, y_train)
            bottleneck_test_list = collect(x_test_images, y_test)
    finally:
        model.train(was_training)

    return bottleneck_train_list,bottleneck_test_list


In [7]:
def train_discrete_model(model, train_dl, test_dl, criterion, optimizer, device, n_epoch = 25):
    """Train a classifier and keep the weights with the best test accuracy.

    So far this function is not used because of bad performance on discrete outputs.

    Parameters
    ----------
    model : torch.nn.Module
        Network producing one score per class; assumed to live on `device` already.
    train_dl, test_dl : iterable of (inputs, labels) batches
        Training and evaluation data.
    criterion : callable
        Loss taking (outputs, labels).
    optimizer : torch.optim.Optimizer
        Optimizer built over the model parameters.
    device : torch.device
        Device the batches are moved to.
    n_epoch : int
        Number of passes over `train_dl`.

    Returns
    -------
    (model, log_loss_train_list, log_loss_test_list, accuracy_train_list, accuracy_test_list)
        The model carries the weights of the epoch with the highest test accuracy.
        The loss lists hold, per epoch, the sum of the per-batch loss values.
    """
    log_loss_train_list=[]
    log_loss_test_list=[]
    accuracy_train_list=[]
    accuracy_test_list=[]

    # automatic model searching.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    was_training = model.training

    for epoch in range(n_epoch):
        running_log_loss_train = 0.0
        running_log_loss_test = 0.0
        correct_train = 0
        total_train = 0
        correct_test = 0
        total_test = 0

        # training: BatchNorm / Dropout layers must be in training mode.
        model.train()
        for inputs, labels in train_dl:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # forward + backward
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # evaluate prediction
            predicted = outputs.detach().argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

            # evaluate log loss
            running_log_loss_train += loss.item()

            # optimize
            optimizer.step()
            optimizer.zero_grad()

        # testing: inference mode and no autograd graph, so evaluation neither
        # updates BatchNorm statistics nor wastes memory on gradients.
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_dl:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                # evaluate log loss
                running_log_loss_test += loss.item()

                # evaluate prediction
                predicted = outputs.argmax(dim=1)
                total_test += labels.size(0)
                correct_test += (predicted == labels).sum().item()

        accuracy_train = correct_train/total_train
        accuracy_test = correct_test/total_test

        # print
        print("Epoch {}: Training Loss {}; Testing Loss {}".format(epoch, running_log_loss_train, running_log_loss_test))
        print("Epoch {}: Training Accuracy {}; Testing Accuracy {}".format(epoch, accuracy_train, accuracy_test))

        # append loss here.
        log_loss_train_list.append(running_log_loss_train)
        log_loss_test_list.append(running_log_loss_test)
        accuracy_train_list.append(accuracy_train)
        accuracy_test_list.append(accuracy_test)

        if accuracy_test > best_acc:
            best_acc = accuracy_test
            best_model_wts = copy.deepcopy(model.state_dict())

    # load the best model weights and hand the model back in the mode it came in
    model.load_state_dict(best_model_wts)
    model.train(was_training)
    return model, log_loss_train_list, log_loss_test_list, accuracy_train_list, accuracy_test_list


In [8]:
def train_continuous_model(model, train_dl, test_dl, criterion, optimizer, device, total_mse_train, total_mse_test, n_epoch = 25):
    """Train a regression model and keep the weights with the best test R2.

    Parameters
    ----------
    model : torch.nn.Module
        Network with one output per sample; assumed to live on `device` already.
    train_dl, test_dl : iterable of (inputs, labels) batches
        Training and evaluation data.
    criterion : callable
        Loss taking (outputs.view(-1), labels). It is assumed to return the batch
        mean (e.g. nn.MSELoss(reduction='mean')), so that loss * batch length is
        the batch's sum of squared errors.
    optimizer : torch.optim.Optimizer
        Optimizer built over the model parameters.
    device : torch.device
        Device the batches are moved to.
    total_mse_train, total_mse_test : torch.Tensor
        Baseline sums of squared errors (predicting the mean); `.item()` is called
        on them to compute R2 = 1 - model error / baseline error.
    n_epoch : int
        Maximum number of passes over `train_dl`.

    Returns
    -------
    (model, mse_train_list, mse_test_list, r_square_train_list, r_square_test_list)
        The model carries the weights of the epoch with the highest positive test
        R2 (the initial weights if no epoch reaches a positive test R2). The "mse"
        lists hold per-epoch sums of squared errors.
    """
    mse_train_list = []
    mse_test_list = []
    r_square_train_list = []
    r_square_test_list = []

    # automatic model searching.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_r_square = 0.0
    was_training = model.training

    for epoch in range(n_epoch):
        running_mse_train = 0.0
        running_mse_test = 0.0

        # training: BatchNorm / Dropout layers must be in training mode.
        model.train()
        for inputs, labels in train_dl:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # forward + backward
            outputs = model(inputs)
            # .view(-1) turns the (batch, 1) predictions into (batch,) to match labels.
            loss = criterion(outputs.view(-1), labels)
            loss.backward()

            # Weight by the real length of this batch. Relying on a global
            # `batch_size` over-counts a shorter final batch and breaks when
            # that global does not exist.
            running_mse_train += loss.item()*labels.size(0)

            # optimize
            optimizer.step()
            optimizer.zero_grad()

        # testing: inference mode and no autograd graph.
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_dl:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs.view(-1), labels)
                running_mse_test += loss.item()*labels.size(0)

        # R square for the epoch
        running_r_square_train = 1-running_mse_train/total_mse_train.item()
        running_r_square_test = 1-running_mse_test/total_mse_test.item()

        print("Epoch {}: Training MSE {}; Testing MSE {}".format(epoch, running_mse_train, running_mse_test))
        print("Epoch {}: Training R2 {}; Testing R2 {}".format(epoch, running_r_square_train, running_r_square_test))

        mse_train_list.append(running_mse_train)
        mse_test_list.append(running_mse_test)
        r_square_train_list.append(running_r_square_train)
        r_square_test_list.append(running_r_square_test)

        # check overfitting for early stopping. This is designed in an ad-hoc way.
        if epoch > 5 and running_r_square_test < 0.0:
            break

        # store the best performance.
        if running_r_square_test > best_r_square:
            best_r_square = running_r_square_test
            best_model_wts = copy.deepcopy(model.state_dict())

    # load the weights of the best model and hand the model back in the mode it came in
    model.load_state_dict(best_model_wts)
    model.train(was_training)
    return model, mse_train_list, mse_test_list, r_square_train_list, r_square_test_list


## Train resnet18 for continuous outputs.

In [65]:
# Experiment configuration shared by the training and feature-extraction cells.

# Target variables; one model is trained per entry.
output_list = [
    'HHVEHCNT_mean_norm',
    'HHVEHCNT_P_CAP_mean_norm',
    'TRPTRANS_1_mean_norm',
    'TRPTRANS_2_mean_norm',
    'TRPTRANS_3_mean_norm',
]

# Columns returned as the tabular inputs.
input_var = [
    'R_AGE_IMP_mean',
    'HHSIZE_mean',
    'HHFAMINC_mean',
    'HBHTNRNT_mean',
    'HBPPOPDN_mean',
    'HBRESDN_mean',
    'R_SEX_IMP_2_mean',
    'EDUC_2_mean',
    'HH_RACE_2_mean',
    'HOMEOWN_1_mean',
    'HOMEOWN_2_mean',
    'HBHUR_R_mean',
    'HBHUR_S_mean',
    'HBHUR_T_mean',
    'HBHUR_U_mean',
]

# Built-environment columns.
BE_var = [
    'density',
    'diversity',
    'design',
]

# Image source: 'rgb', 'bw', or 'merge'.
image_type = 'bw'

# Target type and number of output units. Certain category values can cause
# errors; with output_type = 'continuous' the value needs to be 1.
output_type = 'continuous'
num_categories = 1

# Number of observations to use; needs to be smaller than the max (18491).
size = 12000

# Architecture name.
model_name = 'resnet18'

# Result containers, filled per output variable.
performance_continuous = dict()
model_dic = dict()

In [66]:
import os

# Train one model per output variable, save its weights and record its learning curves.

# Keep `batch_size` a notebook-level name: code outside this cell may rely on it.
batch_size = 100
# Only read when model_name == 'bottleneck_resnet18'. Defined here so that
# selecting that model does not fail with a NameError.
num_bottleneck = 3
# torch.save does not create missing directories; make sure a finished training
# run cannot be lost because './models/' is absent.
os.makedirs('./models', exist_ok=True)

for output_var in output_list:
    print(output_var)

    # data set up
    y_train,y_test,BE_train,BE_test,x_train,x_test,x_train_images,x_test_images = \
        initialize_data(image_type, output_var, output_type, input_var, BE_var, num_categories, size)

    # process data
    x_train_images_norm = x_train_images/255 # very crude processing. It is improvable.
    x_test_images_norm = x_test_images/255

    x_train_torch = torch.from_numpy(x_train_images_norm)
    x_test_torch = torch.from_numpy(x_test_images_norm)
    y_train_torch = torch.from_numpy(y_train)
    y_test_torch = torch.from_numpy(y_test)

    # create data loader: train and test.
    train_ds = TensorDataset(x_train_torch, y_train_torch)
    train_dl = DataLoader(train_ds, batch_size, shuffle = True)

    test_ds = TensorDataset(x_test_torch, y_test_torch)
    test_dl = DataLoader(test_ds, batch_size, shuffle = True)

    # model set up
    input_channels = 4 # 4 for BW images; 3 for RGB images; 7 for merged images.
    use_pretrained = True # unclear whether True or False is better.
    full_training = True # Fully retraining the network seems to work better.

    if model_name == 'bottleneck_resnet18': # It does not work.
        model = bottleneck_resnet18(num_categories, num_bottleneck, input_channels, use_pretrained, full_training)
    else:
        # 'resnet18' and others works
        model = initialize_model(model_name, num_categories, input_channels, use_pretrained, full_training)
    model.to(device)

    # training set up
    criterion = nn.MSELoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    n_epoch = 25

    # create baseline mse: the summed squared error of always predicting the mean,
    # i.e. the denominator of R2.
    total_mse_train = criterion(y_train_torch.mean().repeat(y_train_torch.size()), y_train_torch)*y_train_torch.size()[0]
    total_mse_test = criterion(y_test_torch.mean().repeat(y_test_torch.size()), y_test_torch)*y_test_torch.size()[0]
    print(total_mse_train)
    print(total_mse_test)

    # training here.
    model, mse_train_list, mse_test_list, r_square_train_list, r_square_test_list = \
        train_continuous_model(model, train_dl, test_dl, criterion, optimizer, device, total_mse_train, total_mse_test, n_epoch)

    # save models.
    PATH = './models/'+model_name+'_'+output_var+'_'+image_type+'.pth'
    torch.save(model.state_dict(), PATH)
    model_dic[output_var]=model.state_dict()

    # save performance
    performance_continuous[output_var] = {
        'mse_train_list': mse_train_list,
        'mse_test_list': mse_test_list,
        'r_square_train_list': r_square_train_list,
        'r_square_test_list': r_square_test_list,
    }


HHVEHCNT_mean_norm
tensor(10700.4521)
tensor(2549.4304)
Epoch 0: Training MSE 12604.125246405602; Testing MSE 2297.4649310112
Epoch 0: Training R2 -0.17790585589844254; Testing R2 0.09883207125079974
Epoch 1: Training MSE 9714.341014623642; Testing MSE 2388.0933046340942
Epoch 1: Training R2 0.09215602482347929; Testing R2 0.0632835922985201
Epoch 2: Training MSE 9775.559794902802; Testing MSE 2275.9082436561584
Epoch 2: Training R2 0.08643488524639154; Testing R2 0.10728756279377039
Epoch 3: Training MSE 9565.040963888168; Testing MSE 2266.590929031372
Epoch 3: Training R2 0.10610871099639718; Testing R2 0.1109422279895641
Epoch 4: Training MSE 9386.626183986664; Testing MSE 2269.328820705414
Epoch 4: Training R2 0.12278228491893062; Testing R2 0.10986830510363355
Epoch 5: Training MSE 9397.053003311157; Testing MSE 2431.677985191345
Epoch 5: Training R2 0.12180785699945096; Testing R2 0.046187742097365536


KeyboardInterrupt: 

In [43]:
# save performance
import os
import pickle

# open() does not create missing directories; make sure the results of a long
# training run are not lost because 'outputs/' is absent.
os.makedirs('outputs', exist_ok=True)
with open('outputs/performance_continuous_'+image_type+'_'+model_name+'.pickle', 'wb') as h:
    pickle.dump(performance_continuous, h, protocol=pickle.HIGHEST_PROTOCOL)

# import pickle
# with open('outputs/performance_continuous.pickle', 'rb') as h:
#     performance_continuous = pickle.load(h)    

## Store and save resnet's last layer

In [9]:
### 
def return_last_layer_resnet(model,device,x_train_images,x_test_images,y_train,y_test):
    """Return min-max scaled activations of the layer that feeds the model's last layer.

    The last child module of `model` is dropped and the remaining modules are run
    on every image (divided by 255), in order and in batches of 100. The resulting
    activations are flattened to one row per image.

    Parameters
    ----------
    model : torch.nn.Module
        Trained network whose children can be chained in an nn.Sequential (as is
        done for resnet18 in this file). Its modules are moved to `device`.
    device : torch.device
        Device used for the forward passes.
    x_train_images, x_test_images : numpy.ndarray
        Image arrays.
    y_train, y_test : numpy.ndarray
        Accepted for interface compatibility; not used.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Scaled train and test feature matrices, one row per image (512 columns
        for resnet18). The scaler is fitted on the training features only, so
        test values may fall slightly outside [0, 1].
    """
    from sklearn.preprocessing import MinMaxScaler

    # everything except the last layer.
    model_no_last_layer = nn.Sequential(*list(model.children())[:-1]).to(device)

    batch_size = 100

    def extract(images):
        dataset = TensorDataset(torch.from_numpy(images/255))
        loader = DataLoader(dataset, batch_size, shuffle = False) # important: NO SHUFFLE!!!
        batches = []
        for (inputs,) in loader:
            hidden = model_no_last_layer(inputs.to(device))
            # Flatten everything except the batch axis. squeeze() would also
            # drop the batch axis for a batch of one image.
            batches.append(torch.flatten(hidden, 1).cpu().numpy())
        # concatenate copes with a shorter final batch and any feature width.
        return np.concatenate(batches, axis=0)

    # Inference mode: otherwise BatchNorm normalises with per-batch statistics
    # (features would depend on batch composition) and updates its running
    # statistics on every pass. The wrapper shares its modules with `model`, so
    # the previous mode is restored afterwards.
    was_training = model.training
    model_no_last_layer.eval()
    try:
        with torch.no_grad():
            image_train_hidden_vector = extract(x_train_images)
            image_test_hidden_vector = extract(x_test_images)
    finally:
        model_no_last_layer.train(was_training)

    # scale: fit on the training features only and reuse the same mapping for the
    # test features. Re-fitting on the test set would put the two sets on
    # different scales.
    scaler = MinMaxScaler()
    image_train_hidden_vector_norm = scaler.fit_transform(image_train_hidden_vector)
    image_test_hidden_vector_norm = scaler.transform(image_test_hidden_vector)

    return image_train_hidden_vector_norm,image_test_hidden_vector_norm


In [11]:
# Reload the trained model of every output variable and extract its last-layer features.
model_dic = {}
model_name = 'resnet18'
input_channels = 4 # for BW images
use_pretrained = True # unclear True vs False is better
full_training = True
last_layer_dic_train = {}
last_layer_dic_test = {}

for output_var in output_list:
    print(output_var)
    # read models: rebuild the architecture, then overwrite its weights with the
    # saved checkpoint.
    model = initialize_model(model_name, num_categories, input_channels, use_pretrained, full_training)
    PATH = './models/'+model_name+'_'+output_var+'_'+image_type+'.pth'
    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
    model.load_state_dict(torch.load(PATH, map_location=device))
    model_dic[output_var]=model.state_dict()

    # initialize data.
    y_train,y_test,BE_train,BE_test,x_train,x_test,x_train_images,x_test_images = \
        initialize_data(image_type, output_var, output_type, input_var, BE_var, num_categories, size)

    # obtain the last layer
    image_train_hidden_vector_norm,image_test_hidden_vector_norm = \
        return_last_layer_resnet(model,device,x_train_images,x_test_images,y_train,y_test)

    # store the features per output variable
    last_layer_dic_train[output_var]=image_train_hidden_vector_norm
    last_layer_dic_test[output_var]=image_test_hidden_vector_norm


HHVEHCNT_mean_norm
HHVEHCNT_P_CAP_mean_norm
TRPTRANS_1_mean_norm
TRPTRANS_2_mean_norm
TRPTRANS_3_mean_norm


In [16]:
import pickle

# Persist the extracted last-layer features: training split first, then test split.
for target_path, payload in (
    ('data_process/last_layer_dic_train.pickle', last_layer_dic_train),
    ('data_process/last_layer_dic_test.pickle', last_layer_dic_test),
):
    with open(target_path, 'wb') as h:
        pickle.dump(payload, h, protocol=pickle.HIGHEST_PROTOCOL)
    