In [1]:
from pathlib import Path
# From keras we "translate" the code to Pytorch
#from tensorflow.keras.layers import Dense, LeakyReLU
import torch
from torch import nn
from torch import Tensor
from torch import reshape as tshape
from torch import matmul as tmat
import numpy as np

In [2]:
torch.__version__

'1.8.1'

In [3]:
ROOT_PATH = './data'

In [5]:
print("Current device: ",torch.cuda.current_device())
print("Device count: ",torch.cuda.device_count())
print("Is the GPU available? ",torch.cuda.is_available())

# use gpu if available, else cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', torch.cuda.get_device_properties(device))

Current device:  0
Device count:  1
Is the GPU available?  True
Using device: _CudaDeviceProperties(name='GeForce GTX 1080', major=6, minor=1, total_memory=8118MB, multi_processor_count=20)


# MUDI data

In [68]:
# packages related to data reading
import pandas as pd
import os
import h5py

# pytorch
from torch.utils.data import Dataset, DataLoader
import torchvision

In [69]:
class MRISelectorSubjDataset(Dataset):
    """MRI dataset to select features from."""
    
    # pytorch
    def __init__(self, root_dir, dataf, headerf, subj_list):
        """
        Args:
            root_dir (string): Directory with the .csv files
            data (string): Data .csv file
            header (string): Header .csv file
            subj_list (list): list of all the subjects to include
            
            batch_size & shuffle are defined with 'DataLoader' in pytorch 
        """
        
        self.root_dir = root_dir
        self.dataf = dataf
        self.headerf = headerf
        self.subj_list = subj_list
        
        # load the header
        subj = self.subj_list[0]
        self.header = pd.read_csv(os.path.join(self.root_dir,
                                             self.headerf), index_col=0).to_numpy()
        self.ind = self.header[np.isin(self.header[:,1],self.subj_list),0]
#         print(self.ind)
        
        self.indexes = np.arange(len(self.ind))
        
    def __len__(self):
        'Denotes the total number of samples'
        return len(self.ind)
    
    def __getitem__(self, index):
        'Generates one sample of data'
        indexes = self.indexes[index]
        
        # Find list of IDs
        #list_IDs_temp = [self.ind[k] for k in indexes]
        list_IDs_temp = self.ind[indexes]
        
        h5f = h5py.File(os.path.join(self.root_dir, self.dataf), 'r')
        X = h5f.get('data1')
        X = X[list_IDs_temp,:]
        
        return X

In [107]:
import math
#import random
import pickle as pk
import torch.nn.functional as F
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.tensorboard import SummaryWriter
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.autograd import Variable

class ConcreteSelect(nn.Module):
    
    def __init__(self, output_dim, input_shape, n_features = 500, start_temp = 10.0, min_temp = 0.1, alpha = 0.99999, **kwargs):
        super(ConcreteSelect, self).__init__(**kwargs)
        # encoder
        self.output_dim = output_dim
        self.input_shape = input_shape # the input layer has output (None,N_params_in). In this case, probably equal to input_dim
        self.start_temp = start_temp
        #self.min_temp = K.constant(min_temp)
        self.min_temp = nn.init.constant_(Tensor(np.zeros(1)),min_temp).to(device)
        #self.alpha = K.constant(alpha)
        self.alpha = nn.init.constant_(Tensor(np.zeros(1)),alpha).to(device)
        #self.name = name
              
        # equivalent to build in Keras
        self.temp = Variable(Tensor([self.start_temp]), requires_grad = False).to(device)
        tensor_logits = nn.init.xavier_normal_(torch.empty(self.output_dim,self.input_shape)).to(device)
        self.logits = nn.Parameter(tensor_logits, requires_grad = True).to(device)

        # for the decoder, we define three different Linear/dense layers and the activation function
        self.dense800 = nn.Linear(n_features,800)
        #self.dense800 = nn.Linear(500,800) # the example for the standard 500 features value
        self.dense1000 = nn.Linear(800,1000)
        self.dense1344 = nn.Linear(1000,1344)
        self.act = nn.LeakyReLU(0.2)
        
    # equivalent to call in Keras -> encoder, the concrete layer itself   
    def encoder(self, X, training = None):
        
        uniform = torch.rand(self.logits.size()).to(device)
        gumbel = -torch.log(-torch.log(uniform)).to(device)
        self.temp = torch.maximum(self.min_temp, self.temp * self.alpha).to(device)
        #print('temperature {}'.format(self.temp))
        #noisy_logits = (self.logits + gumbel.to(device)) / self.temp
        noisy_logits = ((self.logits + gumbel) / self.temp).to(device)
        samples = F.softmax(noisy_logits, dim = 1)
                
        #numClasses = self.logits.size()[1]
        dim_argmax = len(self.logits.size())-1
        discrete_logits = F.one_hot(torch.argmax(self.logits.to(device),dim_argmax),num_classes = self.logits.size()[1])
        
        # probably unnecessary
        if training is None:
            training = self.training
        
        if self.training:
            self.selections = samples
        else:
            self.selections = discrete_logits
        
        #Y = torch.dot(X,torch.transpose(self.selections, 0, 1)) 
        # dot is not exactly equal to a dot product, it could be a matrix product in keras 
        Y = torch.matmul(X,torch.transpose(self.selections.float(), 0, 1))
        return Y
    
    # decoder: we suppose the two-layers scheme. In keras this is defined outside
    def decoder(self,x):
        #x.to("cpu")
        x = self.act(self.dense800(x))
        x = self.act(self.dense1000(x))
        x = self.dense1344(x)
        
        return x
    
    def forward(self, X, training = None):
        y = self.encoder(X) # selected features
        x = self.decoder(y) # reconstructed signals

        return x, y
    
class StopperCallback(EarlyStopping):
    
    def __init__(self, mean_max_target = 0.998):#, writer=None):
        self.mean_max_target = mean_max_target
        #self.writer = writer
        #super(StopperCallback, self).__init__(monitor = '', patience = float('inf'), verbose = 1, mode = 'max')#, baseline = self.mean_max_target)
        super(StopperCallback, self).__init__(monitor = '', patience = float('inf'), verbose = True, mode = 'max')
    

class ConcreteAutoencoderFeatureSelector():
    
    #def __init__(self, K, output_function, num_epochs = 100, learning_rate = 0.001, start_temp = 10.0, min_temp = 0.1, tryout_limit = 5, input_dim = 1344, callback=None, writer=None): #batch_size = None, 
    def __init__(self, K, num_features = 500, num_epochs = 100, learning_rate = 0.001, start_temp = 10.0, min_temp = 0.1, tryout_limit = 5, input_dim = 1344, checkpt=True, callback=None, writer=None, path = ''):#, losstrain=None, lossval=None): #batch_size = None, 
        self.K = K # equivalent to output_dim
        # self.output_function = output_function # this function is now included in the ConcreteSelect class
        # but now we have to define the number of features to be extracted from the encoder
        self.num_features = num_features
        self.num_epochs = num_epochs
#         self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.start_temp = start_temp
        self.min_temp = min_temp
        self.tryout_limit = tryout_limit
        self.input_dim = input_dim
        self.checkpt = checkpt
        self.callback = callback
        self.writer = writer
        self.path = path #str(Path(ROOT_PATH, 'runs', 'models'))
        #self.losstrain = losstrain
        #self.lossval = lossval
        
    def fit(self, X, val_X=None):
#         if self.batch_size is None:
#             self.batch_size = max(len(X) // 256, 16)
        
        num_epochs = self.num_epochs
        steps_per_epoch = X.__len__()#(len(X) + self.batch_size - 1) // self.batch_size
        print("steps per epoch: ",steps_per_epoch)
        writer = self.writer
        #losses,losses_val=[],[]
        
        for i in range(self.tryout_limit):
            
            alpha = math.exp(math.log(self.min_temp / self.start_temp) / (num_epochs * steps_per_epoch))
            
            # we apply the model
            self.model = ConcreteSelect(self.K, self.input_dim, self.num_features, self.start_temp, self.min_temp, alpha).to(device)
            
            # we define the loss and the optimizer functions
            criterion = nn.MSELoss()
            optimizer = torch.optim.Adam(self.model.parameters(),lr=self.learning_rate)
            
            
            
            if self.checkpt==True:
                checkpoint = torch.load(self.path)
                self.model.load_state_dict(checkpoint['model_state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
                epoch_check = checkpoint['epoch']
                loss = checkpoint['loss']
            self.model.train()
            
            stopper_callback = StopperCallback()#writer=self.writer)

            print(self.callback)
            
            for epoch in range(num_epochs):
                if self.checkpt==True:
                    if epoch < epoch_check:
                        continue
                
                value_stop = torch.mean(torch.max(F.softmax(self.model.logits, dim = 1),1).values)
                print('mean max of probabilities:', value_stop, '- temperature', self.model.temp)
                
                if value_stop >= stopper_callback.mean_max_target:
                    break
                
                self.model.train()
                for j, signals in enumerate(X):
                    signals = signals.to(device)
                    # just to check how it's going, the next two lines can be commented or removed
                    if(j%500 == 0):
                        print("iteration: ",j)
                        
                    # steps in pytorch:
                    # 1. Initialise gradients at the start of each batch
                    # 2. Run the forward and then the backwards pass
                    # 3. Compute the loss and update the weights
                    
                    # Initialise gradients
                    optimizer.zero_grad()

                    outputs, selected_features = self.model(signals)
                    loss = criterion(outputs, signals) # like criterion(yhat,target) -> the target in the autoencoder is the input
                    
                    writer.add_scalar(str(Path(ROOT_PATH, 'runs', 'scalars')), loss, epoch)
                    
                    #print('Epoch {}: Loss = {}'.format(epoch+1, loss.item())) # just to check how it's going
                    
                    # Backward pass
                    loss.backward()

                    # Compute the loss and update the weights
                    optimizer.step()
                
                if val_X is not None:
                    # Evaluate the model
                    self.model.eval()
                    
                    #steps_per_epoch_val = val_X.__len__()
                    for j, signals in enumerate(val_X):
                        signals = signals.to(device)
                        outputs_pred, selected_features_pred = self.model(signals)

                        loss = criterion(outputs_pred,signals)
                
                # save for checkpoint
                torch.save({'epoch': epoch,
                        'model_state_dict': self.model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss.item(),
                        }, self.path)
                
            num_epochs *= 2
        
        self.probabilities = F.softmax(self.model.logits, dim = 1)
        self.indices = torch.argmax(self.model.logits, 1)
        
        return self
    
    def get_indices(self):
        val = torch.argmax(self.model.logits, 1)
        return val
        #return K.get_value(K.argmax(self.model.get_layer('concrete_select').logits))
    
    def get_mask(self):
        #nn.functional.one_hot(torch.argmax(self.logits),list(self.logits.size())[1], dim = )
        dim_argmax = len(self.model.logits.size())-1
        val = torch.sum(nn.functional.one_hot(torch.argmax(self.model.logits,dim_argmax),self.model.logits.size()[1]))
        return val
        #return K.get_value(K.sum(K.one_hot(K.argmax(self.model.get_layer('concrete_select').logits), self.model.get_layer('concrete_select').logits.shape[1]), axis = 0))
    

    def get_support(self, indices = False):
        return self.get_indices() if indices else self.get_mask()
    
    def get_params(self):
        return self.model
        #return self.output_function(self.concrete_select)

In [85]:
# import modules to build RunBuilder and RunManager helper classes
from collections  import OrderedDict
from collections import namedtuple
from itertools import product

# Read in the hyper-parameters and return a Run namedtuple containing all the 
# combinations of hyper-parameters
class RunBuilder():
  @staticmethod
  def get_runs(params):

    Run = namedtuple('Run', params.keys())

    runs = []
    for v in product(*params.values()):
      runs.append(Run(*v))
    
    return runs

In [120]:
# put all hyper params into a OrderedDict, easily expandable
params = OrderedDict(
    lr = [.001],
    batch_size = [256]
#     batch_size = [64]
)

In [73]:
from datetime import datetime

## Experiment 3: 2 layers

In [74]:
n_means = 500
num_epochs = 2000
#dec = decoder_2l
#dec = mudi_net(n_meas)
decstr = 'l2'

In [75]:
testsubj = 15
testsubjstr = '15'

In [None]:
from torch.utils.tensorboard import SummaryWriter
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn.utils.rnn import pad_sequence
writer = SummaryWriter()

torch.manual_seed(14)

for run in RunBuilder.get_runs(params):
    model_info_template_str = f'{run}_K={n_means}_epoch={num_epochs}_test={testsubjstr}_dec={decstr}'

    checkpoint_path = str(Path(ROOT_PATH, 'runs', 'models', f'{model_info_template_str}_runtime.h5'))
    monitor_callback = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=True)
    
    root_dir = './data' # change depending on the directory where data are located
    dataf = 'data_.hdf5'
    headerf = 'header_.csv'
    subj_list_train = np.array([11, 12, 13, 14])
    subj_list_valid = np.array([15])
    
    train_set = MRISelectorSubjDataset(ROOT_PATH,dataf,headerf,subj_list_train)
    train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 15, pin_memory=False, drop_last=True)
    
    # for the validation dataset
    valid_set = MRISelectorSubjDataset(ROOT_PATH,dataf,headerf,subj_list_valid)
    valid_gen = DataLoader(valid_set, batch_size = run.batch_size, shuffle = False, num_workers = 15, pin_memory=False, drop_last=True)

    path = str(Path(ROOT_PATH, 'runs', 'models', 'check15', 'model.pt'))
    # 1st time
    checkpt = False
    # Continue training
    # checkpt = True
    # temp = Tensor([10]) # check last value if necessary
    
    selector = ConcreteAutoencoderFeatureSelector(K=n_means, num_features=n_means, num_epochs=num_epochs, learning_rate=run.lr, start_temp=10, min_temp=0.1, 
                                                  tryout_limit=5, input_dim=1344, checkpt = checkpt, callback=monitor_callback, writer=writer, path = path)#,losstrain=losstrain,lossval=lossval)    
    
    selector.fit(X=train_gen, val_X=valid_gen)
    
    model = selector.get_params()
    
    #model.save('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')
    torch.save(model, Path(ROOT_PATH, 'runs', 'models', f'{model_info_template_str}.pt'))
    # save only parameters
    torch.save(model.state_dict(), Path(ROOT_PATH, 'runs', 'models', f'{model_info_template_str}_params.pt'))
    
    print(np.sort(selector.get_indices()))
    np.savetxt(Path(ROOT_PATH, 'runs', 'models', f'{model_info_template_str}.txt'), np.array(selector.get_indices(), dtype=int), fmt='%d')
    
    #model.save('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')
    #torch.save(model, './runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')

torch.save(model.state_dict(), Path(ROOT_PATH, 'runs', 'models', f'epoch={num_epochs}_net.pth'))
model_file = open(Path(ROOT_PATH, 'runs', 'models', f'epoch={num_epochs}_net.bin'),'wb')
pk.dump(model,model_file,pk.HIGHEST_PROTOCOL)      
model_file.close() 

steps per epoch:  1830
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f00e80d5e50>
mean max of probabilities: tensor(0.0008, device='cuda:0', grad_fn=<MeanBackward0>) - temperature tensor([10.], device='cuda:0')
iteration:  0
iteration:  500
iteration:  1000
iteration:  1500
mean max of probabilities: tensor(0.0026, device='cuda:0', grad_fn=<MeanBackward0>) - temperature tensor([9.9773], device='cuda:0')
iteration:  0
iteration:  500
iteration:  1000
iteration:  1500
mean max of probabilities: tensor(0.0035, device='cuda:0', grad_fn=<MeanBackward0>) - temperature tensor([9.9546], device='cuda:0')
iteration:  0
iteration:  500
iteration:  1000
iteration:  1500
mean max of probabilities: tensor(0.0045, device='cuda:0', grad_fn=<MeanBackward0>) - temperature tensor([9.9319], device='cuda:0')
iteration:  0
iteration:  500
iteration:  1000
iteration:  1500
mean max of probabilities: tensor(0.0059, device='cuda:0', grad_fn=<MeanBackward0>) - temperature tensor([9.

In [None]:
print(np.sort(selector.get_indices()))
np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_meas) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d')

In [None]:
a = np.loadtxt('./runs/textfiles/Run(lr=0.001, batch_size=256)K=500_epoch=2000_test15_decl2.txt')
a = np.sort(a.astype(int))
print(a)

In [10]:
testsubj = 14
testsubjstr = '14'

In [110]:
from torch.utils.tensorboard import SummaryWriter
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn.utils.rnn import pad_sequence
writer = SummaryWriter()

"""def pad_collate(batch):
    xx = list(zip(*batch))
    xx_pad = pad_sequence(torch.as_tensor(xx), batch_first=True, padding_value=0)
    return xx_pad #, xlens"""

#torch.manual_seed(14)

for run in RunBuilder.get_runs(params):
    monitor_callback = ModelCheckpoint('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '_runtime.h5', monitor='val_loss', verbose=True)
    
    root_dir = './MUDI/data'
    dataf = 'data_.hdf5'
    headerf = 'header_.csv'
    subj_list_train = np.array([11, 12, 13, 15])
    subj_list_valid = np.array([14])
    
    train_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_train)
    train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, pin_memory=False, drop_last=True)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 4, pin_memory=False, collate_fn = pad_collate)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 0, pin_memory=False)#, collate_fn = pad_collate)
    # for the validation dataset
    valid_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_valid)
    valid_gen = DataLoader(valid_set, batch_size = run.batch_size, shuffle = False, pin_memory=False, drop_last=True)
    
    """### Allocate memory for losses
    n_batch=0   # Count how many mini-batches of size mbatch we created
    for j,signals in enumerate(train_gen):
        n_batch = n_batch+1
        signals = signals[:,:,ind_MUDI]
        print(signals.size())
    losstrain = np.zeros((num_epochs,n_batch)) + np.nan
    
    n_batch=0   # Count how many mini-batches of size mbatch we created
    for j,signals in enumerate(valid_gen):
        n_batch = n_batch+1
    lossval = np.zeros((num_epochs,n_batch)) + np.nan"""
    
    path = './runs/models/check14/model.pt'
    # 1st time
    checkpt = False
    # Continue training
    checkpt = False
    
    selector = ConcreteAutoencoderFeatureSelector(K=n_means, num_features=n_means, num_epochs=num_epochs, learning_rate=run.lr, start_temp=10.0, min_temp=0.1, 
                                                  tryout_limit=5, input_dim=1344, checkpt = checkpt, callback=monitor_callback, writer=writer, path = path)#,losstrain=losstrain,lossval=lossval)    

    #selector.fit(X=train_gen, val_X=valid_gen)
    selector.fit(X=train_gen, val_X=valid_gen)
    
    model = selector.get_params()
    
    print(np.sort(selector.get_indices()))
    np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d')
    
    #model.save('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')
    torch.save(model, './runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    # save only parameters
    torch.save(model.state_dict(),'./runs/models/params_' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    
    torch.save(model.state_dict(), os.path.join('./runs/models/','epoch{}_net.pth'.format(num_epochs)) )
    model_file = open(os.path.join('./runs/models/','epoch{}_net.bin'.format(num_epochs)),'wb')
    pk.dump(model,model_file,pk.HIGHEST_PROTOCOL)      
    model_file.close()

FileNotFoundError: [Errno 2] No such file or directory: '/home/sapap9/PythonCode/MUDI/data/header_.csv'

In [19]:
model.save_weights('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')

In [14]:
print(np.sort(selector.get_indices()))
np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d') 

[   2    6   12   19   23   27   28   31   31   32   33   34   41   44
   45   49   51   54   54   54   57   63   69   70   71   72   73   77
   77   80   87   88   94   96   98  100  100  107  108  108  111  114
  116  118  125  136  137  140  143  146  147  148  153  154  155  158
  159  161  169  169  173  175  176  180  180  183  187  191  192  194
  196  196  198  201  202  205  207  214  218  219  226  226  227  231
  236  238  241  243  245  245  248  249  254  255  255  256  269  271
  271  271  272  272  273  275  279  289  291  291  292  301  302  306
  310  310  314  318  319  324  329  330  335  335  337  350  352  353
  358  360  365  372  374  375  382  382  387  390  393  395  398  400
  412  414  415  416  416  418  419  425  429  429  430  431  433  433
  444  450  454  456  459  461  462  463  464  468  469  471  472  478
  481  482  485  486  487  489  490  494  498  503  509  513  514  521
  521  524  538  538  539  540  540  546  552  554  558  558  560  563
  568 

In [11]:
testsubj = 13
testsubjstr = '13'

In [12]:
from torch.utils.tensorboard import SummaryWriter
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn.utils.rnn import pad_sequence
writer = SummaryWriter()

for run in RunBuilder.get_runs(params):
    monitor_callback = ModelCheckpoint('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '_runtime.h5', monitor='val_loss', verbose=True)

    root_dir = './data'
    dataf = 'data_.hdf5'
    headerf = 'header_.csv'
    subj_list_train = np.array([11, 12, 14, 15])
    subj_list_valid = np.array([13])
    
    train_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_train)
    train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, pin_memory=False, drop_last=True)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 4, pin_memory=False, collate_fn = pad_collate)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 0, pin_memory=False)#, collate_fn = pad_collate)
    # for the validation dataset
    valid_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_valid)
    valid_gen = DataLoader(valid_set, batch_size = run.batch_size, shuffle = False, pin_memory=False, drop_last=True)
    
    path = './runs/models/check13/model.pt'
    # 1st time
    checkpt = False
    # Continue training
    checkpt = True
    
    selector = ConcreteAutoencoderFeatureSelector(K=n_means, num_features=n_means, num_epochs=num_epochs, learning_rate=run.lr, start_temp=10.0, min_temp=0.1, 
                                                  tryout_limit=5, input_dim=1344, checkpt = checkpt, callback=monitor_callback, writer=writer, path = path)#,losstrain=losstrain,lossval=lossval)    
    
    selector.fit(X=train_gen, val_X=valid_gen)
    
    model = selector.get_params()
    
    print(np.sort(selector.get_indices()))
    np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d')
    
    #model.save_weights('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')
    torch.save(model, './runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    # save only parameters
    #torch.save(model.state_dict(),'./runs/models/params_' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    
torch.save(model.state_dict(), os.path.join('./runs/models/','epoch{}_net.pth'.format(num_epochs)) )
model_file = open(os.path.join('./runs/models/','epoch{}_net.bin'.format(num_epochs)),'wb')
pk.dump(model,model_file,pk.HIGHEST_PROTOCOL)      
model_file.close()

Epoch 1: Loss = 0.24961930513381958
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f4240daf880>
j train = 0
Epoch 1: Loss = 0.1942870020866394
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f4240daf880>
j train = 1
Epoch 1: Loss = 0.1328582912683487
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f4240daf880>
j train = 2
Epoch 1: Loss = 0.13640634715557098
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f4240daf880>
j train = 3
Epoch 1: Loss = 0.05716152489185333
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f4240daf880>
j train = 4
Epoch 1: Loss = 0.05666468292474747
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f4240daf880>
j train = 5
Epoch 1: Loss = 0.06451275199651718
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f4240daf880>
j train = 6
Epoch 1: Loss = 0.05805479362607002
<pytorch_light

In [13]:
torch.save(model.state_dict(),'./runs/models/params_' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    
torch.save(model.state_dict(), os.path.join('./runs/models/','epoch{}_net.pth'.format(num_epochs)) )
model_file = open(os.path.join('./runs/models/','epoch{}_net.bin'.format(num_epochs)),'wb')
pk.dump(model,model_file,pk.HIGHEST_PROTOCOL)      
model_file.close()

In [29]:
print(np.sort(selector.get_indices()))
np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d') 

[   0    0    0    0    1    3    4   10   13   15   16   16   19   20
   25   29   32   35   36   37   40   40   43   44   46   51   52   54
   56   59   60   64   68   75   76   79   81   84   88   91   92   92
   94   99  100  103  104  107  108  110  112  114  116  117  120  123
  128  131  136  138  140  147  148  149  156  156  163  164  169  173
  177  179  180  180  181  183  184  184  188  196  197  198  199  200
  204  210  212  212  215  216  222  224  224  225  229  232  232  233
  236  243  248  252  256  256  260  264  265  267  268  272  278  282
  285  287  288  290  292  293  295  296  299  300  307  308  309  311
  312  313  319  320  320  323  328  331  333  338  343  344  344  345
  349  350  356  356  363  365  365  367  368  369  372  375  376  381
  384  385  387  389  392  393  395  396  397  404  404  405  407  408
  408  408  408  412  418  419  425  428  428  430  432  435  436  437
  443  446  452  459  464  475  476  484  487  488  495  496  497  500
  502 

In [14]:
testsubj = 12
testsubjstr = '12'

In [15]:
from torch.utils.tensorboard import SummaryWriter
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn.utils.rnn import pad_sequence
writer = SummaryWriter()

for run in RunBuilder.get_runs(params):
    monitor_callback = ModelCheckpoint('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '_runtime.h5', monitor='val_loss', verbose=True)
    
    root_dir = './data'
    dataf = 'data_.hdf5'
    headerf = 'header_.csv'
    subj_list_train = np.array([11, 13, 14, 15])
    subj_list_valid = np.array([12])
    
    train_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_train)
    train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, pin_memory=False, drop_last=True)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 4, pin_memory=False, collate_fn = pad_collate)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 0, pin_memory=False)#, collate_fn = pad_collate)
    # for the validation dataset
    valid_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_valid)
    valid_gen = DataLoader(valid_set, batch_size = run.batch_size, shuffle = False, pin_memory=False, drop_last=True)
    
    path = './runs/models/check12/model.pt'
    # 1st time
    checkpt = False
    # Continue training
    checkpt = True
    
    selector = ConcreteAutoencoderFeatureSelector(K=n_means, num_features=n_means, num_epochs=num_epochs, learning_rate=run.lr, start_temp=10.0, min_temp=0.1, 
                                                  tryout_limit=5, input_dim=1344, checkpt = checkpt, callback=monitor_callback, writer=writer, path = path)#,losstrain=losstrain,lossval=lossval)    
    
    selector.fit(X=train_gen, val_X=valid_gen)
    
    model = selector.get_params()
    
    print(np.sort(selector.get_indices()))
    np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d')
    
    #model.save('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')
    torch.save(model, './runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    # save only parameters
    torch.save(model.state_dict(),'./runs/models/params_' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    
    torch.save(model.state_dict(), os.path.join('./runs/models/','epoch{}_net.pth'.format(num_epochs)) )
    model_file = open(os.path.join('./runs/models/','epoch{}_net.bin'.format(num_epochs)),'wb')
    pk.dump(model,model_file,pk.HIGHEST_PROTOCOL)      
    model_file.close()

Epoch 1: Loss = 0.24041230976581573
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4d2ea60>
j train = 0
Epoch 1: Loss = 0.19781215488910675
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4d2ea60>
j train = 1
Epoch 1: Loss = 0.13479676842689514
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4d2ea60>
j train = 2
Epoch 1: Loss = 0.12345556914806366
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4d2ea60>
j train = 3
Epoch 1: Loss = 0.059875719249248505
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4d2ea60>
j train = 4
Epoch 1: Loss = 0.05669020488858223
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4d2ea60>
j train = 5
Epoch 1: Loss = 0.06095398589968681
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4d2ea60>
j train = 6
Epoch 1: Loss = 0.05110770836472511
<pytorch_li

In [13]:
print(np.sort(selector.get_indices()))
np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d') 

[   0    6    6    6    7    9    9   13   13   15   18   20   20   22
   22   23   23   26   32   35   38   49   51   51   51   52   54   58
   60   65   68   72   72   73   80   90   91   91   95   98  100  101
  102  105  108  108  111  112  113  113  116  117  120  122  122  124
  128  131  138  138  152  154  157  160  161  166  166  167  171  173
  174  179  179  180  181  185  187  188  191  196  197  207  210  211
  213  215  218  220  221  225  229  229  231  231  232  232  236  238
  246  247  247  250  253  255  259  268  268  269  269  283  286  288
  292  294  297  298  299  311  313  314  315  315  317  319  319  319
  323  324  328  335  340  344  344  345  346  356  364  364  364  365
  370  372  373  374  379  381  388  389  389  396  397  397  398  398
  400  402  404  405  406  407  407  409  409  415  417  419  422  424
  431  434  436  448  461  462  470  476  490  496  496  504  505  505
  505  506  509  509  509  510  511  516  519  521  524  527  529  530
  533 

In [16]:
testsubj = 11
testsubjstr = '11'

In [17]:
from torch.utils.tensorboard import SummaryWriter
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn.utils.rnn import pad_sequence
writer = SummaryWriter()

for run in RunBuilder.get_runs(params):
    monitor_callback = ModelCheckpoint('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '_runtime.h5', monitor='val_loss', verbose=True)
    
    root_dir = './data'
    dataf = 'data_.hdf5'
    headerf = 'header_.csv'
    subj_list_train = np.array([12, 13, 14, 15])
    subj_list_valid = np.array([11])
    
    train_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_train)
    train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, pin_memory=False, drop_last=True)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 4, pin_memory=False, collate_fn = pad_collate)
    #train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True, num_workers = 0, pin_memory=False)#, collate_fn = pad_collate)
    # for the validation dataset
    valid_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list_valid)
    valid_gen = DataLoader(valid_set, batch_size = run.batch_size, shuffle = False, pin_memory=False, drop_last=True)  
    
    path = './runs/models/check11/model.pt'
    # 1st time
    checkpt = False
    # Continue training
    checkpt = True
    
    selector = ConcreteAutoencoderFeatureSelector(K=n_means, num_features=n_means, num_epochs=num_epochs, learning_rate=run.lr, start_temp=10.0, min_temp=0.1, 
                                                  tryout_limit=5, input_dim=1344, checkpt = checkpt, callback=monitor_callback, writer=writer, path = path)#,losstrain=losstrain,lossval=lossval)    
    
    selector.fit(X=train_gen, val_X=valid_gen)
    
    model = selector.get_params()
    
    print(np.sort(selector.get_indices()))
    np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d')
    
    #model.save('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.h5')
    torch.save(model, './runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    # save only parameters
    torch.save(model.state_dict(),'./runs/models/params_' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.pt')
    
    torch.save(model.state_dict(), os.path.join('./runs/models/','epoch{}_net.pth'.format(num_epochs)) )
    model_file = open(os.path.join('./runs/models/','epoch{}_net.bin'.format(num_epochs)),'wb')
    pk.dump(model,model_file,pk.HIGHEST_PROTOCOL)      
    model_file.close()

Epoch 1: Loss = 0.25644588470458984
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4ded7f0>
j train = 0
Epoch 1: Loss = 0.2039867341518402
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4ded7f0>
j train = 1
Epoch 1: Loss = 0.13457338511943817
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4ded7f0>
j train = 2
Epoch 1: Loss = 0.12008857727050781
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4ded7f0>
j train = 3
Epoch 1: Loss = 0.05704236030578613
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4ded7f0>
j train = 4
Epoch 1: Loss = 0.05602189525961876
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4ded7f0>
j train = 5
Epoch 1: Loss = 0.06095528602600098
<pytorch_lightning.callbacks.model_checkpoint.ModelCheckpoint object at 0x7f41f4ded7f0>
j train = 6
Epoch 1: Loss = 0.06257791817188263
<pytorch_ligh

In [13]:
print(np.sort(selector.get_indices()))
np.savetxt('./runs/textfiles/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '.txt', np.array(selector.get_indices(), dtype=int), fmt='%d') 

[   2    4    4    7    7    8    8   10   10   11   13   16   16   17
   19   21   24   32   47   52   53   55   56   58   58   58   63   63
   73   74   84   85   89   91   92  100  102  105  105  106  106  117
  117  119  121  122  122  123  124  126  128  130  132  132  142  146
  151  153  155  174  174  175  177  180  185  187  189  195  203  203
  205  206  207  208  212  216  222  222  223  223  224  225  225  227
  231  235  237  240  241  246  249  249  251  253  253  255  258  260
  260  261  261  262  262  266  267  270  271  271  272  275  289  291
  291  291  294  298  300  305  308  308  312  312  313  320  324  330
  331  331  335  337  338  341  342  342  348  352  353  353  357  360
  362  364  368  371  378  378  392  394  396  397  400  401  404  406
  406  407  411  413  414  416  419  419  421  425  428  431  445  447
  450  453  455  458  460  461  463  466  467  469  471  471  472  472
  476  476  478  484  486  487  487  490  493  495  503  508  508  509
  519 

In [37]:
for run in RunBuilder.get_runs(params):
    for trial in range(3):
        logdir = "./runs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S") + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_testnone_dec' + decstr + '_trial' + str(trial)

        """tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
        monitor_callback = keras.callbacks.ModelCheckpoint('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_testnone_dec' + decstr + '_runtime'  + '_trial' + str(trial) + '.h5', monitor='val_loss', verbose=0, save_weights_only=True)

        trainset = MRISelectorSubjDataset(root_dir='./data', dataf='data_.hdf5', headerf ='header_.csv',
                                      subj_list=np.array([11, 12, 13, 14, 15]), batch_size=run.batch_size)"""
        
        tensorboard_callback = torch.utils.tensorboard(log_dir=logdir)
        monitor_callback = pytorch_lightning.callbacks.ModelCheckpoint('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_test' + testsubjstr + '_dec' + decstr + '_runtime.h5', monitor='val_loss', verbose=True)

        root_dir = './data'
        dataf = 'data_.hdf5'
        headerf = 'header_.csv'
        subj_list = np.array([11, 12, 13, 14, 15])

        train_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list)
        train_gen = DataLoader(train_set, batch_size = run.batch_size, shuffle = True)
        # for the validation dataset
        #valid_set = MRISelectorSubjDataset(root_dir,dataf,headerf,subj_list)
        #valid_gen = DataLoader(valid_set, batch_size = run.batch_size, shuffle = False)
        
        selector = ConcreteAutoencoderFeatureSelector(K=n_means, output_function=dec, num_epochs=num_epochs, learning_rate=run.lr, start_temp=10.0, min_temp=0.1, 
                                                      tryout_limit=5, input_dim=1344, callback=[tensorboard_callback, monitor_callback])

        selector.fit(X=trainset)

        model = selector.get_params()

        model.save_weights('./runs/models/' + f'{run}' + 'K=' + str(n_means) + '_epoch=' + str(num_epochs) + '_testnone_dec' + decstr + '_trial' + str(trial) + '.h5')

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 1344)]            0         
_________________________________________________________________
concrete_select (ConcreteSel (None, 500)               672001    
_________________________________________________________________
dense_15 (Dense)             (None, 800)               400800    
_________________________________________________________________
leaky_re_lu_10 (LeakyReLU)   (None, 800)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 1000)              801000    
_________________________________________________________________
leaky_re_lu_11 (LeakyReLU)   (None, 1000)              0         
_________________________________________________________________
dense_17 (Dense)             (None, 1344)              1345

  ...
    to  
  ['...']






mean max of probabilities: 0.0008010466 - temperature 10.0
mean max of probabilities: 0.0010004895 - temperature 9.976355
mean max of probabilities: 0.0014470762 - temperature 9.952709
mean max of probabilities: 0.0034203383 - temperature 9.929064
mean max of probabilities: 0.005246357 - temperature 9.905418
mean max of probabilities: 0.0062826686 - temperature 9.881825
mean max of probabilities: 0.007274154 - temperature 9.86033
mean max of probabilities: 0.0079724025 - temperature 9.838834
mean max of probabilities: 0.008679924 - temperature 9.817338
mean max of probabilities: 0.009587731 - temperature 9.795842
mean max of probabilities: 0.010143932 - temperature 9.774346
mean max of probabilities: 0.010700223 - temperature 9.752851
mean max of probabilities: 0.011316635 - temperature 9.731355
mean max of probabilities: 0.011780981 - temperature 9.709859
mean max of probabilities: 0.0123565085 - temperature 9.688363
mean max of probabilities: 0.012912229 - temperature 9.666867
mean m

  ...
    to  
  ['...']






mean max of probabilities: 0.00080108334 - temperature 10.0
mean max of probabilities: 0.0010085095 - temperature 9.976355
mean max of probabilities: 0.0014710753 - temperature 9.952709
mean max of probabilities: 0.0036285291 - temperature 9.929064
mean max of probabilities: 0.005389432 - temperature 9.905418
mean max of probabilities: 0.0064429734 - temperature 9.881825
mean max of probabilities: 0.0073806974 - temperature 9.86033
mean max of probabilities: 0.008334144 - temperature 9.838834
mean max of probabilities: 0.00918177 - temperature 9.817338
mean max of probabilities: 0.009833288 - temperature 9.795842
mean max of probabilities: 0.010495411 - temperature 9.774346
mean max of probabilities: 0.011024365 - temperature 9.752851
mean max of probabilities: 0.011559345 - temperature 9.731355
mean max of probabilities: 0.012115288 - temperature 9.709859
mean max of probabilities: 0.012517167 - temperature 9.688363
mean max of probabilities: 0.012984671 - temperature 9.666867
mean ma

  ...
    to  
  ['...']






mean max of probabilities: 0.0008011019 - temperature 10.0
mean max of probabilities: 0.0010104261 - temperature 9.976355
mean max of probabilities: 0.0014603554 - temperature 9.952709
mean max of probabilities: 0.0033509757 - temperature 9.929064
mean max of probabilities: 0.0053267786 - temperature 9.905418
mean max of probabilities: 0.006408156 - temperature 9.881825
mean max of probabilities: 0.007428117 - temperature 9.86033
mean max of probabilities: 0.008363161 - temperature 9.838834
mean max of probabilities: 0.0090831425 - temperature 9.817338
mean max of probabilities: 0.01005278 - temperature 9.795842
mean max of probabilities: 0.010546624 - temperature 9.774346
mean max of probabilities: 0.011514009 - temperature 9.752851
mean max of probabilities: 0.01257038 - temperature 9.731355
mean max of probabilities: 0.013296135 - temperature 9.709859
mean max of probabilities: 0.014017367 - temperature 9.688363
mean max of probabilities: 0.014581711 - temperature 9.666867
mean max 