In [1]:
import torch


from torch.utils.data import  DataLoader



from gpytorch.kernels import RBFKernel


import logging

from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Subset,TensorDataset

from utils.HelperFunctions import *
from utils.Models import *
from utils.Kernels import *
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Logger

Setting up logger. Training MOGP models in this research takes quite some time, especially if you are doing massive scale grid search or cross validation. The logger here is to assure even when training unexpectedly terminated with anomaly, records are still kept for later query.

In [2]:
logger = logging.getLogger('The_Logger')
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler('result.log')
fh.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

error = logging.getLogger('error')
error.setLevel(logging.DEBUG)
error_fh=logging.FileHandler('error.log')
error_fh.setLevel(logging.DEBUG)
error_ch=logging.StreamHandler()
error_fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

if not any(isinstance(handler, logging.StreamHandler) for handler in logger.handlers):
    error.addHandler(error_fh)
    error.addHandler(error_ch)
    logger.addHandler(fh)
    logger.addHandler(ch)
    print("StreamHandler added")
else:
    print("StreamHandler already exists")


StreamHandler added


In [4]:
X_path,y_path,X_domain_path=get_dataset_path("FULL_SHOTS")
(X_train_tensor,X_D_train_tensor,y_train_tensor),(X_test_tensor,X_D_test_tensor,y_test_tensor)=load_dataset(X_path,y_path,X_domain_path=X_domain_path)

print(f"Shape of X_train:{X_train_tensor.shape}")
print(f"Shape of X_D_train(domain information of X_train):{X_train_tensor.shape}")
print(f"Shape of y_train:{y_train_tensor.shape}")

(274, 1084)
(274, 10)
(274, 1)
Shape of X_train:torch.Size([246, 1084])
Shape of X_D_train(domain information of X_train):torch.Size([246, 1084])
Shape of y_train:torch.Size([246, 10])


Calculating & configuring some global variables.

In [5]:
Global=config()


Global.NUM_CONC=y_train_tensor.shape[1]

Global.NUM_FEAT=X_train_tensor.shape[1]
Global.NUM_DOMAIN_FEAT=X_D_train_tensor.shape[1]
NUMS_DOMAIN, max_indices_row = torch.max(X_D_train_tensor, dim=0)
NUMS_DOMAIN.add_(1)


print(f'NUMS_DOMAIN: {NUMS_DOMAIN.item()}')
Global.NUMS_DOMAIN=NUMS_DOMAIN.long()
X_train_tensor = torch.cat((X_D_train_tensor, X_train_tensor), dim=1)
X_test_tensor = torch.cat((X_D_test_tensor, X_test_tensor), dim=1)
print(X_train_tensor.shape)
print(X_test_tensor.shape)

NUMS_DOMAIN: 30.0
torch.Size([246, 1085])
torch.Size([28, 1085])


# Playground

## Example usage of cross validation unit

In [6]:
Global.lr=0.1
Global.gamma=0.5
Global.STEP_SIZE=50
Global.NUMS_DOMAIN_FEATURE=1

In [7]:
params= [
    # [MultitaskGP, RBFKernel, 0.18, 55, 0.8],
    # [Linear_Model_Of_Corregionalization, RBFKernel, 0.1, 60, 0.8],
    # [MultitaskGP, K_MS, 0.18, 55, 0.65],
    [Linear_Model_Of_Corregionalization, K_Alpha_Beta,0.69, 33, 0.2],
    # [MultitaskGP, K_MS_with_Feat_Scaling, 0.18, 55,0.65],
]

In [None]:
X = torch.cat((X_train_tensor, X_test_tensor), dim=0)
y = torch.cat((y_train_tensor, y_test_tensor), dim=0)
dataset = TensorDataset(X, y)


k_folds = 10
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)


fold_results = {} # to store result of each folds
m=[]
nlls=[]
for each_param in params:
        Global.lr=each_param[2]
        Global.STEP_SIZE=each_param[3]
        Global.gamma=each_param[4]
        for fold, (train_idx, val_idx) in enumerate(kfold.split(dataset)):
            logger.info('--------------------------------')
            logger.info(f'FOLD {fold+1}')

            
            # creating training and validation sets
            train_subset = Subset(dataset, train_idx)
            val_subset = Subset(dataset, val_idx)
            
            # creating dataloaders
            train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
            val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
            X_train_tensor,y_train_tensor=dataloader2tensor(train_loader)
            X_test_tensor,y_test_tensor=dataloader2tensor(val_loader)
            try:
                md,n=run_test(X_train_tensor,y_train_tensor,X_test_tensor,y_test_tensor,model=each_param[0],kernel=each_param[1],config=Global,logger=logger)
                m.append(md)
                nlls.append(n)
            except Exception as e:
                error.error(f"Model:{str(each_param[0])}; Kernel:{str(each_param[1])}; lr:{str(Global.lr)};STEP_SIZE:{str(Global.STEP_SIZE)}; {e}")
                continue


## Example usage of grid search unit

In [46]:
param_grid = {
    'models':[Linear_Model_Of_Corregionalization],
    'kernels': [RBFKernel],
    'lrs': [0.69 ],
    'gammas':[0.5],
    'STEP_SIZEs':[33]
}

grid_search(X_train_tensor,y_train_tensor,X_test_tensor,y_test_tensor,param_grid,Global)
