# Task B: Meta-Learning Performance Prediction

In this task, you will use information on training parameters and metadata on multiple OpenML datasets to train a performance predictor that performs well even for unseen datasets. You are provided with config parameters and metafeatures for six datasets. The datasets are split into training datasets and test datasets, and you should only train on the training datasets.

For questions, you can contact zimmerl@informatik.uni-freiburg.de.

__Note: Please use the dataloading and splits you are provided with in this notebook.__

## Specifications:

* Data: six_datasets_lw.json
* Number of datasets: 6
* Training datasets: higgs, vehicle, adult, volkert
* Test datasets: Fashion-MNIST, jasmine
* Number of configurations: 2000
* Available data: architecture parameters and hyperparameters, metafeatures 
* Target: final validation accuracy
* Evaluation metric: MSE

## Importing and splitting data

Note: There are 51 steps logged, 50 epochs plus the 0th epoch, prior to any weight updates.

In [1]:
!pip install wget
!pip install zipfile36

Collecting wget
  Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-cp36-none-any.whl size=9682 sha256=870a1af212056deac29f3ec16b9d0dcbb2e30bab8cfe231cd6172cee38b6fe91
  Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2
Collecting zipfile36
  Downloading https://files.pythonhosted.org/packages/fd/8a/3b7da0b0bd87d1ef05b74207827c72d348b56a0d6d83242582be18a81e02/zipfile36-0.1.3-py3-none-any.whl
Installing collected packages: zipfile36
Successfully installed zipfile36-0.1.3


In [17]:
import wget
import zipfile
# NOTE(review): the nested archive below is opened relative to this directory while
# both archives are extracted into the current working directory — confirm the layout.
dir_path = 'content/'
# Download the figshare article archive; wget.download returns the local file name.
filename=wget.download('https://ndownloader.figshare.com/articles/11604705/versions/1')
with zipfile.ZipFile(filename, 'r') as zip_ref:
    zip_ref.extractall("")
# Unpack the nested archive (presumably yields six_datasets_lw.json — verify).
with zipfile.ZipFile(dir_path+"six_datasets_lw.zip", 'r') as zip_ref:
    zip_ref.extractall("")
!rm six_datasets_lw.zip
# Presumably the metafeatures file that is later read as metafeatures.json — TODO confirm.
filename=wget.download('https://ndownloader.figshare.com/files/21188673')
# Helper modules used further down: the LCBench API and the data-preparation utilities.
wget.download('https://raw.githubusercontent.com/automl/LCBench/master/api.py')
# NOTE(review): the recorded output 'utils (1).py' suggests an existing utils.py was not
# overwritten on re-run, so `import utils` may still pick up the older copy — verify.
wget.download('https://raw.githubusercontent.com/infomon/Extrapolation-of-Learning-Curves/master/utils.py')

'utils (1).py'

In [0]:
%%capture
%cd ..
import numpy as np
import json
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

from content.api import Benchmark

In [16]:
# Benchmark file, resolved relative to the current working directory.
bench_dir = "six_datasets_lw.json"
# cache=False: the loader reads the JSON data directly (see the log output of this cell).
bench = Benchmark(bench_dir, cache=False)

==> Loading data...
==> No cached data found or cache set to False.
==> Reading json data...
==> Done.


In [0]:
# Load the dataset metafeatures from JSON.
# NOTE(review): `metafeatures` is not referenced by any of the later cells shown here.
with open("metafeatures.json", "r") as f:
    metafeatures = json.load(f)

In [194]:
# Dataset split
dataset_names = bench.get_dataset_names()
print(dataset_names)

# NOTE(review): this assignment is overwritten on the next line and has no effect.
test_datasets = ['adult', 'higgs', 'vehicle', 'volkert','jasmine']
# NOTE(review): the task description lists higgs as a *training* dataset and
# Fashion-MNIST / jasmine as the test datasets — confirm this evaluation target is intended.
test_datasets = ['higgs']

['Fashion-MNIST', 'adult', 'higgs', 'jasmine', 'vehicle', 'volkert']


In [0]:
# Prepare data
def read_data(datasets):
    """Collect learning curves and configurations for all configs of `datasets`.

    The number of configurations is queried for the first dataset only and
    reused for every dataset in the list.

    Returns a tuple ``(samples, curves, names)``:
      * samples -- object array of dicts with the keys 'Train/val_accuracy'
        (the first 10 logged values of the curve) and 'config'
      * curves  -- array holding the complete validation-accuracy curve per sample
      * names   -- array with the dataset name of every sample
    """
    n_configs = bench.get_number_of_configs(datasets[0])
    # (dataset, config id) pairs in dataset-major order
    pairs = [(name, idx) for name in datasets for idx in range(n_configs)]

    curves = np.array([
        bench.query(dataset_name=name, tag="Train/val_accuracy", config_id=idx)
        for name, idx in pairs
    ])
    configs = [
        bench.query(dataset_name=name, tag="config", config_id=idx)
        for name, idx in pairs
    ]

    # Model input: the observed prefix of each curve together with its config.
    samples = [
        {'Train/val_accuracy': curves[row, 0:10], 'config': configs[row]}
        for row in range(curves.shape[0])
    ]
    names = np.array([name for name, _ in pairs])

    # The target is the full curve (copied so it does not alias `curves`).
    return np.array(samples), curves.copy(), names

class TrainValSplitter():
    """Splits 25 % data as a validation split.

    The split indices are drawn once at construction time, stratified by
    dataset name, and then applied consistently to every array passed to
    `split`.

    Args:
        dataset_names: array-like with one dataset name per sample; its length
            defines the number of samples and its values the strata.
        random_state: optional seed forwarded to `train_test_split` to make the
            split reproducible (default: None, i.e. a different split each time).
    """

    def __init__(self, dataset_names, random_state=None):
        # Derive the sample count from `dataset_names` itself instead of relying
        # on a global `X`, which is not guaranteed to exist or to match in length.
        indices = np.arange(len(dataset_names))
        self.ind_train, self.ind_val = train_test_split(
            indices, test_size=0.25, stratify=dataset_names, random_state=random_state)

    def split(self, a):
        """Return the (train, validation) parts of the indexable array `a`."""
        return a[self.ind_train], a[self.ind_val]

# Samples (curve prefix + config), full curves and dataset names for the evaluation datasets.
X_test, y_test, dataset_names_test = read_data(test_datasets)


In [0]:
import utils

In [0]:
# Batched loader over the evaluation samples (batch size 32).
# NOTE(review): the meaning of normalization_factor_temporal_data=[100] is defined in
# utils.prep_data (not shown) — presumably a scaling factor for the curves; confirm.
test_data_loader = utils.prep_data(X_test, y_test, batch_size=32,normalization_factor_temporal_data=[100])

The data contains the configuration.

__Note__: Not all parameters vary across different configurations. The varying parameters are batch_size, max_dropout, max_units, num_layers, learning_rate, momentum, weight_decay

## Training and scoring

In [0]:
# When False, only the first element of each batch is used as temporal input.
multivariate = False

In [0]:
def preprocess_batch(batch):
  """Split one loader batch into ``(temporal, configs, targets)``.

  The leading elements of `batch` are the temporal series: the first
  `nof_multi` of them when the global `multivariate` flag is set, otherwise
  only the first one. They are stacked along a new leading axis. The last two
  elements of `batch` are the configs and the targets.
  """
  if multivariate:
    n_series = nof_multi
  else:
    n_series = 1
  temporal = torch.stack(list(batch[:n_series]), dim=0)
  configs, targets = batch[-2], batch[-1]
  return temporal, configs, targets

In [0]:
def test(model, criterion):
    model.eval()
    epoch_loss=[]
    with torch.no_grad():
      for batch in test_data_loader:
        temporal, configs, targets = preprocess_batch(batch)
        output = model([temporal, configs], targets, 0)
        output = output.squeeze()
        output = torch.t(output)
        loss = criterion(output[:,-1], targets[:,-1])
        epoch_loss.append(loss.item())
        
    return np.array(epoch_loss).mean()

In [0]:
import torch

In [0]:
class EncoderRNN(torch.nn.Module):
    """LSTM encoder whose initial hidden state is computed from the configuration.

    The configuration vector is embedded by two Linear+BatchNorm+ReLU layers
    into a `hidden_size` vector that is used as the initial hidden state of
    every LSTM layer and direction. The initial cell state is all zeros.

    Args:
        input_size: number of features per time step of the input sequence.
        hidden_size: LSTM hidden size (also the size of the config embedding).
        nof_configs: length of the configuration vector.
        num_layers: number of stacked LSTM layers.
        dropout: dropout between LSTM layers.
        bidirectional: whether the LSTM runs in both directions.
    """

    def __init__(self, input_size, hidden_size, nof_configs, num_layers, dropout = 0.5, bidirectional=False):
        super(EncoderRNN, self).__init__()

        self.nof_configs = nof_configs
        self.num_layers = num_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size,
                                  hidden_size=hidden_size,
                                  num_layers=num_layers,
                                  dropout=dropout,
                                  bidirectional=bidirectional)

        self.relu = torch.nn.functional.relu

        # Config embedding: nof_configs -> hidden_size/2 -> hidden_size
        half_hidden = int(self.hidden_size/2)
        self.encode_fc1 = torch.nn.Linear(self.nof_configs, half_hidden)
        self.encode_bn1 = torch.nn.BatchNorm1d(half_hidden)
        self.encode_fc2 = torch.nn.Linear(half_hidden, self.hidden_size)
        self.encode_bn2 = torch.nn.BatchNorm1d(self.hidden_size)

    def forward(self, seq, config):
        """Encode `seq` conditioned on `config`.

        Args:
            seq: tensor laid out as (features, batch, time); it is permuted to
                the (time, batch, features) layout expected by the LSTM.
            config: tensor of shape (batch, nof_configs).

        Returns:
            ``(output, hidden, cell)`` as produced by the LSTM.
        """
        h0 = self.initHidden(config)
        c0 = self.initCell(seq.size()[1])  # batch size is read before the permute
        seq = seq.permute(2,1,0)
        output, (hidden,cell) = self.lstm(seq, (h0,c0))
        return output, hidden, cell

    def initHidden(self, config):
        """Embed `config` and replicate it for every layer and direction."""
        x = self.relu(self.encode_bn1(self.encode_fc1(config)))
        x = self.relu(self.encode_bn2(self.encode_fc2(x)))
        return torch.stack([x for _ in range(self.num_layers*self.num_directions)])

    def initCell(self, batch_size):
        """Return a zero initial cell state matching the module's dtype and device."""
        # new_zeros inherits dtype/device from the module's parameters, so the
        # cell state stays compatible with the hidden state after .double()/.to(device).
        return self.encode_fc1.weight.new_zeros(
            self.num_layers*self.num_directions, batch_size, self.hidden_size)

In [0]:
class DecoderRNN(torch.nn.Module):
    """Unidirectional LSTM decoder that predicts one step at a time.

    Args:
        input_size: number of features per decoder input step.
        hidden_size: LSTM hidden size.
        output_size: size of the prediction produced for each step.
        num_layers: number of stacked LSTM layers.
        dropout: dropout between LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout = 0.5):
        super(DecoderRNN, self).__init__()

        self.output_size = output_size
        self.num_layers = num_layers
        self.dropout = dropout

        self.hidden_size = hidden_size
        self.lstm = torch.nn.LSTM(input_size=input_size,
                                  hidden_size=hidden_size,
                                  num_layers=num_layers,
                                  dropout=dropout,
                                  bidirectional=False)

        self.fc_out = torch.nn.Linear(hidden_size, output_size)
        self.relu = torch.nn.functional.relu

    def forward(self, seq, h0, c0):
        """Run a single decoding step.

        Args:
            seq: tensor of shape (batch,) with the previous value of each curve.
            h0, c0: LSTM states of shape (num_layers, batch, hidden_size).

        Returns:
            ``(output, hidden, cell)`` where `output` has shape (batch,) when
            `output_size` is 1 and (batch, output_size) otherwise.
        """
        seq = seq.unsqueeze(0)   # add the time axis (a single step)
        seq = seq.unsqueeze(-1)  # add the feature axis
        output, (hidden,cell) = self.lstm(seq, (h0,c0))
        # Remove the single-step time axis explicitly. A bare squeeze() would
        # also drop the batch axis for a batch of size one, returning a 0-d
        # tensor that cannot be fed back in as the next decoder input.
        output = self.fc_out(output).squeeze(0)
        if self.output_size == 1:
            output = output.squeeze(-1)
        return output, hidden, cell


In [0]:
class Seq2Seq(torch.nn.Module):
  """Encoder-decoder model that extrapolates a curve step by step.

  The encoder's final hidden and cell states are passed directly to the
  decoder, so both must use the same hidden size.
  """

  def __init__(self, encoder, decoder):
    super(Seq2Seq, self).__init__()
    self.encoder = encoder
    self.decoder = decoder

    assert encoder.hidden_size == decoder.hidden_size
    #assert encoder.num_layers == decoder.num_layers

  def forward(self, source, target, teacher_forcing_ratio = 0.5):
    """Predict ``target[:, 1:]`` one step at a time.

    Args:
      source: pair ``(seq, config)`` forwarded to the encoder.
      target: tensor of shape (batch, target_len); ``target[:, 0]`` is the
        first decoder input.
      teacher_forcing_ratio: probability, drawn independently per step, of
        feeding the true value instead of the prediction as the next input.

    Returns:
      Tensor of shape (target_len, batch, 1). Row 0 is all zeros because no
      prediction is made for the first step.
    """
    batch_size = target.size()[0]
    target_len = target.size()[1]

    seq , config = source
    _, hidden, cell = self.encoder(seq, config)

    decoder_input = target[:,0]
    steps = []
    for t in range(1, target_len):
      output, hidden, cell = self.decoder(decoder_input, hidden, cell)
      steps.append(output.reshape(batch_size, 1))
      use_teacher_forcing = np.random.random() < teacher_forcing_ratio
      decoder_input = target[:,t] if use_teacher_forcing else output

    if not steps:
      # Nothing was decoded: only the all-zero first row remains.
      return torch.zeros(target_len, batch_size, 1, device=target.device)
    # Build the result from the decoder outputs so it keeps their dtype and
    # device instead of silently casting into a float32 CPU buffer.
    return torch.stack([torch.zeros_like(steps[0])] + steps)

In [0]:
# NOTE(review): `epochs` is not used by any of the cells shown here.
epochs = 500

# One value per time step goes into the encoder and the decoder; one value comes out.
input_size = 1
decoder_input_size = 1
output_size = 1
# Matches the seven varying configuration parameters listed above.
config_size = 7
hidden_size=25
encoder_dropout=0.5
decoder_dropout=0.5
num_layers=2
bidirectional = True
encoder = EncoderRNN(input_size, hidden_size=hidden_size, nof_configs=config_size, num_layers=num_layers,
                      dropout=encoder_dropout,bidirectional=bidirectional)
# The encoder's final state has num_layers * num_directions slices and is handed to the
# decoder unchanged, so the decoder needs twice the layers when the encoder is bidirectional.
decoder = DecoderRNN(decoder_input_size, hidden_size=hidden_size, output_size=output_size, 
                      num_layers=2*num_layers if bidirectional else num_layers,dropout=decoder_dropout)
model = Seq2Seq(encoder, decoder)

criterion = torch.nn.MSELoss()
# NOTE(review): 10e-3 equals 0.01, not 0.001 — confirm the intended weight decay.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=10e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50)

In [0]:
# Rebuild the architecture once per checkpoint and restore the trained weights.
model_list = []
for i in range(5):
  encoder = EncoderRNN(input_size, hidden_size=hidden_size, nof_configs=config_size, num_layers=num_layers,
                      dropout=encoder_dropout,bidirectional=bidirectional)
  decoder = DecoderRNN(decoder_input_size, hidden_size=hidden_size, output_size=output_size,
                      num_layers=2*num_layers if bidirectional else num_layers,dropout=decoder_dropout)
  model = Seq2Seq(encoder, decoder)

  # map_location="cpu" lets checkpoints that were saved on a GPU load on a CPU-only machine.
  state_dict = torch.load("content/model_cond_lstm"+str(i)+".pt", map_location="cpu")
  model.load_state_dict(state_dict)
  model_list.append(model)

In [207]:
# Test MSE of each of the five restored models, averaged over the models.
test_losses = [test(model_list[i], criterion) for i in range(5)]
print(np.mean(test_losses))

210.86206994435142


In [0]:
def RMSELoss(yhat,y):
    """Root-mean-squared error between predictions `yhat` and targets `y`."""
    squared_error = (yhat - y).pow(2)
    return squared_error.mean().sqrt()

In [209]:
# Test RMSE of each of the five restored models, averaged over the models.
test_losses = [test(model_list[i], RMSELoss) for i in range(5)]
print(np.mean(test_losses))

14.498968578520273
