# Task A: Creating a Performance Predictor

In this task, you will use training data from 2000 configurations on a single OpenML dataset to train a performance predictor. The data will be splitted into train, test and validation set and we will only use the first 10 epochs of the learning curves for predicitons. You are provided with the full benchmark logs for Fashion-MNIST, that is learning curves, config parameters and gradient statistics, and you can use them freely.

For questions, you can contact zimmerl@informatik.uni-freiburg.

__Note: Please use the dataloading and splits you are provided with in this notebook.__

## Specifications:

* Data: fashion_mnist.json
* Number of datasets: 1
* Number of configurations: 2000
* Number of epochs seed during prediction: 10
* Available data: Learning curves, architecture parameters and hyperparameters, gradient statistics 
* Target: Final validation accuracy
* Evaluation metric: MSE

## Importing and splitting data

__Note__: There are 51 steps logged, 50 epochs plus the 0th epoch, prior to any weight updates.

In [7]:
!pip install wget
!pip install zipfile36

Collecting wget
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25ldone
[?25h  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9680 sha256=8ec0d3fe632d8c759e89484c457bd554c15874fc1cf05dfc8b56278a625178b7
  Stored in directory: /home/sambit/.cache/pip/wheels/90/1d/93/c863ee832230df5cfc25ca497b3e88e0ee3ea9e44adc46ac62
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2
You should consider upgrading via the '/home/sambit/.pyenv/versions/3.6.9/bin/python3.6 -m pip install --upgrade pip' command.[0m
Collecting zipfile36
  Downloading zipfile36-0.1.3-py3-none-any.whl (20 kB)
Installing collected packages: zipfile36
Successfully installed zipfile36-0.1.3
You should consider upgrading via the '/home/sambit/.pyenv/versions/3.6.9/bin/python3.6 -m pip install --upgrade pip' command.[0m


In [21]:
import wget
import zipfile
dir_path = './content/'
filename=wget.download('https://ndownloader.figshare.com/files/21001311')
with zipfile.ZipFile(filename, 'r') as zip_ref:
    zip_ref.extractall("")
!rm fashion_mnist.zip
wget.download('https://raw.githubusercontent.com/automl/LCBench/master/api.py')
wget.download('https://raw.githubusercontent.com/infomon/Extrapolation-of-Learning-Curves/master/utils.py')
!mkdir content/models
!mkdir models

mkdir: cannot create directory ‘content/models’: No such file or directory


In [24]:
%%capture
%cd ..
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

from api import Benchmark
import content.utils as utils
import torch

ModuleNotFoundError: No module named 'api'

In [23]:
# bench_dir = dir_path+"fashion_mnist.json"
bench_dir = '/home/sambit/PROGRAMMING/DL_PROJECT/TEAM_WORK_FREIBURG/Extrapolation-of-Learning-Curves/DATA/fashion_mnist.json'
bench = Benchmark(bench_dir, cache=False)

NameError: name 'Benchmark' is not defined

In [0]:
# Read data
def cut_data(data, cut_position):
    targets = []
    for dp in data:
        targets.append(dp["Train/val_accuracy"][50])
        for tag in dp:
            if tag.startswith("Train/"):
                dp[tag] = dp[tag][0:cut_position]
    return data, targets

def read_data():
    dataset_name = 'Fashion-MNIST'
    n_configs = bench.get_number_of_configs(dataset_name)
    
    # Query API
    data = []
    for config_id in range(n_configs):
        data_point = dict()
        data_point["config"] = bench.query(dataset_name=dataset_name, tag="config", config_id=config_id)
        for tag in bench.get_queriable_tags(dataset_name=dataset_name, config_id=config_id):
            if tag.startswith("Train/"):
                data_point[tag] = bench.query(dataset_name=dataset_name, tag=tag, config_id=config_id)    
        data.append(data_point)
        
    # Split: 50% train, 25% validation, 25% test (the data is already shuffled)
    indices = np.arange(n_configs)
    ind_train = indices[0:int(np.floor(0.5*n_configs))]
    ind_val = indices[int(np.floor(0.5*n_configs)):int(np.floor(0.75*n_configs))]
    ind_test = indices[int(np.floor(0.75*n_configs)):]

    array_data = np.array(data)
    train_data = array_data[ind_train]
    val_data = array_data[ind_val]
    test_data = array_data[ind_test]
    
    # Cut curves for validation and test
    cut_position = 11
    val_data, val_targets = cut_data(val_data, cut_position)
    test_data, test_targets = cut_data(test_data, cut_position)
    train_data, train_targets = cut_data(train_data, 51)   # Cut last value as it is repeated
    
    return train_data, val_data, test_data, train_targets, val_targets, test_targets
    
train_data, val_data, test_data, train_targets, val_targets, test_targets = read_data()

print("Train:", len(train_data))
print("Validation:", len(val_data))
print("Test:", len(test_data))

The data contains the configuration of the trained model and learning curves as well as global and layer-wise gradient statistics.

__Note__: Not all parameters vary across different configurations. The varying parameters are batch_size, max_dropout, max_units, num_layers, learning_rate, momentum, weight_decay

In [0]:
# Config
print("Config example:", train_data[0]["config"])

In [0]:
train_data[1]["config"]

In [0]:
# Learning curve
plt.plot(train_data[10]["Train/val_accuracy"])

In [0]:
# Gradient statistics
plt.plot(train_data[10]["Train/layer_wise_gradient_mean_layer_0"])

## A simple baseline

In [0]:
class SimpleLearningCurvePredictor():
    """A learning curve predictor that predicts the last observed epoch of the validation accuracy as final performance"""
    
    def __init__(self):
        pass
        
    def fit(self, X, y):
        pass
    
    def predict(self, X):
        predictions = []
        for datapoint in X:
            predictions.append(datapoint["Train/val_accuracy"][-1])
        return predictions
    
def score(y_true, y_pred):
    return mean_squared_error(y_true, y_pred)

In [0]:
# Training & tuning
predictor = SimpleLearningCurvePredictor()
predictor.fit(train_data, train_targets)
preds = predictor.predict(val_data)
mse = score(val_targets, preds)
print("Score on validation set:", mse)

In [0]:
# Final evaluation (after tuning)
final_preds = predictor.predict(test_data)
final_score = score(test_targets, final_preds)
print("Final test score:", final_score)

In [0]:
device = utils.check_cuda()

In [6]:
train_data_loader = utils.prep_data(train_data, train_targets, batch_size=32,normalization_factor_temporal_data=[100])
val_data_loader = utils.prep_data(val_data, val_targets, batch_size=32,normalization_factor_temporal_data=[100])
test_data_loader = utils.prep_data(test_data, test_targets, batch_size=32,normalization_factor_temporal_data=[100])

print("train data shape : ", train_data_loader.shape)

NameError: name 'utils' is not defined

In [5]:
#-------- new code : Sambit, 20/02/2020 ------------------------------#
# create a MLP in pytorch : 3 hidden dense layers, 16, 16, 8
import torch.nn as nn
import torch.nn.functional as F

class LearningCurveMLP(nn.Module):
    """
    Create the architecture
    """

    def __init__(self, input_size, dropout=0):
        """
        Default constructor
        """

        super(LearningCurveMLP, self).__init__()

        self.L1_linear = nn.Linear(7, 16)  # dense layer, 7 inputs (same as number of input features), 16 outputs, so 16 hidden units
        self.L2_linear = nn.Linear(16, 16)
        self.L3_linear = nn.Linear(16, 8)
        self.L4_linear = nn.Linear(8, 1)

        self.drpout = dropout



    def forward(self, x):
        """
        Forward propagation function
        """

        out1 = F.Dropout(F.Sigmoid(self.L1_linear(x)), p=0.3)
        out2 = F.Dropout(F.Sigmoid(self.L2_linear(out1)), p=0.3)
        out3 = F.Dropout(F.Sigmoid(self.L3_linear(out2)), p=0.3)

        out = F.Sigmoid(self.L4_linear(out3))

        return out





In [0]:
class UnivariatMultiStepLSTM(torch.nn.Module):
    """An univariate multi-step LSTM that predicts from a single input sequence the validation learning curve"""
    
    def __init__(self,input_size, hidden_size, output_size, 
                 num_layers=1, lstm_dropout=0, bidirectional=False,fc_dropout=0):
        super(UnivariatMultiStepLSTM,self).__init__()
        
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm_dropout = lstm_dropout
        self.bidirectional = bidirectional
        self.fc_dropout = fc_dropout

        self.relu = torch.nn.functional.relu
        self.encode_fc1 = torch.nn.Linear(7,int(self.hidden_size/4))
        self.encode_bn1 = torch.nn.BatchNorm1d(int(self.hidden_size/4))
        self.encode_fc2 = torch.nn.Linear(int(self.hidden_size/4),self.hidden_size)
        self.encode_bn2 = torch.nn.BatchNorm1d(self.hidden_size)

        self.lstm = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                            dropout=lstm_dropout, bidirectional=bidirectional)
        
        self.fc = torch.nn.Linear(self.hidden_size*10*2 if self.bidirectional else hidden_size*10,output_size)

    def encode(self,x):
        x = self.relu(self.encode_bn1(self.encode_fc1(x)))
        return self.encode_bn2(self.encode_fc2(x))

    def forward(self,x):
        seq, config = x
        batch_size = seq.size(0)

        h0 = self.encode(config)
        h0 = torch.stack([h0 for _ in range(self.num_layers*2 if self.bidirectional else self.num_layers)])
        c0 = torch.zeros(self.num_layers*2 if self.bidirectional else self.num_layers, config.size()[0], self.hidden_size)
        seq = torch.transpose(seq,1,0)
        seq = seq.unsqueeze(-1)

        lstm_out, _ = self.lstm(seq,(h0,c0))
        lstm_out = lstm_out.permute(1,0,2)
        lstm_out = lstm_out.contiguous().view(batch_size,-1)
      
        forecast = self.fc(lstm_out)
        return forecast

In [0]:
def train(model, optimizer, criterion, clip=5):
    model.train()
    epoch_loss = []
    for val_acc, configs, targets in train_data_loader:
      optimizer.zero_grad()
      output = model([val_acc,configs])
      loss = criterion(output, targets)
      loss.backward()
      torch.nn.utils.clip_grad_norm_(model.parameters(),clip)
      optimizer.step()
      epoch_loss.append(loss.item())
    return np.array(epoch_loss).mean()

In [0]:
def evaluate(model, criterion):
  model.eval()
  epoch_loss = []
  with torch.no_grad():
    for val_acc, configs, targets in val_data_loader:
      output = model([val_acc, configs])
      loss = criterion(output, targets)
      epoch_loss.append(loss.item())
  return np.array(epoch_loss).mean()

In [0]:
def test(model, criterion):
    #model.load_state_dict(torch.load('content/models/model.pt'))
    model.eval()
    epoch_loss=[]
    with torch.no_grad():
      for val_acc, configs, targets in test_data_loader:
        output = model([val_acc, configs])
        loss = criterion(output, targets)
        epoch_loss.append(loss.item())
        
    return np.array(epoch_loss).mean()

In [0]:
def max_error(model, criterion):
    #model.load_state_dict(torch.load('content/models/model.pt'))
    model.eval()
    epoch_loss=[]
    with torch.no_grad():
      for val_acc, configs, targets in test_data_loader:
        output = model([val_acc, configs])
        loss = np.abs(output.detach().numpy()-targets.detach().numpy().reshape(-1,1))
        epoch_loss += loss.tolist()
        
    return np.array(epoch_loss)

In [0]:
def init_weights(m):
    for name, param in m.named_parameters():
      torch.nn.init.uniform_(param.data, -0.08, 0.08)

In [11]:
input_size = 1
outcome_dim = 1
hidden_dim=35
num_layers=2
config_size = 7
bidirectional = True
lstm_dropout=0.5
fc_dropout=0.0

model = UnivariatMultiStepLSTM(input_size, hidden_dim, outcome_dim, num_layers,
                      lstm_dropout=lstm_dropout,bidirectional=bidirectional,fc_dropout=fc_dropout)
pytorch_total_params = sum(p.numel() for p in model.parameters())
print(pytorch_total_params)
model.apply(init_weights)

41766


UnivariatMultiStepLSTM(
  (encode_fc1): Linear(in_features=7, out_features=8, bias=True)
  (encode_bn1): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (encode_fc2): Linear(in_features=8, out_features=35, bias=True)
  (encode_bn2): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lstm): LSTM(1, 35, num_layers=2, dropout=0.5, bidirectional=True)
  (fc): Linear(in_features=700, out_features=1, bias=True)
)

In [0]:
epochs=200
lr=0.01
weight_decay = 10e-3
T_0 = int(epochs/4)

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

In [0]:
from collections import namedtuple
train_stats = namedtuple("Stats",["train_loss", "val_loss"])
stats = train_stats(train_loss=np.zeros(epochs),
                     val_loss=np.zeros(epochs))

In [0]:
best_val_loss = float('inf')

for epoch in range(epochs):
  train_loss = train(model, optimizer, criterion)
  val_loss = evaluate(model, criterion)

  if val_loss < best_val_loss:
    torch.save(model.state_dict(),"content/models/model_5.pt")    
    print('Val loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(best_val_loss,val_loss))
    best_val_loss = val_loss

  print(f'Epoch: {epoch}\t Train Loss: {train_loss:.3f}\t Val. Loss: {val_loss:.3f}')
  stats.train_loss[epoch] = train_loss
  stats.val_loss[epoch] = val_loss

  scheduler.step()

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Val loss decreased (inf --> 69.324066).  Saving model ...
Epoch: 0	 Train Loss: 1505.536	 Val. Loss: 69.324
Val loss decreased (69.324066 --> 63.472178).  Saving model ...
Epoch: 1	 Train Loss: 81.310	 Val. Loss: 63.472
Epoch: 2	 Train Loss: 79.564	 Val. Loss: 91.739
Val loss decreased (63.472178 --> 62.456640).  Saving model ...
Epoch: 3	 Train Loss: 81.428	 Val. Loss: 62.457
Val loss decreased (62.456640 --> 62.332173).  Saving model ...
Epoch: 4	 Train Loss: 81.048	 Val. Loss: 62.332
Val loss decreased (62.332173 --> 62.097000).  Saving model ...
Epoch: 5	 Train Loss: 77.678	 Val. Loss: 62.097
Epoch: 6	 Train Loss: 76.837	 Val. Loss: 63.627
Epoch: 7	 Train Loss: 76.987	 Val. Loss: 62.134
Epoch: 8	 Train Loss: 77.556	 Val. Loss: 65.918
Epoch: 9	 Train Loss: 78.863	 Val. Loss: 88.224
Epoch: 10	 Train Loss: 79.217	 Val. Loss: 65.825
Epoch: 11	 Train Loss: 76.923	 Val. Loss: 62.479
Epoch: 12	 Train Loss: 77.953	 Val. Loss: 63.078
Epoch: 13	 Train Loss: 77.424	 Val. Loss: 63.318
Epoch: 1

KeyboardInterrupt: ignored

In [0]:
np.save("content/train_stats_5.npy",stats)

In [0]:
def RMSELoss(yhat,y):
    return torch.sqrt(torch.mean((yhat-y)**2))

In [0]:
test_loss = test(model, criterion)
print(test_loss)

In [0]:
model_list = []
for i in range(5):
  model = UnivariatMultiStepLSTM(input_size, hidden_dim, outcome_dim, num_layers,
                      lstm_dropout=lstm_dropout,bidirectional=bidirectional,fc_dropout=fc_dropout)
  model.load_state_dict(torch.load("content/model_"+str(i+1)+".pt"))
  model_list.append(model)

In [19]:
test_losses = []
for i in range(5):
  test_losses.append(test(model_list[i],criterion))
print(np.array(test_losses).mean())

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


63.266495275497434


In [20]:
test_losses = []
for i in range(5):
  test_losses.append(test(model_list[i],RMSELoss))
print(np.array(test_losses).mean())

7.652655375003815


In [0]:
tmp = max_error(model_list[0],RMSELoss)

In [22]:
max_errors = []
for i in range(5):
  max_errors.append(max_error(model_list[i],RMSELoss).max())
print(np.array(max_errors).mean())

53.93588714599609


In [23]:
max_errors = []
for i in range(5):
  max_errors.append(max_error(model_list[i],RMSELoss))
print(np.array(max_errors).mean(axis=0).shape)
np.save("max_errors",np.array(max_errors).mean(axis=0))

(500, 1)


In [0]:
def test_ensemble(model_list, criterion):
    #model.load_state_dict(torch.load('content/models/model.pt'))
    epoch_loss=[]
    with torch.no_grad():
      for val_acc, configs, targets in test_data_loader:
        output = []
        for model in model_list:
          model.eval()
          output.append(model([val_acc, configs]))
        output = torch.stack(output)
        output = torch.mean(output,dim=0)
        loss = criterion(output, targets.unsqueeze(-1))
        epoch_loss.append(loss.item())
        
    return np.array(epoch_loss).mean()

In [39]:
test_losses=test_ensemble(model_list,criterion)
print(test_losses)

62.36482071876526


In [49]:
test_losses=test_ensemble(model_list,RMSELoss)
print(test_losses)

7.596453607082367


In [0]:
def max_error_ensemble(model_list, criterion):
    epoch_loss=[]
    with torch.no_grad():
      for val_acc, configs, targets in test_data_loader:
        output = []
        for model in model_list:
          model.eval()
          output.append(model([val_acc, configs]))
        output = torch.stack(output)
        output = torch.mean(output,dim=0)
        loss = np.abs(output.detach().numpy()-targets.detach().numpy().reshape(-1,1))
        epoch_loss += loss.tolist()
        
    return np.array(epoch_loss)

In [48]:
test_losses=max_error_ensemble(model_list,RMSELoss)
print(test_losses.max())

53.935890197753906
