# Packed ensemble submission process

## Imports

In [1]:
import time

import numpy as np
import pandas as pd
import os
import pickle
import torch
import torch.nn.functional as F

from lips import get_root_path
from lips.dataset.scaler.standard_scaler import StandardScaler
from lips.benchmark.airfransBenchmark import AirfRANSBenchmark
from lips.dataset.airfransDataSet import download_data
from lips.augmented_simulators.torch_simulator import TorchSimulator
from lips.dataset.scaler.standard_scaler_iterative import StandardScalerIterative

#from my_custom_packed_ensemble import *
#from my_packed_cv import *
from my_augmented_simulator import *

## Generic Step (Load the required data) <a id='generic_step'></a>

In [2]:
# Paths and names required by the rest of the notebook.
LIPS_PATH = get_root_path()
DIRECTORY_NAME = '../ml4physim_startingkit/Dataset'
BENCHMARK_NAME = "Case1"
# Build the log path with os.path.join instead of "+": the result is the same
# when LIPS_PATH already ends with a path separator, and it still points
# inside LIPS_PATH when it does not.
LOG_PATH = os.path.join(LIPS_PATH, "lips_logs.log")

Define the paths of the configuration files, which describe the specific characteristics of the use case (benchmark) and of the augmented simulator.

In [3]:
# Configuration file related to the benchmark
BENCH_CONFIG_PATH = os.path.join(
    "airfoilConfigurations",
    "benchmarks",
    "confAirfoil.ini",
)
# Configuration file handed to the simulator as `sim_config_path`
SIM_CONFIG_PATH = r"config.ini"

Download the data

In [4]:
# Only call download_data when the dataset directory is not already present.
if not os.path.isdir(DIRECTORY_NAME):
    download_data(root_path=".", directory_name=DIRECTORY_NAME)

Loading the dataset with the dedicated class provided by the LIPS platform offers several advantages:

1. It eases the import of datasets
1. It provides a set of functions to organize the `inputs` and `outputs` required by augmented simulators


In [5]:
# Load the required benchmark datasets
def load_dataset():
    """
    Load the airfrans dataset as a benchmark object

    The benchmark is cached in ``benchmark.pkl`` in the current working
    directory. If that file can be unpickled, its content is returned as is.
    Otherwise the benchmark is built from ``DIRECTORY_NAME`` and the cache
    file is (re)written.

    Returns
    -------
    benchmark : AirfRANSBenchmark
        The airfrans benchmark object
    """
    cache_path = 'benchmark.pkl'
    try:
        # NOTE: unpickling can execute arbitrary code; only use a cache file
        # that was produced by this notebook.
        with open(cache_path, 'rb') as f:
            benchmark = pickle.load(f)
    except Exception as exc:
        # Catch ``Exception`` rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit propagate instead of silently
        # triggering a full rebuild of the benchmark.
        print(f"Could not read {cache_path} ({exc!r}); rebuilding the benchmark...")
        benchmark = AirfRANSBenchmark(benchmark_path=DIRECTORY_NAME,
                                    config_path=BENCH_CONFIG_PATH,
                                    benchmark_name=BENCHMARK_NAME,
                                    log_path=LOG_PATH)
        benchmark.load(path=DIRECTORY_NAME)
        # Refresh the cache so that the next call can take the fast path.
        with open(cache_path, 'wb') as f:
            pickle.dump(benchmark, f)

    return benchmark

#benchmark = load_dataset()

## Simulation

### Model creation

In [6]:
def simulate(config_names, device="cuda:0"):
    """
    Creates a packed MLP model, trains it and evaluates it, once per configuration.

    For every configuration name the benchmark is loaded, a ``TorchSimulator``
    wrapping ``PackedMLP`` is trained on the training dataset, saved under
    ``./models`` and evaluated with ``dataset="all"`` (results saved under
    ``./evaluations``). Training and evaluation wall-clock times are appended
    to ``evaluations/<name>_<config_name>/time.txt``.

    Parameters
    ----------
    config_names : list
        List of the names of the configurations to be used for training the model.
    device : str, optional
        Device string forwarded to ``TorchSimulator``. Defaults to ``"cuda:0"``.

    Returns
    -------
    int
        0 if we have successfully trained and evaluated the model.
    """
    # The simulator name does not depend on the configuration.
    name = "packed_mlp"

    for config_name in config_names:
        print("Config name : ", config_name)
        print("loading data...")
        # The benchmark is loaded anew for each configuration and deleted at
        # the end of the iteration.
        benchmark = load_dataset()

        chunk_sizes = benchmark.train_dataset.get_simulations_sizes()
        no_norm_x = benchmark.train_dataset.get_no_normalization_axis_indices()
        scalerParams = {"chunk_sizes": chunk_sizes, "no_norm_x": no_norm_x}

        print("defining model...")
        # PackedMLP model definition
        torch_sim = TorchSimulator(name=name,
                                   model=PackedMLP,
                                   scaler=StandardScalerIterative,
                                   scalerParams=scalerParams,
                                   log_path=None,
                                   device=device,
                                   seed=42,
                                   bench_config_path=BENCH_CONFIG_PATH,
                                   bench_config_name="Benchmark1",
                                   sim_config_path=SIM_CONFIG_PATH,
                                   sim_config_name=config_name,
                                   )

        print("training...")
        # model training
        start = time.perf_counter()
        torch_sim.train(benchmark.train_dataset,
                        save_path=None,
                        pin_memory=True,
                        non_blocking=True,
                        num_workers=6
                        )
        end = time.perf_counter()
        train_time = end - start

        print("saving model...")
        # saving the model
        torch_sim.save(path="./models")

        print("evaluating model...")
        # evaluating the model
        start = time.perf_counter()
        benchmark.evaluate_simulator(augmented_simulator=torch_sim,
                                     eval_batch_size=256000,
                                     dataset="all",
                                     shuffle=False,
                                     save_path="./evaluations",
                                     save_predictions=True
                                     )
        end = time.perf_counter()
        evaluation_time = end - start

        # save the timings to file; make sure the target directory exists
        # instead of relying on the evaluation step having created it
        timing_dir = os.path.join("evaluations", f"{name}_{config_name}")
        os.makedirs(timing_dir, exist_ok=True)
        with open(os.path.join(timing_dir, "time.txt"), "a") as f:
            f.write(f"Training took {train_time:.2f} seconds\n")
            # The file is opened in append mode: end the last line with a
            # newline so that a later run starts on a fresh line.
            f.write(f"Evaluation took {evaluation_time:.2f} seconds\n")

        del benchmark
    return 0

In [7]:
"""
    Anthony : 0
    Anton   : 1
"""

partition = 1

# Partition 0 gets the first list of configurations; any other value gets the second.
config_names = (
    [
        "DEEP_SMALL_A4_DECAY_DROPOUT",
        "DEEP_SMALL_A6_DECAY_DROPOUT",
        "DEEP_SMALL_G2_DECAY",
        "DEEP_SMALL_G4_DECAY",
    ]
    if partition == 0
    else [
        "DEEP_SMALL_A2_DECAY",
        "DEEP_SMALL_A4_DECAY",
        "DEEP_SMALL_A6_DECAY",
    ]
)

simulate(config_names)

Config name :  DEEP_SMALL_A2
loading data...
defining model...
training...
Train Epoch: 0   Avg_Loss: 6.07765 ['MAELoss: 4.11119']
Train Epoch: 1   Avg_Loss: 5.85345 ['MAELoss: 3.86453']
Train Epoch: 2   Avg_Loss: 5.61441 ['MAELoss: 3.64932']
Train Epoch: 3   Avg_Loss: 5.36938 ['MAELoss: 3.48381']
Train Epoch: 4   Avg_Loss: 5.13334 ['MAELoss: 3.39187']
Train Epoch: 5   Avg_Loss: 4.91894 ['MAELoss: 3.34034']
Train Epoch: 6   Avg_Loss: 4.73939 ['MAELoss: 3.30213']
Train Epoch: 7   Avg_Loss: 4.58647 ['MAELoss: 3.26578']
Train Epoch: 8   Avg_Loss: 4.45419 ['MAELoss: 3.22700']
Train Epoch: 9   Avg_Loss: 4.33539 ['MAELoss: 3.18295']
Train Epoch: 10   Avg_Loss: 4.22725 ['MAELoss: 3.13750']
Train Epoch: 11   Avg_Loss: 4.13377 ['MAELoss: 3.09346']
Train Epoch: 12   Avg_Loss: 4.05200 ['MAELoss: 3.05124']
Train Epoch: 13   Avg_Loss: 3.97870 ['MAELoss: 3.01030']
Train Epoch: 14   Avg_Loss: 3.91198 ['MAELoss: 2.97115']
Train Epoch: 15   Avg_Loss: 3.85033 ['MAELoss: 2.93454']
Train Epoch: 16   Avg_L

0

In [None]:
# Run the full pipeline for an explicitly chosen pair of configurations.
simulate(config_names=["DEEP_SMALL_3", "DEEP_SMALL_5"])

Config name :  DEEP_SMALL_3
loading data...
defining model...
training...
Train Epoch: 0   Avg_Loss: 5.86416 ['MAELoss: 3.81465']
Train Epoch: 1   Avg_Loss: 5.43878 ['MAELoss: 3.46463']
Train Epoch: 2   Avg_Loss: 5.04451 ['MAELoss: 3.29999']
Train Epoch: 3   Avg_Loss: 4.70953 ['MAELoss: 3.23064']
Train Epoch: 4   Avg_Loss: 4.44560 ['MAELoss: 3.16900']
Train Epoch: 5   Avg_Loss: 4.23493 ['MAELoss: 3.09504']
Train Epoch: 6   Avg_Loss: 4.07055 ['MAELoss: 3.02052']
Train Epoch: 7   Avg_Loss: 3.93675 ['MAELoss: 2.94812']
Train Epoch: 8   Avg_Loss: 3.82260 ['MAELoss: 2.88436']
Train Epoch: 9   Avg_Loss: 3.72014 ['MAELoss: 2.82495']
Train Epoch: 10   Avg_Loss: 3.62594 ['MAELoss: 2.76979']
Train Epoch: 11   Avg_Loss: 3.53574 ['MAELoss: 2.71655']
Train Epoch: 12   Avg_Loss: 3.44907 ['MAELoss: 2.66550']
Train Epoch: 13   Avg_Loss: 3.36603 ['MAELoss: 2.61660']
Train Epoch: 14   Avg_Loss: 3.28656 ['MAELoss: 2.56992']
Train Epoch: 15   Avg_Loss: 3.21109 ['MAELoss: 2.52562']
Train Epoch: 16   Avg_Lo

In [None]:
# Plot the stored training losses of the SMOOTH_G4 model, then rebuild the
# simulator, restore it with epoch=99 and train it for 20 more epochs.

import json
import matplotlib.pyplot as plt

with open('models/packed_mlp_SMOOTH_G4/losses.json', 'r') as losses_file:
    losses = json.load(losses_file)["train_losses"]

plt.plot(losses)

# Scaler parameters are derived from the training dataset of the benchmark.
benchmark = load_dataset()
chunk_sizes = benchmark.train_dataset.get_simulations_sizes()
no_norm_x = benchmark.train_dataset.get_no_normalization_axis_indices()
scalerParams = dict(chunk_sizes=chunk_sizes, no_norm_x=no_norm_x)

torch_sim = TorchSimulator(
    name="packed_mlp",
    model=PackedMLP,
    scaler=StandardScalerIterative,
    scalerParams=scalerParams,
    log_path=None,
    device="cuda:0",
    seed=42,
    bench_config_path=BENCH_CONFIG_PATH,
    bench_config_name="Benchmark1",
    sim_config_path=SIM_CONFIG_PATH,
    sim_config_name="SMOOTH_G4",
)

# Load the saved state from ./models before resuming training.
torch_sim.restore(epoch=99, path="./models")
torch_sim.train(
    benchmark.train_dataset,
    epochs=20,
    save_path=None,
    pin_memory=True,
    non_blocking=True,
    num_workers=6,
)
torch_sim.visualize_convergence()

---

### Training

In [None]:
# Train for 3 epochs with a training batch size of 128000; nothing is saved (save_path=None).
torch_sim.train(
    benchmark.train_dataset,
    epochs=3,
    train_batch_size=128000,
    save_path=None,
    num_workers=6,
    pin_memory=True,
    non_blocking=True,
)

### Metric evaluation

In [None]:
# Evaluate the simulator with dataset="all"; save_path="." and
# save_predictions=True are passed so that results are written out.
torch_sim_metrics = benchmark.evaluate_simulator(
    augmented_simulator=torch_sim,
    dataset="all",
    eval_batch_size=256000,
    shuffle=False,
    save_predictions=True,
    save_path=".",
)

In [None]:
# Display the entry stored under the "test" key of the evaluation results.
torch_sim_metrics["test"]