# Packed ensemble submission process

## Imports

In [1]:
import time

import numpy as np
import pandas as pd
import os
import pickle
import torch
import torch.nn.functional as F

from lips import get_root_path
from lips.dataset.scaler.standard_scaler import StandardScaler
from lips.benchmark.airfransBenchmark import AirfRANSBenchmark
from lips.dataset.airfransDataSet import download_data
from lips.augmented_simulators.torch_simulator import TorchSimulator
from lips.dataset.scaler.standard_scaler_iterative import StandardScalerIterative

from my_augmented_simulator import *

## Generic Step (Load the required data) <a id='generic_step'></a>

In [2]:
# Paths needed by the rest of the notebook: the LIPS root, the dataset
# directory, the benchmark name and the log file.
LIPS_PATH = get_root_path()
DIRECTORY_NAME = '../Dataset'
BENCHMARK_NAME = "Case1"
# os.path.join only inserts a separator when LIPS_PATH does not already end
# with one, so the log file is placed inside the LIPS root either way
# (plain `+` concatenation would glue the file name onto the directory name).
LOG_PATH = os.path.join(LIPS_PATH, "lips_logs.log")

Define the paths of the configuration files, which describe the specific characteristics of the use case and of the augmented simulator.

In [3]:
# Configuration file related to the benchmark
BENCH_CONFIG_PATH = os.path.join(
    "..",
    "airfoilConfigurations",
    "benchmarks",
    "confAirfoil.ini",
)
# Configuration file related to the augmented simulator
SIM_CONFIG_PATH = "config.ini"

Download the data

In [4]:
# Download the dataset only when its directory is not present yet, so that
# re-running this cell does not trigger the download again.
if not os.path.isdir(DIRECTORY_NAME):
    download_data(root_path=".", directory_name=DIRECTORY_NAME)

Loading the dataset through the dedicated class provided by the LIPS platform offers several advantages:

1. Ease the importing of datasets
1. A set of functions to organize the `inputs` and `outputs` required by augmented simulators


In [5]:
# Load the required benchmark datasets
def load_dataset(cache_path='benchmark.pkl'):
    """
    Load the airfrans dataset as a benchmark object

    The benchmark is first looked up in a local pickle cache. When the cache
    is missing or cannot be read, the benchmark is rebuilt from the dataset
    directory and the cache is (re)written.

    Parameters
    ----------
    cache_path : str, optional
        Location of the pickle cache file. Defaults to ``'benchmark.pkl'``.

    Returns
    -------
    benchmark : AirfRANSBenchmark
        The airfrans benchmark object
    """
    try:
        # NOTE(security): unpickling can execute arbitrary code. Only load a
        # cache file that was produced by this notebook.
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except`, so that
        # KeyboardInterrupt / SystemExit still propagate: interrupting a slow
        # cache load must not silently start a full rebuild that overwrites
        # the cache. Any ordinary failure (missing or corrupt file, class
        # definition changed, ...) still falls back to rebuilding.
        print(f"Could not load benchmark cache '{cache_path}' "
              f"({type(exc).__name__}: {exc}); rebuilding it.")

    benchmark = AirfRANSBenchmark(benchmark_path=DIRECTORY_NAME,
                                  config_path=BENCH_CONFIG_PATH,
                                  benchmark_name=BENCHMARK_NAME,
                                  log_path=LOG_PATH)
    benchmark.load(path=DIRECTORY_NAME)
    # Cache the loaded benchmark so that later calls can skip the loading step.
    with open(cache_path, 'wb') as f:
        pickle.dump(benchmark, f)

    return benchmark

#benchmark = load_dataset()

## Simulation

### Model creation

In [6]:
def simulate(config_names, device="cuda:0"):
    """
    Creates packed MLP models for each model defined by config_names, trains them and evaluates them on the test dataset.
    The results are then saved in appropriate files (results + models).

    For every configuration, the training and evaluation wall-clock times are
    appended to ``evaluations/packed_mlp_<config_name>/time.txt``.

    Parameters
    ----------
    config_names : list
        List of the names of the configurations to be used for training the model.
    device : str, optional
        Device string handed to ``TorchSimulator``. Defaults to ``"cuda:0"``.

    Returns
    -------
    int
        0 if we have successfully trained and evaluated the model.
    """
    # Model name, identical for every configuration.
    name = "packed_mlp"

    for config_name in config_names:
        print("Config name : ", config_name)
        print("loading data...")
        # The benchmark is loaded again for every configuration and released
        # at the end of the iteration.
        benchmark = load_dataset()

        chunk_sizes = benchmark.train_dataset.get_simulations_sizes()
        no_norm_x = benchmark.train_dataset.get_no_normalization_axis_indices()
        scalerParams = {"chunk_sizes": chunk_sizes, "no_norm_x": no_norm_x}

        print("defining model...")
        # PackedMLP model definition
        torch_sim = TorchSimulator(name=name,
                                   model=PackedMLP,
                                   scaler=StandardScalerIterative,
                                   scalerParams=scalerParams,
                                   log_path=None,
                                   device=device,
                                   seed=42,
                                   bench_config_path=BENCH_CONFIG_PATH,
                                   bench_config_name="Benchmark1",
                                   sim_config_path=SIM_CONFIG_PATH,
                                   sim_config_name=config_name,
                                   )

        print("training...")
        # model training (timed)
        start = time.perf_counter()
        torch_sim.train(benchmark.train_dataset,
                        save_path=None,
                        pin_memory=True,
                        non_blocking=True,
                        num_workers=6
                        )
        train_time = time.perf_counter() - start

        print("saving model...")
        # saving the model
        torch_sim.save(path="./models_new")

        print("evaluating model...")
        # evaluating the model (timed)
        start = time.perf_counter()
        benchmark.evaluate_simulator(augmented_simulator=torch_sim,
                                     eval_batch_size=256000,
                                     dataset="all",
                                     shuffle=False,
                                     save_path="./evaluations",
                                     save_predictions=True
                                     )
        evaluation_time = time.perf_counter() - start

        # Save the timings to file. The directory is created explicitly so
        # that a missing folder cannot raise FileNotFoundError after training
        # and evaluation have already completed.
        timing_dir = os.path.join("evaluations", f"{name}_{config_name}")
        os.makedirs(timing_dir, exist_ok=True)
        with open(os.path.join(timing_dir, "time.txt"), "a") as f:
            f.write(f"Training took {train_time:.2f} seconds\n")
            # The file is opened in append mode: end the last line with a
            # newline so that entries from successive runs stay on separate lines.
            f.write(f"Evaluation took {evaluation_time:.2f} seconds\n")

        del benchmark
    return 0

In [7]:
# Names of the configurations to train and evaluate; `simulate` forwards each
# one to the simulator as `sim_config_name` (presumably a section of the
# simulator configuration file — TODO confirm).
config_names = ["DEEP_SMALL_A6_DECAY", "DEEP_SMALL_A8_DECAY"]
simulate(config_names)

Config name :  DEEP_SMALL_A6_DECAY
loading data...
defining model...
training...


KeyboardInterrupt: 