# Packed Ensemble Application to the AirfRANS dataset

## Colab setup

Set `colab` to `True` if you are running this notebook on Google Colab.

In [1]:
colab = False
if colab:
    from google.colab import drive

    drive.mount("/content/drive")
    !source/content/drive/MyDrive/my_colab_env/bin/activate
    import sys
    import os

    sys.path.append("/content/drive/MyDrive/my_colab_env/lib/python3.10/site-packages")
    os.chdir("/content/drive/MyDrive/ml4science/ml4physim_startingkit")

    sys.path.append(os.getcwd())

## Installation

Install the LIPS framework if it is not already installed. For more information, see the LIPS framework [GitHub repository](https://github.com/IRT-SystemX/LIPS).

In [2]:
# !pip install -r requirements.txt
# or
# !pip install -U .


Install the AirfRANS package

In [3]:
# !pip install airfrans

## Imports

In [4]:
import numpy as np
import pandas as pd
import os
import pickle
import torch
import torch.nn.functional as F

from lips import get_root_path
from lips.dataset.scaler.standard_scaler import StandardScaler
from lips.benchmark.airfransBenchmark import AirfRANSBenchmark
from lips.dataset.airfransDataSet import download_data

from my_packed_ensemble import *
from my_packed_cv import *

## Generic Step (Load the required data) <a id='generic_step'></a>

In [5]:
# Paths required by the rest of the notebook.
LIPS_PATH = get_root_path()
DIRECTORY_NAME = '../ml4physim_startingkit/Dataset'
BENCHMARK_NAME = "Case1"
# Join with a proper path separator: plain string concatenation glues the file
# name onto the last path component whenever LIPS_PATH has no trailing separator.
LOG_PATH = os.path.join(LIPS_PATH, "lips_logs.log")

Define the paths of the configuration files, which describe the specific characteristics of the use case and of the augmented simulator.

In [6]:
# Benchmark configuration file.
BENCH_CONFIG_PATH = os.path.join(*("airfoilConfigurations", "benchmarks", "confAirfoil.ini"))
# Simulator configuration file.
SIM_CONFIG_PATH = os.path.join(*("airfoilConfigurations", "simulators", "torch_fc.ini"))

Download the data

In [7]:
# Download the dataset only when its directory is not already on disk.
dataset_present = os.path.isdir(DIRECTORY_NAME)
if not dataset_present:
    download_data(root_path=".", directory_name=DIRECTORY_NAME)

Loading the dataset with the dedicated class provided by the LIPS platform offers several advantages:

1. Ease the importing of datasets
1. A set of functions to organize the `inputs` and `outputs` required by augmented simulators


In [8]:
# Load the required benchmark datasets.
# A pickled copy is kept next to the notebook so later runs can skip the
# (slower) load from the raw dataset directory.
BENCHMARK_CACHE = 'benchmark.pkl'

try:
    # SECURITY NOTE: unpickling can execute arbitrary code. Only load a cache
    # file that this notebook produced itself; delete it if its origin is unknown.
    with open(BENCHMARK_CACHE, 'rb') as f:
        benchmark = pickle.load(f)
except Exception as cache_error:
    # `Exception` (rather than a bare `except:`) keeps the best-effort fallback
    # for a missing or unreadable cache while letting KeyboardInterrupt and
    # SystemExit propagate.
    print(f"Benchmark cache not used ({type(cache_error).__name__}: {cache_error}); "
          "loading from the dataset directory.")
    benchmark = AirfRANSBenchmark(benchmark_path=DIRECTORY_NAME,
                                  config_path=BENCH_CONFIG_PATH,
                                  benchmark_name=BENCHMARK_NAME,
                                  log_path=LOG_PATH)
    benchmark.load(path=DIRECTORY_NAME)
    # Store the loaded benchmark for subsequent runs.
    with open(BENCHMARK_CACHE, 'wb') as f:
        pickle.dump(benchmark, f)

## Model selection (Cross validation)

Importing the necessary dependencies, as well as the `packed_ensemble` methods

Cross-validate the model over the hyperparameter combinations defined by `param_grid`.

In [9]:
# Candidate values for each hyperparameter explored during cross-validation.
param_grid = dict(
    hidden_sizes=[(48, 128, 48), (128, 256, 128)],
    dropout=[True, False],
    alpha=[2, 4],
    gamma=[2, 4],
    M=[4],
    lr=[1e-2, 1e-3],
)

The `param_grid` will be divided into 3 partitions, each of which will be executed on a different machine.

- Anton - partition 0
- Anthony - partition 1
- Alexi - partition 2

Set `partition` accordingly in the cell below.

In [None]:
# Index of the `param_grid` partition processed on this machine.
partition = 0

# Use the GPU when one is available, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# hyperparameter tuning using CV
hyperparameters_tuning(
    benchmark=benchmark,
    param_grid=param_grid,
    k_folds=4,
    num_epochs=100,
    batch_size=1280000,
    shuffle=True,
    n_workers=6,
    scaler=StandardScaler(),
    partition=partition,
    verbose=False,
    size_scale=0.3,
    device=device,
)

<br>

---

## Model training

In [None]:
# Input and output sizes passed to the model constructor, taken from the training dataset.
(input_size, output_size) = infer_input_output_size(
    benchmark.train_dataset
)

In [None]:
# Pick the first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Build the packed MLP with the sizes inferred from the training dataset.
model = PackedMLP(
    input_size=input_size,
    output_size=output_size,
    hidden_sizes=(50, 100, 50),
    activation=F.relu,
    device=device,
    dropout=True,
)
model.to(device)
# Show the device recorded on the model as the cell output.
model.device

In [None]:
# Turn the training dataset into the loader consumed by `train`.
train_loader = model.process_dataset(
    benchmark.train_dataset,
    training=True,
    n_workers=6,
)

In [None]:
# Print the model's string representation.
print(model)

In [None]:
# Train the model; the third returned value is not used.
model, train_losses, _ = train(
    model,
    train_loader,
    epochs=1,
    device=device,
    lr=3e-4,
)

##### Prediction on `test_dataset`
This dataset has the same distribution as the training set.

In [None]:
# Run the trained model on the test dataset.
predictions, observations = predict(
    model,
    benchmark._test_dataset,
    device=device,
)

In [None]:
# Fields whose prediction and observation shapes are reported and compared.
field_names = ("x-velocity", "y-velocity", "pressure", "turbulent_viscosity")

print("Prediction dimensions: ", *(predictions[name].shape for name in field_names))
print("Observation dimensions:", *(observations[name].shape for name in field_names))

# Actually compare the shapes before claiming they agree; previously the
# success message was printed unconditionally.
mismatched_fields = [name for name in field_names
                     if predictions[name].shape != observations[name].shape]
if mismatched_fields:
    print("Dimension mismatch for:", ", ".join(mismatched_fields))
else:
    print("We have good dimensions!")

In [None]:
from lips.evaluation.airfrans_evaluation import AirfRANSEvaluation

# Extra data attached to the test dataset, forwarded to the evaluator.
observation_metadata = benchmark._test_dataset.extra_data

# Evaluator configured with the same benchmark settings used to load the data.
evaluator = AirfRANSEvaluation(
    config_path=BENCH_CONFIG_PATH,
    scenario=BENCHMARK_NAME,
    data_path=DIRECTORY_NAME,
    log_path=LOG_PATH,
)

# Compare the predictions against the observations and show the metrics.
metrics = evaluator.evaluate(
    observations=observations,
    predictions=predictions,
    observation_metadata=observation_metadata,
)
print(metrics)

##### Prediction on `test_ood_dataset`
This dataset has a different distribution in comparison to the training set.

In [None]:
# Run the trained model on the out-of-distribution test dataset.
predictions, observations = predict(model, benchmark._test_ood_dataset, device=device)
evaluator = AirfRANSEvaluation(config_path=BENCH_CONFIG_PATH,
                               scenario=BENCHMARK_NAME,
                               data_path=DIRECTORY_NAME,
                               log_path=LOG_PATH)

# Take the metadata from the OOD dataset itself. Reusing `observation_metadata`
# left over from the in-distribution evaluation cell would pair these
# predictions with the extra data of a different dataset.
observation_metadata = benchmark._test_ood_dataset.extra_data
metrics = evaluator.evaluate(observations=observations,
                             predictions=predictions,
                             observation_metadata=observation_metadata)
print(metrics)