# Cenaero

In [1]:
import numpy as np

In [2]:
# Dataset configuration

# Path templates: '{}' is replaced by a simulation id through str.format.
PARAMS_PATH = '../data/38Q31TzlO-{}/Minamo_Parameters-Wall2D.txt'
DATA_PATH = '../data/38Q31TzlO-{}/npz_data/data.npz'

# Total number of simulation sequences; the split cell shuffles ids 1..NUM_SEQUENCES.
NUM_SEQUENCES = 121

In [3]:
# Data loading and parsing methods

def load_data(simulation_ids, flatten_sequences=False):
    """Load input features and temperature targets for the given simulations.

    For every id, the archive at ``DATA_PATH.format(id)`` is read and turned
    into one input array with columns ``[time, delta, laser_position_x,
    laser_power]`` and one target array with columns ``[T1, ..., T6]``.
    ``delta`` holds the difference between consecutive ``time`` values; its
    first entry is ``time[0]`` itself.

    Args:
        simulation_ids: Iterable of ids substituted into ``DATA_PATH``.
        flatten_sequences: If True, all sequences are concatenated along the
            first axis. If False, every sequence is zero-padded at its end to
            the length of the longest one and the sequences are stacked along
            axis 1.

    Returns:
        Tuple ``(inputs, targets)`` of NumPy arrays. Assuming every stored
        array is 1-D with one entry per time step (TODO confirm against the
        data files), the shapes are ``(total_steps, 4)`` / ``(total_steps, 6)``
        when flattened and ``(max_len, num_sequences, 4)`` /
        ``(max_len, num_sequences, 6)`` otherwise.

    Raises:
        ValueError: If ``simulation_ids`` yields no ids.
    """
    inputs, targets = [], []

    for simulation_id in simulation_ids:

        # np.load on an .npz returns a lazily-read archive that keeps the file
        # open; the context manager releases the handle once the arrays have
        # been extracted.
        with np.load(DATA_PATH.format(simulation_id)) as data:
            # Keys 'T_top', 'x', 'y' and 'temperatures' were marked as unused
            # by the original loader and are not read here.
            time = data['time']
            laser_position = data['laser_position_x']
            laser_power = data['laser_power']
            target = np.stack(
                [data['T{}'.format(i + 1)] for i in range(6)], axis=1
            )

        # Step size between consecutive samples; the first entry keeps time[0].
        delta = time.copy()
        delta[1:] = time[1:] - time[:-1]

        features = np.stack([time, delta, laser_position, laser_power], axis=1)

        inputs.append(features)
        targets.append(target)

    if not inputs:
        # Both branches below would fail with a less helpful ValueError.
        raise ValueError('simulation_ids must contain at least one id')

    if flatten_sequences:
        return np.concatenate(inputs, axis=0), np.concatenate(targets, axis=0)

    max_len = max(sequence.shape[0] for sequence in inputs)

    def _pad_to_max_len(sequence):
        # Append zero rows (np.pad's default constant mode) after the last step.
        return np.pad(sequence, [(0, max_len - sequence.shape[0]), (0, 0)])

    inputs = np.stack([_pad_to_max_len(sequence) for sequence in inputs], axis=1)
    targets = np.stack([_pad_to_max_len(sequence) for sequence in targets], axis=1)

    return inputs, targets


def load_params(simulation_ids):
    """Read the first two parameter values of each simulation's parameter file.

    For every id, the file at ``PARAMS_PATH.format(id)`` is read; the text
    following ``' = '`` on its first line is collected as the power and the
    text following ``' = '`` on its second line as the break time.

    Args:
        simulation_ids: Iterable of ids substituted into ``PARAMS_PATH``.

    Returns:
        Tuple ``(powers, break_times)`` of NumPy arrays with one entry per id.
    """
    # NOTE(review): values are kept as the text found in the file; no numeric
    # conversion is performed -- confirm whether callers expect floats.
    def _right_hand_side(line):
        return line.split(' = ')[1]

    power_values, break_time_values = [], []

    for simulation_id in simulation_ids:
        with open(PARAMS_PATH.format(simulation_id)) as params_file:
            rows = params_file.read().splitlines()

        power_values.append(_right_hand_side(rows[0]))
        break_time_values.append(_right_hand_side(rows[1]))

    return np.array(power_values), np.array(break_time_values)

In [4]:
# Preview: load the first eight simulations and report the array shapes

preview_ids = range(1, 8 + 1)
inputs, targets = load_data(preview_ids, flatten_sequences=False)
powers, break_times = load_params(preview_ids)

for label, array in (
    ('inputs:', inputs),
    ('targets:', targets),
    ('powers:', powers),
    ('break_times:', break_times),
):
    print(label, array.shape)

inputs: (1384, 8, 4)
targets: (1384, 8, 6)
powers: (8,)
break_times: (8,)


## Train, evaluation and test split (50% / 25% / 25% of the sequences)

In [5]:
# Fixed seed so the shuffled order of simulation ids is the same on every run.
np.random.seed(20210831)
permutation = np.random.permutation(np.arange(1, NUM_SEQUENCES + 1))

# Cut points: first half -> train, next quarter -> eval, remainder -> test.
first_split = int(0.5 * NUM_SEQUENCES)
second_split = int(0.75 * NUM_SEQUENCES)
train_sequence_ids, eval_sequence_ids, test_sequence_ids = np.split(
    permutation, [first_split, second_split]
)

In [6]:
# Load each split as one flat array of time steps (sequences concatenated),
# in the order train, eval, test.
(
    (train_inputs, train_targets),
    (eval_inputs, eval_targets),
    (test_inputs, test_targets),
) = (
    load_data(sequence_ids, flatten_sequences=True)
    for sequence_ids in (train_sequence_ids, eval_sequence_ids, test_sequence_ids)
)

In [30]:
# Decision Tree training
from sklearn.tree import DecisionTreeRegressor

# random_state is pinned so that re-running this cell on its own refits the
# same tree; without it the estimator draws from NumPy's global RNG, whose
# state depends on which cells ran before.
dt = DecisionTreeRegressor(max_depth=8, random_state=20210831)
dt.fit(train_inputs, train_targets)

# Decision Tree evaluation
eval_preds = dt.predict(eval_inputs)
# Squared error summed over every target column and divided by the number of
# rows, i.e. the per-column mean squared errors added together (not averaged
# over columns).
MSE = ((eval_preds - eval_targets) ** 2).sum() / eval_targets.shape[0]
print(MSE)

213162.6624341536


In [37]:
# Random Forest training
from sklearn.ensemble import RandomForestRegressor

# random_state is pinned so that bootstrap sampling and tree construction are
# identical every time this cell runs; without it the forest draws from
# NumPy's global RNG, whose state depends on which cells ran before.
rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=8,
    n_jobs=-1,
    random_state=20210831,
)
rf.fit(train_inputs, train_targets)

# Random Forest evaluation
eval_preds = rf.predict(eval_inputs)
# Squared error summed over every target column and divided by the number of
# rows, i.e. the per-column mean squared errors added together (not averaged
# over columns).
MSE = ((eval_preds - eval_targets) ** 2).sum() / eval_targets.shape[0]
print(MSE)

209330.11952089763
