In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
import time
from autogluon.tabular import TabularDataset, TabularPredictor

In [2]:
# Read the "alt" feature and target arrays from disk; data_x / data_y are
# aliases of the same array objects.
data_x = alt_x = np.load('../nets/net_18_data/measured_data_x_alt.npy')
data_y = alt_y = np.load('../nets/net_18_data/data_y_alt.npy')

# Only the leading 80% of the rows is used below; the trailing 20% is not
# referenced anywhere in this cell.
split_train = int(0.8 * data_x.shape[0])
train_x, train_y = data_x[:split_train, :], data_y[:split_train, :]

# Two successive shuffled splits with a fixed seed: first 20% of the rows is
# set aside as validation data, then 30% of what remains becomes test data.
train_x, val_x, train_y, val_y = train_test_split(
    train_x, train_y, test_size=0.2, shuffle=True, random_state=42
)
train_x, test_x, train_y, test_y = train_test_split(
    train_x, train_y, test_size=0.3, shuffle=True, random_state=42
)


In [3]:
# Number of feature columns and number of target columns.
num_input = 53
num_output = 18

# Column labels are consecutive integers rendered as strings: the features
# take '0'..'52' and the targets continue with '53'..'70'.
in_columns = list(map(str, range(num_input)))
out_columns = list(map(str, range(num_input, num_input + num_output)))

In [None]:
# Train one AutoGluon regressor per output column. Each predictor is written
# to ./autogluon_models/model_<i>. Set train_models to False to skip this cell
# when the saved models are to be reused.
train_models = True
if train_models:
    for i in range(num_output):
        print(f'Training for output #{i}')
        label = out_columns[i]

        # Training frame = every input feature plus the single target column i.
        train = pd.DataFrame(
            np.hstack((train_x, train_y[:, i].reshape(-1, 1))),
            columns=in_columns + [label],
        )
        model_path = f'./autogluon_models/model_{i}'

        # time_limit is passed straight to fit() for each of the num_output
        # models, so the total training budget scales with num_output.
        predictor = TabularPredictor(
            label=label,
            problem_type='regression',
            eval_metric='mean_squared_error',
            path=model_path,
        ).fit(train, time_limit=540)


In [None]:
# Disabled legacy evaluation code, kept as a bare string literal so it is never
# executed. It is written against the H2O API (h2o.H2OFrame, h2o.load_model),
# and `h2o` is not imported in the notebook's import cell, so it would not run
# as-is if uncommented.
'''
test_predictions = []
for i in range(num_output):
    columns_names = in_columns + [out_columns[i]]
    x = in_columns
    y = out_columns[i]
    
    test = np.hstack((test_x, test_y[:, i].reshape(-1, 1)))
    test = h2o.H2OFrame(test, column_names=columns_names)
    
    model_path = f'./autogluon_models/model_{i}/'
    files = os.listdir(model_path)
    model_filename = [f for f in files if os.path.isfile(os.path.join(model_path, f))][0]
    
    aml = h2o.load_model(f'{model_path}/{model_filename}')
    try:
        preds = aml.leader.predict(test)
    except:
        preds = aml.predict(test)
    test_predictions.append(preds['predict'])
    perf = aml.model_performance(test)
    print(f"MSE for model {i}: {perf._metric_json['MSE']}")
    print('---------------------------------')
'''


In [4]:
# Load the saved predictors from disk, one per output column. models[i] is
# read from ./autogluon_models/model_<i>/, the directory the training cell
# writes the predictor for out_columns[i] to.
models = []
for i in range(num_output):
    model_path = f'./autogluon_models/model_{i}/'

    # TabularPredictor.load is a classmethod that restores the saved predictor
    # (with its label and metric), so no throwaway instance pointing at the
    # existing model directory has to be constructed first.
    predictor = TabularPredictor.load(model_path)
    models.append(predictor)
    print(f'Model {i} loaded')



Model 0 loaded
Model 1 loaded
Model 2 loaded
Model 3 loaded
Model 4 loaded
Model 5 loaded
Model 6 loaded
Model 7 loaded
Model 8 loaded
Model 9 loaded
Model 10 loaded
Model 11 loaded
Model 12 loaded
Model 13 loaded
Model 14 loaded
Model 15 loaded
Model 16 loaded
Model 17 loaded


In [5]:
from net18.scenarios2 import get_data_by_scenario_and_case, report_preds_on_validation_files

# Run every per-output predictor on each (scenario, case) validation set,
# hand the predictions to report_preds_on_validation_files, and collect an
# error figure for case 1 of every scenario.
std_results = []
for scenario in range(1, 6):
    for case in range(1, 4):
        print(f'SCENARIO {scenario}, CASE {case} VALIDATION')
        scenario_data = get_data_by_scenario_and_case(scenario, case)
        # Element 1 is used as the model input and element 2 as the reference
        # targets; elements 0 and 3 are not needed in this cell.
        x_hat = scenario_data[1]
        y_all = scenario_data[2]

        # The feature frame is the same for every output model, so build it
        # once per scenario/case. The label column is left out: predict()
        # only needs the feature columns. Local names are used so the
        # hold-out test_x / test_y from the split cell are not overwritten.
        features = pd.DataFrame(np.asarray(x_hat), columns=in_columns)

        estim = []
        for i in range(num_output):
            preds = models[i].predict(features)
            # Keep only the prediction for the first row (positional access).
            estim.append(preds.iloc[0])

        pred = np.asarray(estim)
        # NOTE(review): 'autogloun' looks like a typo of 'autogluon'; it is
        # left unchanged because how the callee uses this string (e.g. in
        # output names) is not visible here. The meaning of the literal 8 is
        # likewise defined by report_preds_on_validation_files — confirm there.
        report_preds_on_validation_files(pred, 8, 'autogloun', scenario, case=case)
        if case == 1:
            # Printed as 'std', but the value is the root-mean-square of
            # (y_all - pred).
            std_results.append(f'std: {np.sqrt(np.mean(np.square(y_all - pred)))}')
print(std_results)


SCENARIO 1, CASE 1 VALIDATION
SCENARIO 1, CASE 2 VALIDATION
SCENARIO 1, CASE 3 VALIDATION
SCENARIO 2, CASE 1 VALIDATION
SCENARIO 2, CASE 2 VALIDATION
SCENARIO 2, CASE 3 VALIDATION
SCENARIO 3, CASE 1 VALIDATION
SCENARIO 3, CASE 2 VALIDATION
SCENARIO 3, CASE 3 VALIDATION
SCENARIO 4, CASE 1 VALIDATION
SCENARIO 4, CASE 2 VALIDATION
SCENARIO 4, CASE 3 VALIDATION
SCENARIO 5, CASE 1 VALIDATION
SCENARIO 5, CASE 2 VALIDATION
SCENARIO 5, CASE 3 VALIDATION
['std: 0.007678616614487961', 'std: 0.005597705108611525', 'std: 0.0037181661130499546', 'std: 0.0036518991148935284', 'std: 0.0016187131741629084']


In [7]:
# Root-mean-square of (y_all - pred), computed from the `estim` and `y_all`
# values left in the kernel by the validation loop, i.e. from whichever
# scenario/case that loop processed last.
pred = np.asarray(estim)
rms_error = np.sqrt(np.mean(np.square(y_all - pred)))
print(f'std: {rms_error}')

std: 0.008439554785042516
