In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
import sklearn.metrics as skm

from utils import data, plot, evaluate

In [None]:
# Location of the CMAPSS files and which numbered subset of them to load.
data_dir = 'data/CMAPSS'
subset = 1

# The loader hands back a 6-tuple; grab it first, then unpack it into the
# train/test splits plus the header and the meta/sensor/setting counts.
_cmapss = data.load_cmapss_subset(data_dir, subset)
train, test, header, n_meta, n_sensors, n_settings = _cmapss

In [None]:
# Set dataset parameters (forwarded unchanged to data.build_dataset for both splits).
# drop_sensors: a list of sensor numbers such as [1, 5, 10, 16, 18, 19] was tried
# before; this run uses None (presumably "drop nothing" -- confirm in utils.data).
drop_sensors = None
include_settings = None
params = {
    'drop_sensors': drop_sensors,
    'include_settings': include_settings,
    'smooth': 0,
    'lag': 0,
    'clip': None
}

# Build dataset: the same params are applied to the train and test splits so
# both feature matrices are constructed the same way.
X, y, cv = data.build_dataset(train, cv_folds=5, **params)
X_test, y_test, _ = data.build_dataset(test, cv_folds=None, test=True, **params)
print('Samples: {} - Features: {}\n---'.format(*X.shape))

# Define model pipeline: scaler + regressor
pipeline = Pipeline([('scaler', MinMaxScaler()), ('reg', LinearRegression())])

# One shared format for the five metrics so the three report lines cannot drift apart.
metrics_fmt = 'MAPE={:.3f} - MAE={:.3f} - MSE={:.3f} - RMSE={:.3f} - R2={:.3f}'

# Evaluate on cross-validation set
reg, val_train_scores, val_test_scores = evaluate.cross_validate(X, y, cv, pipeline)
print('CV Training - ' + metrics_fmt.format(*val_train_scores))
print('CV Testing - ' + metrics_fmt.format(*val_test_scores))

# Evaluate on test set
# NOTE(review): this predicts with `pipeline`, not the `reg` object returned above,
# so it relies on evaluate.cross_validate having fitted `pipeline` in place -- confirm.
y_pred = pipeline.predict(X_test)
print('Test Set - ' + metrics_fmt.format(*evaluate.metrics(y_test, y_pred)))

# Scatter of predictions against ground truth for the test set.
fig, ax = plt.subplots()
ax.plot(y_test, y_pred, '.')
ax.set_ylabel('Predictions')
ax.set_xlabel('Ground Truth')
ax.set_title('Test Set Predictions')
# plt.show() rather than fig.show(): on the notebook's non-GUI backend
# Figure.show() only emits a UserWarning.
plt.show()