In [10]:
# Module Importations
import numpy as np
import pandas as pd
import sklearn
import time
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Print versioning information
print(sklearn.__version__)

0.22.2.post1


In [2]:
# Project Module Importations
from src.data import load_data
from src.data import split_data
from src.features import hyperparameter_opt
from src.models import model_evaluation
from src.models import sklearn_helpers

In [3]:
# Load dataset
original_dataset = load_data.load_motor_data()

In [4]:
# Split data into training / evaluation sets
train_set, eval_set = split_data.split_train_test(original_dataset, 0.8)

In [5]:
# Drop profile id data column (both sets)
train_set = train_set.drop("profile_id", axis = 1)

eval_set = eval_set.drop("profile_id", axis = 1)

In [6]:
# Create torque target datasets
torque_train_data = train_set.drop("torque", axis = 1)
torque_label_data = train_set["torque"].copy()

torque_eval_data = eval_set.drop("torque", axis = 1)
torque_eval_label = eval_set["torque"].copy()

In [7]:
# Evaluation Metric (Range)

# Calculate arithmetic range of torque readings
torque_range = original_dataset["torque"].max() - original_dataset["torque"].min()

print("Torque range:", torque_range)

Torque range: 6.3629233


In [11]:
# Lin Reg Model (Target: Torque, Data: All)
lin_reg = LinearRegression()
lin_reg.fit(torque_train_data, torque_label_data)

# Name Model
model_type = "LinRegr_Torque"
model_name = sklearn_helpers.name_model(model_type)

# Save Model
#sklearn_helpers.save_model(lin_reg, model_name)

# Compute RMSE via cross validation
scores_mse = cross_val_score(lin_reg, torque_train_data, torque_label_data, scoring = "neg_mean_squared_error", cv = 5)
scores_rmse = np.sqrt(-scores_mse)

# Compute MAE via cross validation
scores_mae = cross_val_score(lin_reg, torque_train_data, torque_label_data, scoring = "neg_mean_absolute_error", cv = 5)
scores_mae = -1 * scores_mae

# Print cross validation results
model_name = "Linear Regression"
print(model_name, "rmse mean (cv):", scores_rmse.mean())
print(model_name, "Percent Err (cv):", ((scores_rmse.mean()/torque_range) * 100))
print(model_name, "mae mean (cv):", scores_mae.mean())
print(model_name, "Percent Err (cv):", ((scores_mae.mean()/torque_range) * 100))

# Evaluate model
start_time = time.process_time()
torque_pred_eval = lin_reg.predict(torque_eval_data)
end_time = time.process_time()
print("Prediction Duration (s):", (end_time - start_time))

model_evaluation.evaluate_model(model_name, torque_eval_label, torque_pred_eval)

Linear Regression rmse mean (cv): 0.06137364641945211
Linear Regression Percent Err (cv): 0.9645510958689713
Linear Regression mae mean (cv): 0.041007579675782946
Linear Regression Percent Err (cv): 0.6444770389701686
Prediction Time: 0.03125
Linear Regression rmse (Eval): 0.0617094388532186
Linear Regression mae (Eval): 0.040985584127677104
Linear Regression r2 (Eval): 0.9961747621952894


In [9]:
# Polynomial Regression Model (Target - Torque)
lin_reg_poly = LinearRegression()

# Create polynomial dataset
poly_features = PolynomialFeatures(degree = 2, include_bias = False)
torque_train_poly = poly_features.fit_transform(torque_train_data)

lin_reg_poly.fit(torque_train_poly, torque_label_data)

# Name model
model_type = "LinRegrPoly_Torque"
model_name = sklearn_helpers.name_model(model_type)

# Save model
sklearn_helpers.save_model(lin_reg_poly, model_name)

# Compute RMSE via cross validation
scores_mse = cross_val_score(lin_reg_poly, torque_train_poly, torque_label_data, scoring = "neg_mean_squared_error", cv = 5)
scores_rmse = np.sqrt(-scores_mse)

# Compute MAE via cross validation
scores_mae = cross_val_score(lin_reg_poly, torque_train_poly, torque_label_data, scoring = "neg_mean_absolute_error", cv = 5)
scores_mae = -1 * scores_mae

# Print results
model_name = "Linear Regression - Polynomial"
print(model_name, "rmse mean (cv):", scores_rmse.mean())
print(model_name, "mae mean (cv):", scores_mae.mean())

# Evaluate model
torque_eval_poly = poly_features.fit_transform(torque_eval_data)

start_time = time.process_time()
torque_pred_eval = lin_reg_poly.predict(torque_eval_poly)
end_time = time.process_time()
print("Prediction Duration (s):", (end_time - start_time))

model_evaluation.evaluate_model(model_name, torque_eval_label, torque_pred_eval)

Save Path: C:\Developer\electric_motor_thermal_modelling\Models\YC_LinRegrPoly_Torque_2020_09_14-15_59_01.pkl
Linear Regression - Polynomial rmse mean (cv): 0.018356550663283684
Linear Regression - Polynomial mae mean (cv): 0.008321049111748923
Prediction Duration (s): 0.40625
Linear Regression - Polynomial rmse (Eval): 0.019856915850197183
Linear Regression - Polynomial mae (Eval): 0.008298359441527431
Linear Regression - Polynomial r2 (Eval): 0.9996039237189978


In [None]:
# Linear Regression - PCA (Target - Torque)
lin_reg_pca = LinearRegression()

# Create PCA dataset
pca = PCA(n_components = 3)
torque_train_data = pca.fit_transform(torque_train_data)

# Name model
model_type = "LinRegPCA_Torque"
model_name = sklearn_helpers.name_model(lin_reg_pca)

# Save model
sklearn_helpers.save_model(lin_reg_pca, model_name)

# Compute RMSE via cross validation
scores_mse = cross_val_score(lin_reg_pca, torque_train_data, torque_label_data, scoring = "neg_mean_squared_error", cv = 5)
scores_rmse = np.sqrt(-1 * scores_mse)

# Compute MAE via cross validation
scores_mae = cross_val_score(lin_reg_pca, torque_train_data, torque_label_data, scoring = "neg_mean_absolute_error", cv = 5)
scores_mae = scores_mae * -1

# Print results
model_name = "Linear Regression - PCA"
print(model_name, "rmse mean (cv)", scores_rmse.mean())
print(model_name, "mas mean (cv)", scores_mae.mean())

# Evaluate model
torque_eval_pca = pca.fit_transform(torque_eval_data)

start_time = time.process_time()
torque_pred_eval = lin_reg_pca.predict(torque_eval_pca)
end_time = time.process_time()
print("Prediction Duration (s):", (end_time - start_time))

model_evaluation.evaluate_model(model_name, torque_eval_label, torque_eval_pca)