In [21]:
# Module Importations
import numpy as np
import pandas as pd
import sklearn
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

# Print the installed scikit-learn version so the results recorded below
# can be tied to the library release that produced them
print(sklearn.__version__)

0.22.2.post1


In [3]:
# Project Module Importations
from src.data import load_data
from src.data import split_data
from src.models import model_evaluation 

In [4]:
# Load dataset via the project helper src.data.load_data.load_motor_data()
# NOTE(review): the result is presumably a pandas DataFrame (later cells call
# .drop() / .describe() on the sets split from it) — confirm in src.data.load_data
original_dataset = load_data.load_motor_data()

In [8]:
# Split data into training / evaluation sets
# NOTE(review): 0.8 is handed to split_train_test as its second positional
# argument; presumably the share of rows kept for training — confirm in
# src.data.split_data
split_ratio = 0.8
training_set, evaluation_set = split_data.split_train_test(original_dataset, split_ratio)

In [9]:
# Drop profile id data (both sets)
# Both names are rebound to their own transformed value, so this cell runs on
# its own output when executed a second time. errors="ignore" makes that second
# run a no-op instead of a KeyError for the already-removed column.
training_set = training_set.drop(columns="profile_id", errors="ignore")
print(training_set.describe())

evaluation_set = evaluation_set.drop(columns="profile_id", errors="ignore")

ambient        coolant            u_d            u_q  \
count  199614.000000  199614.000000  199614.000000  199614.000000   
mean       -0.005043       0.005308       0.003792      -0.005359   
std         0.996490       1.002488       0.998747       1.001464   
min        -5.239872      -1.270450      -1.654002      -1.852853   
25%        -0.601386      -1.037872      -0.831843      -0.924537   
50%         0.266671      -0.178200       0.267391      -0.098490   
75%         0.686838       0.673512       0.358587       0.848608   
max         2.954662       2.296845       2.273808       1.788773   

         motor_speed         torque            i_d            i_q  \
count  199614.000000  199614.000000  199614.000000  199614.000000   
mean       -0.004852      -0.001687       0.004184      -0.001570   
std         1.001954       0.998525       0.999021       0.998389   
min        -1.353747      -3.339106      -3.235659      -3.329749   
25%        -0.951892      -0.267419      -0.75

In [35]:
# Create targets dataset (Stator yoke, tooth and winding temperature)
# The three target columns are listed once and reused for every split below.
stator_target_columns = ["stator_yoke", "stator_tooth", "stator_winding"]

# Training Set: inputs are all remaining columns, labels are an independent copy
stator_training_data = training_set.drop(columns=stator_target_columns)
stator_label_data = training_set.loc[:, stator_target_columns].copy()

# Evaluation Set: same separation applied to the held-out data
stator_eval_data = evaluation_set.drop(columns=stator_target_columns)
stator_eval_label = evaluation_set.loc[:, stator_target_columns].copy()

In [30]:
# Evaluation metric (mean of ranges)

def return_column_range(dataframe, column_name):
    """Return the spread (maximum minus minimum) of a single column.

    Args:
        dataframe: Object indexable by column name whose columns expose
            ``max()`` and ``min()``.
        column_name: Key of the column to measure.

    Returns:
        ``dataframe[column_name].max() - dataframe[column_name].min()``.
    """
    column = dataframe[column_name]
    return column.max() - column.min()


def compute_ranges_mean(ranges_list):
    """Return the arithmetic mean of a sequence of range values.

    Named differently from the ``ranges_mean`` result variable below so that
    storing the result no longer overwrites this helper.

    Args:
        ranges_list: Non-empty sequence of numbers.

    Returns:
        ``sum(ranges_list) / len(ranges_list)``.

    Raises:
        ZeroDivisionError: If ``ranges_list`` is empty.
    """
    return sum(ranges_list) / len(ranges_list)


# Calculate the range of each target column
range_yoke = return_column_range(stator_label_data, "stator_yoke")
range_tooth = return_column_range(stator_label_data, "stator_tooth")
range_winding = return_column_range(stator_label_data, "stator_winding")

ranges_sum = [range_yoke, range_tooth, range_winding]
print(ranges_sum)

# Calculate mean of ranges; the modelling cell divides its errors by this value
ranges_mean = compute_ranges_mean(ranges_sum)
print(ranges_mean)

[4.2768993, 4.3885418000000005, 4.6529601]
4.4394670666666665


In [37]:
# Linear Regression Model (Target - Yoke/Tooth/Winding Temperature)
model_name = "(Multi-Output) Linear Regression"
lin_reg = LinearRegression()

# Compute RMSE and MAE via cross validation.
# A single multi-metric cross_validate call scores both metrics on the same
# five fits, instead of refitting every fold once per metric.
cv_results = cross_validate(
    lin_reg,
    stator_training_data,
    stator_label_data,
    scoring=["neg_mean_squared_error", "neg_mean_absolute_error"],
    cv=5,
)

# The "neg_*" scorers return negated errors, so flip the sign before use
scores_mse = cv_results["test_neg_mean_squared_error"]
scores_rmse = np.sqrt(-scores_mse)
scores_mae = -cv_results["test_neg_mean_absolute_error"]

# Print CV results (percent error is the mean error relative to ranges_mean)
print(model_name, "rmse mean (cv):", scores_rmse.mean())
print(model_name, "rmse std (cv):", scores_rmse.std())
print(model_name, "Percent Err (cv):", ((scores_rmse.mean()/ranges_mean)*100))

print(model_name, "mae mean (cv):", scores_mae.mean())
print(model_name, "mae std (cv):", scores_mae.std())
print(model_name, "Percent Err (cv):", ((scores_mae.mean()/ranges_mean)*100))

# Evaluation: cross validation leaves lin_reg itself unfitted, so fit it on the
# full training set before predicting on the evaluation set
lin_reg.fit(stator_training_data, stator_label_data)
stator_pred_eval = lin_reg.predict(stator_eval_data)
model_evaluation.evaluate_model(model_name, stator_eval_label, stator_pred_eval)

(Multi-Output) Linear Regression rmse mean (cv): 0.38052400693843447
(Multi-Output) Linear Regression rmse std (cv): 0.002518489673301722
(Multi-Output) Linear Regression Percent Err (cv): 8.571389340751377
(Multi-Output) Linear Regression mae mean (cv): 0.27681278148007504
(Multi-Output) Linear Regression mae std (cv): 0.0013704624881080666
(Multi-Output) Linear Regression Percent Err (cv): 6.235270525115471
(Multi-Output) Linear Regression rmse (Eval): 0.38016795502001005
(Multi-Output) Linear Regression mae (Eval): 0.2765778162024338
(Multi-Output) Linear Regression r2 (Eval): 0.8551919747563934
