In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression
import pickle

#Colab use
#from google.colab import files
#uploaded = files.upload()
#import io

In [2]:
def evaluate(y_true, y_pred, label='test'):
    """Print the RMSE and R2 of a set of predictions.

    Args:
        y_true: ground-truth target values.
        y_pred: predicted values, in the same order as ``y_true``.
        label: name of the data split; used as the prefix of the printed line.

    Returns:
        None. The metrics are only printed.
    """
    r2 = r2_score(y_true, y_pred)
    # RMSE is the square root of the mean squared error
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print('{} set RMSE:{}, R2:{}'.format(label, rmse, r2))

# Load Data

In [3]:
# Column-name groups. Only sensor_names is used for feature selection in this
# cell; col_names is built but not referenced again here.
index_names = ['ID', 'Cycle']
setting_names = ['OpSet1', 'OpSet2', 'OpSet3']
sensor_names = ['SensorMeasure{}'.format(i) for i in range(1, 22)]
col_names = index_names + setting_names + sensor_names

train_data = pd.read_csv("data/train_set.csv")
test_data = pd.read_csv("data/test_set.csv")
# Raw string: '\s' in a plain string literal is an invalid escape sequence.
true_RUL = pd.read_csv("data/RUL_FD001.txt", sep=r'\s+', header=None)

#Colab use
#train_data = pd.read_csv(io.BytesIO(uploaded['train_set.csv']))
#test_data = pd.read_csv(io.BytesIO(uploaded['test_set.csv']))
#true_RUL = pd.read_csv(io.BytesIO(uploaded['RUL_FD001.txt']), sep=r'\s+', header=None)

train_RUL = train_data['RUL']
test_RUL = test_data['RUL']

# Use the keyword form: passing axis positionally to DataFrame.drop is no
# longer accepted by pandas 2.0+.
train_data = train_data.drop(columns=['RUL'])
test_data = test_data.drop(columns=['RUL'])

# Keep only the last record of each engine. Done once; the original grouped
# and tailed twice, which produced the same rows.
test_data = test_data.groupby('ID').tail(1)

#assign to new variable for easy understanding
train = train_data
train_y = train_RUL
test = test_data
test_y = true_RUL

# Fail early with a clear message if the labels do not line up one-to-one
# with the last-cycle test rows (the metric functions would reject it later).
assert len(test) == len(test_y), (
    'expected one true RUL value per test engine, got {} rows and {} labels'
    .format(len(test), len(test_y))
)

#only sensor value considered
train = train[sensor_names]
test = test[sensor_names]

# Zero R Algorithm

In [4]:
def zero_rule_algorithm(X_train, X_test, y_train, y_test):
    """Zero-rule baseline: predict the mean of ``y_train`` for every sample.

    The feature sets are used only for their lengths, so that one constant
    prediction is produced per row.

    Args:
        X_train: training features.
        X_test: test features.
        y_train: training targets; their average is the prediction.
        y_test: test targets.

    Returns:
        None. RMSE and R2 for both splits are printed via ``evaluate``.
    """
    mean_target = sum(y_train) / len(y_train)

    # Same constant repeated once per row of each split.
    splits = (
        ('Train', y_train, [mean_target] * len(X_train)),
        ('Test', y_test, [mean_target] * len(X_test)),
    )

    # Report RMSE & R2, train first and then test.
    for split_name, truth, constant_pred in splits:
        evaluate(truth, constant_pred, split_name)

# Linear Regression

In [5]:
def linear_regression(X_train, X_test, y_train, y_test, save_path=None):
    """Fit a LinearRegression model and print train/test metrics.

    Args:
        X_train: training features.
        X_test: test features.
        y_train: training targets.
        y_test: test targets.
        save_path: optional path; when given, the fitted model is pickled to
            this file. Defaults to None, in which case nothing is written
            (saving was commented out before). The path previously hard-coded
            here was '../Dashboard UI.model/base_model.sav'; on Colab use
            'base_model.sav' and then ``files.download('base_model.sav')``.

    Returns:
        None. Metrics are printed.
    """
    # Seed kept so the global NumPy RNG state after this call is the same as
    # before for any later cell that relies on it.
    np.random.seed(2)

    #------------------------------Train Model-------------------------------
    model = LinearRegression()
    model.fit(X_train, y_train)

    #------------------------------Predict X---------------------------------
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    #---------------------------------R2 score-------------------------------
    # For a regressor, score() returns the coefficient of determination (R2),
    # not a classification accuracy, so it is labelled as R2 here.
    test_r2 = model.score(X_test, y_test)
    print('R2 score of Linear Regression on test set: {:.2f}'.format(test_r2))

    #---------------------------------RMSE & R2------------------------------
    evaluate(y_train, y_pred_train, 'Train')
    evaluate(y_test, y_pred_test, 'Test')

    #---------------------------------Save model-----------------------------
    if save_path is not None:
        # The context manager closes the file handle even if pickling fails.
        with open(save_path, 'wb') as model_file:
            pickle.dump(model, model_file)

# Run the Baseline and Linear Regression Models

In [6]:
# Baseline to beat: the mean of the training targets predicted for every row.
zero_rule_algorithm(X_train=train, X_test=test, y_train=train_y, y_test=test_y)

Train set RMSE:68.87932080043385, R2:0.0
Test set RMSE:52.62485752612688, R2:-0.6036970189556603


In [7]:
# Linear regression on the sensor columns, evaluated on the same splits as the baseline.
linear_regression(X_train=train, X_test=test, y_train=train_y, y_test=test_y)

Accuracy of Linear Regression on test set: 0.41
Train set RMSE:44.662806660265844, R2:0.5795500450752381
Test set RMSE:31.94977455636664, R2:0.408879457833612
