In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)
from sklearn.metrics import precision_score, mean_squared_error, r2_score, make_scorer, adjusted_rand_score, \
                    accuracy_score, f1_score, confusion_matrix, classification_report, roc_auc_score, recall_score
from time import time
from sklearn.preprocessing import MinMaxScaler
import scipy.stats as st
from sklearn.feature_selection import RFE, RFECV, SelectKBest, mutual_info_regression
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
import pprint as pp
%matplotlib inline

In [None]:
# Load the raw chiller log from the Excel workbook; the path is relative to the
# notebook's working directory.
Chiller_Data = pd.read_excel(io='Refrigerant_Leak_Fault_Data.xlsx')

In [None]:
# Discard every row whose 'kW' reading equals the literal 1.682000e-45 and
# renumber the remaining rows from 0.
# NOTE(review): the value looks like a logger placeholder for "no reading" - confirm.
Chiller_Data = (
    Chiller_Data[Chiller_Data['kW'].ne(1.682000e-45)]
    .reset_index(drop=True)
)

In [None]:
# Temperature differences that appear in both target formulas.
trc_minus_tci = Chiller_Data['TRC'] - Chiller_Data['TCI']
trc_minus_tco = Chiller_Data['TRC'] - Chiller_Data['TCO']
tco_minus_tci = Chiller_Data['TCO'] - Chiller_Data['TCI']

# Target 1: TRC_sub normalised by (TRC - TCI).
Chiller_Data['Target_EPS'] = Chiller_Data['TRC_sub'] / trc_minus_tci
# Target 2: log-mean of the two differences, (a - b) / ln(a / b) with
# a = TRC - TCI and b = TRC - TCO (so a - b = TCO - TCI).
Chiller_Data['Target_LMTD'] = tco_minus_tci / np.log(trc_minus_tci / trc_minus_tco)

# Previous-row value of each target, added as extra columns.
Chiller_Data['Lag1'] = Chiller_Data['Target_EPS'].shift(periods=1)
Chiller_Data['Lag2'] = Chiller_Data['Target_LMTD'].shift(periods=1)

# Remove every row containing a NaN in any column; this includes the first row,
# whose lag values are undefined.
Chiller_Data.dropna(axis='index', inplace=True)
#Time_data = Chiller_Data['Time (minutes)']

In [None]:
# Pull the regression targets and the ground-truth labels out as NumPy arrays.
# `.to_numpy()` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0 (calling it there raises AttributeError).
y = Chiller_Data[['Target_EPS', 'Target_LMTD']].to_numpy()
True_Labels = Chiller_Data['Label'].to_numpy()

# Everything that is left after removing targets, label and timestamp is a model
# input. The drop mutates Chiller_Data in place, so this cell cannot be re-run
# without first re-running the cells that build Chiller_Data.
Chiller_Data.drop(['Target_EPS', 'Target_LMTD', 'Label', 'Time (minutes)'], axis=1, inplace=True)
X = Chiller_Data.to_numpy()
# Column names in the same order as the columns of X.
Feature_Names = list(Chiller_Data)

In [None]:
#################################################################################################
# Ordered hold-out split (no shuffling): the leading 45 % of the rows are used
# for training and the trailing 55 % for testing. Features, targets and labels
# are split in one call so they are cut at the same row.
(X_train, X_test,
 y_train, y_test,
 TL_train, TL_Test) = train_test_split(X, y, True_Labels, test_size=0.55, shuffle=False)
#DT_train, DT_Test = train_test_split(Time_data, test_size=0.55, shuffle=False)
#################################################################################################

In [None]:
def calc_dyn_threshold(A_EPS, P_EPS, A_LMTD, P_LMTD, I, N):
    """Label samples whose actual value falls below a rolling lower bound.

    For each position ``k-1`` that has a full trailing window of ``I``
    predictions (positions ``k-I .. k-1``, i.e. the window includes the current
    sample) the lower bound is ``mean(window) - N * std(window)``, computed
    independently for EPS and LMTD. ``std`` is NumPy's default (population)
    standard deviation. A sample is labelled 1 when either actual value is
    strictly below its bound, otherwise 0.

    The first ``I-1`` positions do not have a full window: their thresholds are
    set to the predictions themselves and their labels to 0.

    Control false alarm rates by tuning I and N, e.g. increase I or N to reduce
    false alarms.

    Parameters
    ----------
    A_EPS, A_LMTD : array-like of float
        Actual (measured) values. Indexed positionally, so a pandas Series
        with an arbitrary index is accepted.
    P_EPS, P_LMTD : array-like of float
        Predicted values; ``len(P_EPS)`` defines the output length.
    I : int
        Window length, must be >= 1.
    N : float
        Number of standard deviations subtracted from the window mean.

    Returns
    -------
    labels : numpy.ndarray of float (values 0.0 / 1.0)
    threshold_EPS : numpy.ndarray of float
    threshold_LMTD : numpy.ndarray of float
        All three have length ``len(P_EPS)``.

    Raises
    ------
    ValueError
        If ``I`` is smaller than 1.
    """
    if I < 1:
        raise ValueError("I must be >= 1, got %r" % (I,))

    # Plain float arrays give positional indexing regardless of the input type.
    A_EPS = np.asarray(A_EPS, dtype=float)
    P_EPS = np.asarray(P_EPS, dtype=float)
    A_LMTD = np.asarray(A_LMTD, dtype=float)
    P_LMTD = np.asarray(P_LMTD, dtype=float)

    n_samples = len(P_EPS)
    # Never let the warm-up region exceed the data, so the outputs always have
    # exactly one entry per input sample.
    n_warmup = min(I - 1, n_samples)

    # Preallocate instead of growing with np.append (which copies every time).
    threshold_EPS = np.empty(n_samples)
    threshold_LMTD = np.empty(n_samples)
    labels = np.zeros(n_samples)

    threshold_EPS[:n_warmup] = P_EPS[:n_warmup]
    threshold_LMTD[:n_warmup] = P_LMTD[:n_warmup]

    for k in range(I, n_samples + 1):
        window_EPS = P_EPS[k - I:k]
        window_LMTD = P_LMTD[k - I:k]
        threshold_EPS[k - 1] = np.mean(window_EPS) - N * np.std(window_EPS)
        threshold_LMTD[k - 1] = np.mean(window_LMTD) - N * np.std(window_LMTD)

        if A_EPS[k - 1] < threshold_EPS[k - 1] or A_LMTD[k - 1] < threshold_LMTD[k - 1]:
            labels[k - 1] = 1
    return labels, threshold_EPS, threshold_LMTD

In [None]:
t0 = time()
np.random.seed(7)
########################################################################################
# Regression
# One SVR pipeline per target column, tuned by 10-fold grid search on the training
# part, then used to predict the held-out part.
# NOTE(review): the folds are shuffled although the rows look time-ordered and the
# features include lagged targets - confirm this is intended.
kf = KFold(n_splits=10, shuffle=True, random_state=7)
# Negated MSE so that "greater is better" holds for GridSearchCV.
scoring_param = make_scorer(mean_squared_error, greater_is_better=False)
Y_Test_Pred_scaled = np.zeros((len(y_test), 2))

# Targets are scaled with a scaler fitted on the training targets only; the same
# scaler is used below to bring predictions back to original units.
scaler = MinMaxScaler()
scaler.fit(y_train)
y_train_scaled = scaler.transform(y_train)


def _seeded_mutual_info(X, y):
    """Mutual-information score with a fixed random_state.

    Without an explicit random_state the estimator draws from NumPy's global RNG,
    which np.random.seed() does not control inside the n_jobs=-1 worker
    processes, so cross-validation scores could differ between runs.
    """
    return mutual_info_regression(X, y, random_state=7)


estimators = []
estimators.append(('standardize', MinMaxScaler()))
estimators.append(('FS', SelectKBest(_seeded_mutual_info)))
estimators.append(('SVM', SVR()))
pipe = Pipeline(estimators)

p_grid = dict(FS__k = [8, 16],
              SVM__gamma = np.logspace(-3, 0, 4),
              SVM__C = np.logspace(0, 3, 4))

# Column 0 is Target_EPS, column 1 is Target_LMTD; both use the same pipeline and
# grid. After the loop `model` / `params` refer to the last target (LMTD).
for target_idx in range(y_train_scaled.shape[1]):
    model = GridSearchCV(estimator = pipe, param_grid = p_grid, scoring = scoring_param, cv = kf, n_jobs=-1)
    model.fit(X_train, y_train_scaled[:, target_idx])

    params = model.best_params_
    print("Best k: %s Best gamma: %f Best C: %s" % (params['FS__k'], params['SVM__gamma'], params['SVM__C']))

    Y_Test_Pred_scaled[:, target_idx] = model.predict(X_test)

Y_Test_Pred = scaler.inverse_transform(Y_Test_Pred_scaled)

P_EPS = Y_Test_Pred[:,0]
P_LMTD = Y_Test_Pred[:,1]

# Window of 2 samples, threshold at mean - 2 * std.
Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 2, 2)
Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})

# Pin the class order so the matrix is always 2x2 (rows/cols: 0, 1) and the
# four-way unpack below cannot fail when a class is absent from the predictions.
conf_mat = confusion_matrix(Temp['Labels'], Temp['Pred_Labels'], labels=[0, 1])

print("########################################################################################")
print("Confusion Matrix - testing:")
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()
print("True Negative, False Positive, False Negative, True Positive {}.".format([tn, fp, fn, tp]))
print("False positive means false alarms")
print("False Negative means missed faults")
print("########################################################################################")
print("Classification Report - testing:")
print(classification_report(Temp['Labels'], Temp['Pred_Labels'], labels=[0, 1], target_names=['Normal', 'Fault']))
print("########################################################################################")
print("Accuracy - testing: %0.3f" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))
print("########################################################################################")
# Computed from hard 0/1 predictions, not from continuous scores.
print("ROC AUC score - testing: %0.3f" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))
print("########################################################################################")
########################################################################################

t1 = time()
print('Time taken for this trial %f' %(t1-t0))

In [None]:
# Re-evaluate the same predictions with a longer window (30 samples instead of 2),
# still thresholding at mean - 2 * std.
Labels, Threshold_EPS, Threshold_LMTD = calc_dyn_threshold(y_test[:,0], P_EPS, y_test[:,1], P_LMTD, 30, 2)
Temp = pd.DataFrame(data={'Labels':TL_Test, 'Pred_Labels': Labels})

# Pin the class order so the matrix is always 2x2 (rows/cols: 0, 1) and the
# four-way unpack below cannot fail when a class is absent from the predictions.
conf_mat = confusion_matrix(Temp['Labels'], Temp['Pred_Labels'], labels=[0, 1])

print("########################################################################################")
print("Confusion Matrix - testing:")
print(conf_mat)
tn, fp, fn, tp = conf_mat.ravel()
print("True Negative, False Positive, False Negative, True Positive {}.".format([tn, fp, fn, tp]))
print("False positive means false alarms")
print("False Negative means missed faults")
print("########################################################################################")
print("Classification Report - testing:")
print(classification_report(Temp['Labels'], Temp['Pred_Labels'], labels=[0, 1], target_names=['Normal', 'Fault']))
print("########################################################################################")
print("Accuracy - testing: %0.3f" % accuracy_score(Temp['Labels'], Temp['Pred_Labels']))
print("########################################################################################")
# Computed from hard 0/1 predictions, not from continuous scores.
print("ROC AUC score - testing: %0.3f" % roc_auc_score(Temp['Labels'], Temp['Pred_Labels']))
print("########################################################################################")
########################################################################################