### Importing the libraries

In [1]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt
from numpy.fft import fft, ifft

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import sklearn.metrics as metrics 

from random import randint

from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV

In [3]:
from tsfresh import select_features
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import extract_relevant_features

In [4]:
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score
from sklearn.model_selection import cross_validate

from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


## Loading the Datasets

In [6]:
## Load the raw sensor files. Each file is read as tab-delimited text with no header row.

## Pressure sensors (PS): six files, PS1 ... PS6 -> df_ps1 ... df_ps6.
df_ps1, df_ps2, df_ps3, df_ps4, df_ps5, df_ps6 = [
    pd.read_csv(path, delimiter = '\t', header = None)
    for path in ('PS1.txt', 'PS2.txt', 'PS3.txt', 'PS4.txt', 'PS5.txt', 'PS6.txt')
]

## Cooling efficiency (CE) and cooling power (CP).
df_ce, df_cp = [
    pd.read_csv(path, delimiter = '\t', header = None)
    for path in ('CE.txt', 'CP.txt')
]

## Temperature sensors (TS): four files, TS1 ... TS4 -> df_ts1 ... df_ts4.
df_ts1, df_ts2, df_ts3, df_ts4 = [
    pd.read_csv(path, delimiter = '\t', header = None)
    for path in ('TS1.txt', 'TS2.txt', 'TS3.txt', 'TS4.txt')
]

## Flow sensors (FS): two files, FS1 and FS2 -> df_fs1, df_fs2.
df_fs1, df_fs2 = [
    pd.read_csv(path, delimiter = '\t', header = None)
    for path in ('FS1.txt', 'FS2.txt')
]

## Motor power (EPS1), efficiency (SE) and vibration (VS1).
df_eps1, df_se, df_vs1 = [
    pd.read_csv(path, delimiter = '\t', header = None)
    for path in ('EPS1.txt', 'SE.txt', 'VS1.txt')
]



In [7]:
## function for defining variables like ps1_1, ps1_2 for 17 sensors with different sampling rates

def col(n, var):
    """Build the column labels ``<var>_1`` ... ``<var>_<n-1>``.

    Parameters
    ----------
    n : int
        Exclusive upper bound of the numbering, so ``n - 1`` labels are produced.
    var : object
        Prefix; it is converted with ``str`` before the underscore is appended.

    Returns
    -------
    list of str
        The labels in ascending order; empty when ``n <= 1``.
    """
    prefix = str(var) + '_'
    return [prefix + '%d' % step for step in range(1, n)]


In [8]:
# 
# Label each sensor frame's columns "<sensor>_<k>" via col(); the first
# argument of col() is one more than the number of labels it returns.

# pressure sensors: col(6001, ...)
(df_ps1.columns, df_ps2.columns, df_ps3.columns,
 df_ps4.columns, df_ps5.columns, df_ps6.columns) = [
    col(6001, tag) for tag in ('ps1', 'ps2', 'ps3', 'ps4', 'ps5', 'ps6')
]

# temperature sensors: col(61, ...)
(df_ts1.columns, df_ts2.columns, df_ts3.columns, df_ts4.columns) = [
    col(61, tag) for tag in ('ts1', 'ts2', 'ts3', 'ts4')
]

# motor power: col(6001, ...)
df_eps1.columns = col(6001, 'eps1')

# flow sensors: col(601, ...)
(df_fs1.columns, df_fs2.columns) = [col(601, tag) for tag in ('fs1', 'fs2')]

# vibration, cooling efficiency, cooling power, efficiency: col(61, ...)
(df_vs1.columns, df_ce.columns, df_cp.columns, df_se.columns) = [
    col(61, tag) for tag in ('vs1', 'ce', 'cp', 'se')
]

In [9]:
# Side-by-side (column-wise) concatenation of the frames of each sensor family.
df_ps = pd.concat(
    objs=[df_ps1, df_ps2, df_ps3, df_ps4, df_ps5, df_ps6],
    axis='columns',
)
df_ts = pd.concat(
    objs=[df_ts1, df_ts2, df_ts3, df_ts4],
    axis='columns',
)
df_fs = pd.concat(
    objs=[df_fs1, df_fs2],
    axis='columns',
)
# Remaining single-file signals: cooling power, cooling efficiency, motor power,
# efficiency and vibration.
df_vir = pd.concat(
    objs=[df_cp, df_ce, df_eps1, df_se, df_vs1],
    axis='columns',
)


In [10]:
# One wide table holding every sensor column; preview the first rows.
df = pd.concat(
    objs=[df_ps, df_ts, df_fs, df_vir],
    axis='columns',
)
df.head()

Unnamed: 0,ps1_1,ps1_2,ps1_3,ps1_4,ps1_5,ps1_6,ps1_7,ps1_8,ps1_9,ps1_10,...,vs1_51,vs1_52,vs1_53,vs1_54,vs1_55,vs1_56,vs1_57,vs1_58,vs1_59,vs1_60
0,151.47,151.45,151.52,151.27,150.8,150.69,153.89,154.67,152.88,153.82,...,0.554,0.552,0.545,0.553,0.553,0.539,0.544,0.545,0.535,0.543
1,151.11,151.12,151.16,150.92,150.7,150.62,152.4,153.21,152.81,153.53,...,0.555,0.547,0.548,0.544,0.536,0.542,0.54,0.533,0.531,0.534
2,150.81,150.79,150.84,150.65,150.35,150.23,152.03,152.81,152.44,153.27,...,0.543,0.544,0.543,0.554,0.544,0.544,0.545,0.544,0.53,0.534
3,150.48,150.47,150.52,150.31,150.04,149.98,151.63,152.48,152.24,152.94,...,0.549,0.538,0.553,0.543,0.553,0.555,0.544,0.543,0.543,0.542
4,150.41,150.35,150.24,150.12,149.87,149.71,151.64,152.37,151.78,152.68,...,0.546,0.546,0.544,0.552,0.539,0.54,0.549,0.542,0.533,0.537


In [11]:
profile = pd.read_csv('profile.txt', delimiter = '\t', header = None)

In [12]:
profile.columns = ["Cooler Condition","Valve Condition","Internal Pump Leakage","Hydraulic Accumulator","Stable Flag"]

In [13]:
profile_valve = profile['Valve Condition']

### Extract Features using tsfresh

In [14]:
from tsfresh import select_features
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh import extract_relevant_features

from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from tsfresh.feature_extraction import EfficientFCParameters, MinimalFCParameters

In [15]:
def tsfreshextract(data):
    """Extract tsfresh's minimal feature set from one wide sensor table.

    The table is reshaped to the long format tsfresh expects (one row per
    ``(id, T, value)`` triple), where ``id`` is the row label of ``data`` and
    ``T`` is the integer time step taken from the end of the column name.

    Parameters
    ----------
    data : pandas.DataFrame
        One series per row and one column per time step. Every column name
        must end in its integer step number (e.g. ``"ts1_1" ... "ts1_60"``).
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Output of ``extract_features`` computed with ``MinimalFCParameters``
        and imputed with tsfresh's ``impute``.

    Raises
    ------
    ValueError
        If a column name does not end in digits.
    """
    # Take the *trailing* number of each column name. Sensor names such as
    # "ts1" or "ps3" contain digits themselves, so an unanchored r"(\d+)" would
    # return the sensor number and give every sample of the series the same T.
    steps = pd.Index(data.columns).astype(str).str.extract(r"(\d+)$", expand=False)
    if steps.isna().any():
        raise ValueError("every column name must end in its integer time step, e.g. 'ts1_17'")
    # Parse once per column rather than once per melted row.
    column_step = {name: int(step) for name, step in zip(data.columns, steps)}

    # assign() works on a copy, so the caller's frame does not gain an "id" column.
    # NOTE: the long frame has rows x columns entries, which is large for the
    # sensors with thousands of samples per series.
    long_df = data.assign(id=data.index).melt(id_vars="id", var_name="time")

    # Integer steps sort numerically (1, 2, ..., 10), not lexicographically.
    long_df["T"] = long_df["time"].map(column_step).astype(int)
    long_df = (
        long_df.drop(columns="time")
        .sort_values(["id", "T"])
        .reset_index(drop=True)
    )

    X = extract_features(long_df, column_id="id", column_sort="T",
                         default_fc_parameters=MinimalFCParameters(),
                         n_jobs=1, impute_function=impute)

    return X

In [16]:
X_ts1 = tsfreshextract(df_ts1)

Feature Extraction: 100%|██████████| 5/5 [00:10<00:00,  2.13s/it]


In [17]:
X_ts2 = tsfreshextract(df_ts2)

Feature Extraction: 100%|██████████| 5/5 [00:06<00:00,  1.33s/it]


In [18]:
X_ts3 = tsfreshextract(df_ts3)

Feature Extraction: 100%|██████████| 5/5 [00:06<00:00,  1.24s/it]


In [19]:
X_ts4 = tsfreshextract(df_ts4)

Feature Extraction: 100%|██████████| 5/5 [00:05<00:00,  1.17s/it]


In [20]:
X_fs1 = tsfreshextract(df_fs1)

Feature Extraction: 100%|██████████| 5/5 [00:06<00:00,  1.28s/it]


In [21]:
X_fs2 = tsfreshextract(df_fs2)

Feature Extraction: 100%|██████████| 5/5 [00:06<00:00,  1.21s/it]


In [22]:
X_ce = tsfreshextract(df_ce)

Feature Extraction: 100%|██████████| 5/5 [00:05<00:00,  1.18s/it]


In [23]:
X_cp = tsfreshextract(df_cp)

Feature Extraction: 100%|██████████| 5/5 [00:05<00:00,  1.12s/it]


In [24]:
X_se = tsfreshextract(df_se)

Feature Extraction: 100%|██████████| 5/5 [00:05<00:00,  1.14s/it]


In [25]:
X_vs = tsfreshextract(df_vs1)

Feature Extraction: 100%|██████████| 5/5 [00:05<00:00,  1.10s/it]


In [26]:
X_eps = tsfreshextract(df_eps1)

MemoryError: Unable to allocate 101. MiB for an array with shape (13230000,) and data type int64

In [None]:
X_ps1 = tsfreshextract(df_ps1)

In [None]:
X_ps2 = tsfreshextract(df_ps2)

In [None]:
X_ps3 = tsfreshextract(df_ps3)

In [None]:
X_ps4 = tsfreshextract(df_ps4)

In [None]:
X_ps5 = tsfreshextract(df_ps5)

In [None]:
X_ps6 = tsfreshextract(df_ps6)

### Train-test split

In [None]:
# Each frame comes from the same tsfreshextract() call on a melted "value"
# column, so they all carry the same feature column names. Prefix every frame
# with its sensor name so the combined table has unique, traceable columns
# (duplicate column labels break label-based selection and some estimators).
sensor_features = {
    'ps1': X_ps1, 'ps2': X_ps2, 'ps3': X_ps3, 'ps4': X_ps4, 'ps5': X_ps5, 'ps6': X_ps6,
    'fs1': X_fs1, 'fs2': X_fs2,
    'ce': X_ce, 'cp': X_cp, 'se': X_se, 'eps': X_eps,
    'ts1': X_ts1, 'ts2': X_ts2, 'ts3': X_ts3, 'ts4': X_ts4,
    'vs': X_vs,
}
X = pd.concat(
    [features.add_prefix(sensor + '__') for sensor, features in sensor_features.items()],
    axis = 1,
)
assert X.columns.is_unique, "feature column names must be unique after prefixing"
X.head()

In [None]:
X.to_csv('X_extractedfull.csv')

In [None]:
from sklearn.model_selection import train_test_split

# Splitting the data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, profile_valve, train_size=0.7, test_size=0.3, random_state=42,stratify = profile_valve)

In [None]:
y_traindf = pd.DataFrame(y_train)
y_testdf = pd.DataFrame(y_test)

print(y_train.value_counts())
print(y_test.value_counts())

In [None]:
X_train

In [None]:

## SMOTE oversampling is applied to the training split only, to balance the classes
from imblearn import under_sampling 
from imblearn import over_sampling
from imblearn.over_sampling import SMOTE

# Capture the feature names before resampling: depending on the
# imbalanced-learn version the resampled X is returned as a plain NumPy array,
# which has no ``.columns`` attribute.
feature_names = list(X_train.columns)

smt = SMOTE(random_state=45, k_neighbors=5)
# ``fit_sample`` was a deprecated alias that has since been removed;
# ``fit_resample`` is the supported method.
X_train, y_train = smt.fit_resample(X_train, y_train)
if not isinstance(X_train, pd.DataFrame):
    X_train = pd.DataFrame(X_train, columns=feature_names)

# Name the column explicitly so it exists whether y_train came back as a
# Series or as an array.
y_train_smt = pd.Series(y_train, name='Valve Condition').to_frame()

In [None]:
y_train_smt['Valve Condition'].value_counts()

### Standardisation

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler() ## x-mean/std

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
X_train_scaled = pd.DataFrame(X_train_scaled)
X_test_scaled = pd.DataFrame(X_test_scaled)

In [None]:
X_train_scaled.columns = X.columns 
X_test_scaled.columns = X.columns 

In [None]:

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score
from sklearn.model_selection import cross_validate,GridSearchCV,RandomizedSearchCV

from scipy.stats import randint
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
def evaluate_model(dt_classifier,y_train,X_train,y_test,X_test,average='binary'):
    """Print precision (in percent) and the confusion matrix for both splits.

    Parameters
    ----------
    dt_classifier : fitted estimator
        Any object exposing ``predict``.
    y_train, X_train : training labels and features.
    y_test, X_test : test labels and features.
    average : str, optional
        Forwarded to ``precision_score``. The default ``'binary'`` is what
        ``precision_score`` uses when nothing is passed, and it raises
        ``ValueError`` for a multiclass target; pass ``'macro'``, ``'micro'``
        or ``'weighted'`` in that case.

    Returns
    -------
    None
        Results are printed only.
    """
    # Predict once per split and reuse the result for both metrics.
    train_pred = dt_classifier.predict(X_train)
    print("Train Precision :", (precision_score(y_train, train_pred, average=average))*100)
    print("Train Confusion Matrix:")
    print(confusion_matrix(y_train, train_pred))
    print("-"*50)
    test_pred = dt_classifier.predict(X_test)
    print("Test Precision :", (precision_score(y_test, test_pred, average=average))*100)
    print("Test Confusion Matrix:")
    print(confusion_matrix(y_test, test_pred))

### SVM for valve condition classification

In [None]:
import seaborn as sns 
import matplotlib.pyplot as plt

In [None]:
# training a linear SVM classifier
from sklearn.svm import SVC
svm_model_linear = SVC(kernel = 'linear', C = 1).fit(X_train_scaled, y_train)
svm_predictions = svm_model_linear.predict(X_test_scaled)

In [None]:
# creating a confusion matrix
cm = confusion_matrix(y_test, svm_predictions)
cm

In [None]:
def cmatrix(model, X=None, y=None,
            display_labels=('close to total failure','severe lag','small lag','optimal switching behavior')):
    """Plot the confusion matrix of a fitted classifier and return it.

    Parameters
    ----------
    model : fitted estimator
        Any object exposing ``predict``.
    X, y : optional
        Features and true labels to evaluate on. When omitted, the notebook
        globals ``X_test_scaled`` and ``y_test`` are used, as before.
    display_labels : sequence of str, optional
        Tick labels, one per class, in the order of the matrix rows
        (presumably the sorted class labels; verify against the target coding).

    Returns
    -------
    numpy.ndarray
        The confusion matrix that was plotted.
    """
    X = X_test_scaled if X is None else X
    y = y_test if y is None else y

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_title('Confusion Matrix')

    # metrics.plot_confusion_matrix has been removed from scikit-learn; computing
    # the matrix and handing it to ConfusionMatrixDisplay gives the same plot and
    # works on both older and current releases.
    matrix = metrics.confusion_matrix(y, model.predict(X))
    disp = metrics.ConfusionMatrixDisplay(confusion_matrix=matrix,
                                          display_labels=list(display_labels))
    disp.plot(ax=ax)
    return disp.confusion_matrix
    

In [None]:
cmatrix(svm_model_linear)

In [None]:
print(metrics.classification_report(y_test, svm_model_linear.predict(X_test_scaled)))

In [None]:
# The SVM is trained on standardised features, so it has to be scored on the
# standardised test set too; passing the raw X_test gives meaningless scores.
y_score_svm = svm_model_linear.fit(X_train_scaled, y_train).decision_function(X_test_scaled)
y_score_svm

In [None]:
from sklearn.metrics import roc_curve, auc
def plot_multiclass_roc(y_score, X_test, y_test, n_classes= 3, figsize = (20,6)):
    """Plot one ROC curve per class on a single axes.

    Column ``k`` of ``y_score`` is compared against column ``k`` of the
    one-hot encoding of ``y_test`` for ``k`` in ``0 .. n_classes - 1``; the
    legend reports each curve's area under the curve.

    Parameters
    ----------
    y_score : 2-D array
        Per-class scores, indexed as ``y_score[:, k]``.
    X_test : unused
        Accepted but never read by this function.
    y_test : array-like
        True labels; one-hot encoded with ``pd.get_dummies``.
    n_classes : int
        Number of leading score/indicator columns to plot.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    """
    # One indicator column per distinct label in y_test, computed once.
    indicators = pd.get_dummies(y_test, drop_first=False).values

    # (false-positive rates, true-positive rates, area) for each class index.
    curves = []
    for k in range(n_classes):
        fpr_k, tpr_k, _ = roc_curve(indicators[:, k], y_score[:, k])
        curves.append((fpr_k, tpr_k, auc(fpr_k, tpr_k)))

    fig, ax = plt.subplots(figsize=figsize)
    # Diagonal reference line.
    ax.plot([0, 1], [0, 1], 'k--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic example')
    for k, (fpr_k, tpr_k, area) in enumerate(curves):
        ax.plot(fpr_k, tpr_k, label='ROC curve (area = %0.2f) for label %i' % (area, k))
    ax.legend(loc="best")
    ax.grid(alpha=.4)
    sns.despine()
    plt.show()

In [None]:
plot_multiclass_roc(y_score_svm,X_test_scaled, y_test, 4)

### Building an RF classifier for valve condition

In [None]:
classifier_rf = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 42)
classifier_rf.fit(X_train_scaled, y_train)

In [None]:
cmatrix(classifier_rf)

In [None]:
y_score_rf = classifier_rf.fit(X_train_scaled, y_train).predict_proba(X_test_scaled)
y_score_rf

In [None]:
plot_multiclass_roc(y_score_rf, X_test_scaled, y_test, n_classes=4, figsize=(20, 6))

In [None]:
print(metrics.classification_report(y_test, classifier_rf.predict(X_test_scaled)))

### kNN for valve condition classification

In [None]:
# training a KNN classifier
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 7).fit(X_train_scaled, y_train)


In [None]:
cmatrix(knn)

In [None]:
# kNN is distance-based and was fitted on standardised features, so the
# probabilities must be computed on X_test_scaled, not on the raw X_test.
y_score_knn = knn.fit(X_train_scaled, y_train).predict_proba(X_test_scaled)
y_score_knn

In [None]:
plot_multiclass_roc(y_score_knn, X_test_scaled, y_test, n_classes=4, figsize=(20, 6))

In [None]:
print(metrics.classification_report(y_test, knn.predict(X_test_scaled)))

### Performance in Noise

In [None]:
import numpy as np 

# Seeded generator so the noisy test set, and every score computed from it, is
# the same on each run.
rng = np.random.default_rng(42)

# Zero-mean Gaussian noise whose scale is 20% of each feature's own (population)
# standard deviation. The per-column std is taken explicitly because what
# np.std returns for a DataFrame depends on the pandas version.
mu = 0
sigma = X_test_scaled.std(axis=0, ddof=0).to_numpy() * 0.20
noise = rng.normal(mu, sigma, X_test_scaled.shape)

X_testnoise = X_test_scaled + noise

In [None]:
print(metrics.classification_report(y_test, classifier_rf.predict(X_testnoise)))

y_score_rf_hypernoise = classifier_rf.fit(X_train_scaled, y_train).predict_proba(X_testnoise)
y_score_rf_hypernoise

plot_multiclass_roc(y_score_rf_hypernoise, X_testnoise, y_test, n_classes=4, figsize=(20, 6))

In [None]:
print(metrics.classification_report(y_test, knn.predict(X_testnoise)))

y_score_knnnoise = knn.fit(X_train_scaled, y_train).predict_proba(X_testnoise)
y_score_knnnoise

plot_multiclass_roc(y_score_knnnoise, X_testnoise, y_test, n_classes=4, figsize=(20, 6))

In [None]:
print(metrics.classification_report(y_test, svm_model_linear.predict(X_testnoise)))

y_score_svmnoise = svm_model_linear.fit(X_train_scaled, y_train).decision_function(X_testnoise)
y_score_svmnoise

plot_multiclass_roc(y_score_svmnoise, X_testnoise, y_test, n_classes=4, figsize=(20, 6))

### Building a Gradient boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Base gradient-boosting estimator: 100 boosting stages, with the tree depth
# not set here (library default). random_state is fixed for reproducibility.
gbtc = GradientBoostingClassifier(
    random_state=42,
    n_estimators=100,
)

In [None]:
parameters = {'max_depth':np.arange( 2,6,1 ).tolist()}

In [None]:
# 6-fold grid search over `parameters`, refitting the best model on all the
# training data. The former `iid` argument is no longer accepted by
# GridSearchCV (passing it raises TypeError), so it is not passed.
clf_gbtc = GridSearchCV(gbtc, parameters, cv=6, n_jobs=-1, refit=True, pre_dispatch='2*n_jobs')

In [None]:
clf_gbtc.fit(X_train_scaled, y_train)

In [None]:
cmatrix(clf_gbtc)

In [None]:
# clf_gbtc has already been fitted on (X_train_scaled, y_train) above; running
# the whole seeded grid search a second time would only repeat the same work.
y_score_gbtc = clf_gbtc.predict_proba(X_test_scaled)
y_score_gbtc

In [None]:
# Plot all four classes, as for the other models; n_classes=3 would silently
# leave the last class out of the figure.
plot_multiclass_roc(y_score_gbtc, X_test_scaled, y_test, n_classes=4, figsize=(20, 6))

In [None]:
import xgboost as xgb

In [None]:
from sklearn.preprocessing import LabelEncoder

seed = 24

# Recent XGBoost releases require class labels encoded as 0 .. n_classes-1 and
# reject anything else. The raw target values are not guaranteed to have that
# form, so encode them for training and decode the predictions afterwards.
label_encoder = LabelEncoder().fit(y_train)

xgb1 = xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, max_depth=5, min_child_weight=11,
             gamma=0.1, subsample=0.8, colsample_bytree=0.7, objective='multi:softprob', n_jobs=-1, scale_pos_weight=1,
             seed=seed)

xgb1.fit(X_train, label_encoder.transform(y_train))

# Map the encoded predictions back to the original labels so they are
# comparable with y_test.
y_pred = label_encoder.inverse_transform(xgb1.predict(X_test))

# confusion matrix and classification report
cm = confusion_matrix(y_test, y_pred)

print(classification_report(y_test, y_pred))