In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import joblib

import warnings
warnings.filterwarnings(action="ignore")

from simple_neural_network.nn_model import NNModel
from simple_neural_network.process_xy import Process_XYData

### Hydraulic Group

In [44]:
def read(data_path):
    """Load a signals CSV and return it ordered by turbine and time.

    Parameters
    ----------
    data_path : str or path-like
        Location of a CSV file containing at least the columns
        ``Turbine_ID`` and ``Timestamp``.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``Timestamp`` converted via
        ``pd.to_datetime`` and rows sorted by ``Turbine_ID`` then
        ``Timestamp``. The original row labels are kept (the index is
        not reset).
    """
    return (
        pd.read_csv(data_path)
        .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
        .sort_values(by=['Turbine_ID', 'Timestamp'])
    )

# Training and test signal tables, each parsed and sorted by read().
df = read(data_path="./data/all_signals_hourly_processed.csv")
df_test = read(data_path="./data/test_signals_hourly_processed.csv")

# Table whose 'feature' column lists the selected model inputs
# (read as-is; no timestamp parsing or sorting is applied).
select_feat = pd.read_csv(filepath_or_buffer="./saved_results/select_feat.csv")

In [45]:
# Give the test frame the trailing 20 columns of the training frame as
# empty (None) placeholders.
# NOTE(review): presumably these are the label/target columns that only
# exist in the training data -- confirm; the 20 is a magic number.
for c in df.columns[-20:]:
    df_test[c] = None

# Take every training row from 60 days before the first test timestamp
# onwards and merge it with the test rows; one row is kept per
# (Turbine_ID, Timestamp) using the first non-null value of each column.
# NOTE(review): df_test is overwritten from itself, so re-running this cell
# recomputes the minimum on the already-extended frame and can pull in a
# further 60 days of training data.
last_60 = df_test['Timestamp'].min() - np.timedelta64(60, 'D')
df_train_last60 = df.loc[df['Timestamp'] >= last_60]
df_test = (
    pd.concat([df_test, df_train_last60])
    .groupby(by=['Turbine_ID', 'Timestamp'])
    .first()
    .reset_index()
)

In [46]:
# Selected model inputs: every entry of the 'feature' column except 'const'
# (list.remove raises ValueError if 'const' is not present).
feat_list = select_feat['feature'].tolist()
feat_list.remove('const')

# Keep the first two columns, the selected features, and the trailing 20
# columns, in that order.
df_test = df_test[[*df_test.columns[:2], *feat_list, *df_test.columns[-20:]]]

In [47]:
# Parameters
steps = [24, 3*24, 7*24, 14*24]
subsystem = "HYDRAULIC_GROUP"

In [48]:
# Build the per-row table used for prediction from the extended test frame,
# for the selected subsystem and step sizes.
# NOTE(review): the third positional argument is None here; its meaning is
# defined by Process_XYData.get_XY_with_steps, which is not visible in this
# notebook -- confirm before changing.
process = Process_XYData()
test_xy = process.get_XY_with_steps(df_test, subsystem, None, steps)

In [49]:
# split_component returns five values; only the first is kept. It is used
# below to size the network and as the input to predict().
# NOTE(review): assigning to `_` shadows IPython's last-output variable.
X_test, _, _, _, _ = process.split_component(test_xy)
# Display the shape as a sanity check of the row and column counts.
X_test.shape

(21042, 169)

In [50]:
# Create the network wrapper from the column count of X_test
# (presumably the input dimension -- confirm against NNModel) and load the
# saved hydraulic-group model from disk.
nn_model = NNModel(X_test.shape[1])
nn_model.load_model("./saved_models/model_for_test_HYDRAULIC_GROUP1")

In [51]:
# Store the model output for every row of X_test next to its turbine and
# timestamp. The next cell treats Pred == 1 as a warning.
# NOTE(review): threshold=0.9 is presumably the probability cut-off applied
# inside NNModel.predict -- confirm.
test_xy['Pred'] = nn_model.predict(X_test, threshold=0.9)



In [54]:
# Warnings for this subsystem: turbine and time of every row with Pred == 1.
res = test_xy.loc[
    test_xy['Pred'] == 1,
    ['Turbine_ID', 'Timestamp'],
].reset_index(drop=True)
res
# NOTE(review): the Combine section reads ./saved_results/res_HYDRAULIC_GROUP.csv;
# while the line below stays commented out, this cell does not refresh that file.
# res.to_csv(f"./saved_results/res_{subsystem}.csv", index=False)

Unnamed: 0,Turbine_ID,Timestamp
0,T01,2017-08-27 10:00:00
1,T06,2017-08-30 01:00:00
2,T11,2017-09-01 07:00:00
3,T11,2017-09-17 09:00:00


### Gearbox

In [24]:
def read(data_path):
    """Load a signals CSV and return it ordered by turbine and time.

    NOTE(review): this repeats the ``read`` defined in the Hydraulic Group
    section; the later definition silently replaces the earlier one.

    Parameters
    ----------
    data_path : str or path-like
        Location of a CSV file containing at least the columns
        ``Turbine_ID`` and ``Timestamp``.

    Returns
    -------
    pandas.DataFrame
        The file contents with ``Timestamp`` converted via
        ``pd.to_datetime`` and rows sorted by ``Turbine_ID`` then
        ``Timestamp``. The original row labels are kept (the index is
        not reset).
    """
    return (
        pd.read_csv(data_path)
        .assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))
        .sort_values(by=['Turbine_ID', 'Timestamp'])
    )

# Training and test signal tables, each parsed and sorted by read().
df = read(data_path="./data/all_signals_hourly_processed.csv")
df_test = read(data_path="./data/test_signals_hourly_processed.csv")

# Table whose 'feature' column lists the selected model inputs
# (read as-is; no timestamp parsing or sorting is applied).
select_feat = pd.read_csv(filepath_or_buffer="./saved_results/select_feat.csv")

In [25]:
# Give the test frame the trailing 20 columns of the training frame as
# empty (None) placeholders.
# NOTE(review): presumably these are the label/target columns that only
# exist in the training data -- confirm; the 20 is a magic number.
for c in df.columns[-20:]:
    df_test[c] = None

# Take every training row from 60 days before the first test timestamp
# onwards and merge it with the test rows; one row is kept per
# (Turbine_ID, Timestamp) using the first non-null value of each column.
# NOTE(review): df_test is overwritten from itself, so re-running this cell
# recomputes the minimum on the already-extended frame and can pull in a
# further 60 days of training data.
last_60 = df_test['Timestamp'].min() - np.timedelta64(60, 'D')
df_train_last60 = df.loc[df['Timestamp'] >= last_60]
df_test = (
    pd.concat([df_test, df_train_last60])
    .groupby(by=['Turbine_ID', 'Timestamp'])
    .first()
    .reset_index()
)

In [26]:
# Selected model inputs: every entry of the 'feature' column except 'const'
# (list.remove raises ValueError if 'const' is not present).
feat_list = select_feat['feature'].tolist()
feat_list.remove('const')

# Keep the first two columns, the selected features, and the trailing 20
# columns, in that order.
df_test = df_test[[*df_test.columns[:2], *feat_list, *df_test.columns[-20:]]]

In [27]:
# Parameters
steps = [24, 3*24, 7*24, 14*24]
subsystem = "GEARBOX"

In [28]:
# Build the per-row table used for prediction from the extended test frame,
# for the selected subsystem and step sizes.
# NOTE(review): the third positional argument is None here; its meaning is
# defined by Process_XYData.get_XY_with_steps, which is not visible in this
# notebook -- confirm before changing.
process = Process_XYData()
test_xy = process.get_XY_with_steps(df_test, subsystem, None, steps)

In [29]:
# split_component returns five values; only the first is kept. It is used
# below to size the networks and as the input to predict().
# NOTE(review): assigning to `_` shadows IPython's last-output variable.
X_test, _, _, _, _ = process.split_component(test_xy)
# Display the shape as a sanity check of the row and column counts.
X_test.shape

(21042, 169)

In [30]:
# Two separately saved gearbox models are loaded; a later cell only reports
# a warning when both of them predict 1.
# Each wrapper is created from the column count of X_test (presumably the
# input dimension -- confirm against NNModel).
nn_model = NNModel(X_test.shape[1])
nn_model.load_model("./saved_models/model_for_test_GEARBOX")
nn_model1 = NNModel(X_test.shape[1])
nn_model1.load_model("./saved_models/model_for_test_GEARBOX1")

In [31]:
# Store each model's output for every row of X_test in its own column.
# NOTE(review): threshold=0.9 is presumably the probability cut-off applied
# inside NNModel.predict -- confirm.
test_xy['Pred'] = nn_model.predict(X_test, threshold=0.9)
test_xy['Pred1'] = nn_model1.predict(X_test, threshold=0.9)



In [32]:
# Rows on which both gearbox models agree (Pred == 1 and Pred1 == 1).
test_xy.loc[
    (test_xy['Pred'] == 1) & (test_xy['Pred1'] == 1),
    ['Turbine_ID', 'Timestamp'],
]

Unnamed: 0,Turbine_ID,Timestamp
1029,T01,2017-08-02 08:00:00
1030,T01,2017-08-02 09:00:00
1031,T01,2017-08-02 10:00:00
1032,T01,2017-08-02 11:00:00
1033,T01,2017-08-02 12:00:00
1034,T01,2017-08-02 13:00:00
1035,T01,2017-08-02 14:00:00
3742,T01,2017-11-29 20:00:00
3743,T01,2017-11-29 21:00:00
3744,T01,2017-11-29 22:00:00


There are many consecutive warnings within the same day for the same turbine. For each turbine and calendar day we randomly select one of these warnings as the final warning. The results should not vary much if a different warning from the same day is selected instead.

In [42]:
# Fixed seed so that the random pick below is identical on every run;
# without it the reported warning times change each time the cell executes.
SAMPLE_SEED = 42

# Rows on which both gearbox models agree (Pred == 1 and Pred1 == 1).
res = test_xy.loc[
    (test_xy['Pred'] == 1) & (test_xy['Pred1'] == 1),
    ['Turbine_ID', 'Timestamp'],
].reset_index(drop=True)

# Calendar day of each warning, used only as a grouping key.
res['Date'] = res['Timestamp'].dt.date

# Keep one randomly chosen warning per turbine per day.
# GroupBy.sample returns rows of the original frame, so the grouping columns
# are retained; groupby.apply on a frame that includes its grouping columns
# is deprecated in recent pandas releases.
res = (
    res.groupby(['Turbine_ID', 'Date'])
    .sample(n=1, random_state=SAMPLE_SEED)
    .reset_index(drop=True)
    .drop(columns=['Date'])
)
res
# NOTE(review): the Combine section reads ./saved_results/res_GEARBOX.csv;
# while the line below stays commented out, this cell does not refresh that file.
# res.to_csv(f"./saved_results/res_{subsystem}.csv", index=False)

Unnamed: 0,Turbine_ID,Timestamp
0,T01,2017-08-02 11:00:00
1,T01,2017-11-29 20:00:00
2,T06,2017-09-24 06:00:00
3,T07,2017-08-19 15:00:00
4,T09,2017-09-08 13:00:00
5,T09,2017-11-06 23:00:00
6,T11,2017-09-17 09:00:00


### Combine

In [58]:
# Saved gearbox warnings, tagged with their component and with Timestamp
# parsed back into datetimes (CSV stores it as text).
res1 = pd.read_csv("./saved_results/res_GEARBOX.csv").assign(
    Component='GEARBOX',
    Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']),
)

# Saved hydraulic-group warnings, prepared the same way.
res2 = pd.read_csv("./saved_results/res_HYDRAULIC_GROUP.csv").assign(
    Component='HYDRAULIC_GROUP',
    Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']),
)

In [61]:
# Stack both components' warnings, add an empty Remarks column, and fix the
# column order. Row labels from each input are kept, so they repeat.
res_final = (
    pd.concat([res1, res2])
    .assign(Remarks=None)
    [['Turbine_ID', 'Component', 'Timestamp', 'Remarks']]
)
res_final

Unnamed: 0,Turbine_ID,Component,Timestamp,Remarks
0,T01,GEARBOX,2017-08-02 11:00:00,
1,T01,GEARBOX,2017-11-29 20:00:00,
2,T06,GEARBOX,2017-09-24 06:00:00,
3,T07,GEARBOX,2017-08-19 15:00:00,
4,T09,GEARBOX,2017-09-08 13:00:00,
5,T09,GEARBOX,2017-11-06 23:00:00,
6,T11,GEARBOX,2017-09-17 09:00:00,
0,T01,HYDRAULIC_GROUP,2017-08-27 10:00:00,
1,T06,HYDRAULIC_GROUP,2017-08-30 01:00:00,
2,T11,HYDRAULIC_GROUP,2017-09-01 07:00:00,


In [62]:
# res_final.to_csv("./saved_results/results.csv", index=False)