In [1]:
import warnings

import pandas as pd

# Directory containing the raw training CSV files.
data_path = "./data/"

# Silence every warning category for the remainder of the session.
warnings.simplefilter("ignore")

In [2]:
# Read the semicolon-delimited turbine signal readings and convert the
# 'Timestamp' column to datetimes in one chained expression.
signals_training = pd.read_csv(
    data_path + "wind-farm-1-signals-training.csv",
    sep=";",
).assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))

# Distinct turbine identifiers, in order of first appearance in the file.
turbines_id = list(signals_training['Turbine_ID'].unique())

In [3]:
# Group the signal columns by component, based on the column-name prefix.
signal_variables = signals_training.columns.tolist()
bearings_variables = [name for name in signal_variables if name.startswith("Gen_Bear")]
# Generator columns share the "Gen" prefix with the bearing columns, so the
# bearing ones are excluded explicitly.
generator_variables = [
    name for name in signal_variables
    if name.startswith("Gen") and not name.startswith("Gen_Bear")
]
gearbox_variables = [name for name in signal_variables if name.startswith("Gear")]
transformer_variables = [name for name in signal_variables if name.startswith("HVTrafo")]
hydraulic_variables = [name for name in signal_variables if name.startswith("Hyd")]

# One frame per component: the two key columns followed by that component's signals.
bearings = signals_training.loc[:, ['Turbine_ID', 'Timestamp', *bearings_variables]]
generators = signals_training.loc[:, ['Turbine_ID', 'Timestamp', *generator_variables]]
gearbox = signals_training.loc[:, ['Turbine_ID', 'Timestamp', *gearbox_variables]]
transformer = signals_training.loc[:, ['Turbine_ID', 'Timestamp', *transformer_variables]]
hydraulic = signals_training.loc[:, ['Turbine_ID', 'Timestamp', *hydraulic_variables]]

In [4]:
# Read the semicolon-delimited met-mast readings and convert the
# 'Timestamp' column to datetimes.
weather_training = pd.read_csv(
    data_path + "wind-farm-1-metmast-training.csv",
    sep=";",
).assign(Timestamp=lambda frame: pd.to_datetime(frame['Timestamp']))

# Every met-mast column except the timestamp key.
weather_variables = [col for col in weather_training.columns if col != 'Timestamp']

In [5]:
# Load the failure log and prepare it for joining onto the signal tables.
failures = pd.read_csv(data_path + "htw-failures-training.csv")
failures['Timestamp'] = pd.to_datetime(failures['Timestamp'])
# Round failure times to the nearest 10 minutes so they can match the
# timestamps they are later merged on (presumably the signal sampling
# interval -- confirm against the data). '10min' replaces the deprecated
# '10T' alias and is accepted by both old and new pandas versions.
failures['Timestamp'] = failures['Timestamp'].dt.round('10min')
# Every row in the failure log is a positive example.
failures['Label'] = 1

In [6]:
# Map each component name (as it appears in the failure log's 'Component'
# column filter used later) to the frame holding that component's signals.
signal_dic = dict(
    GENERATOR=generators,
    HYDRAULIC_GROUP=hydraulic,
    GENERATOR_BEARING=bearings,
    TRANSFORMER=transformer,
    GEARBOX=gearbox,
)

In [8]:
# Build one labelled table per component. For every turbine, the component's
# signals are joined with the met-mast readings on 'Timestamp' and with that
# turbine's failure records on ('Turbine_ID', 'Timestamp'). Rows without a
# matching failure get Label 0; remaining gaps are forward-filled in time order.
processed = {}

for comp, data in signal_dic.items():
    # Failure records for this component only (same for every turbine, so
    # computed once per component).
    comp_failures = failures.loc[
        failures['Component'] == comp,
        ['Turbine_ID', 'Timestamp', 'Label'],
    ]

    turbine_frames = []
    for t in turbines_id:
        merged = pd.merge(
            data[data['Turbine_ID'] == t],
            weather_training,
            how="outer",
            on='Timestamp'
        ).merge(
            # Restrict to the current turbine: with an outer join, failures of
            # other turbines would otherwise be appended as extra rows and then
            # forward-filled with this turbine's sensor values.
            comp_failures[comp_failures['Turbine_ID'] == t],
            how="outer",
            on=['Turbine_ID', 'Timestamp']
        )
        merged = merged.sort_values(by=['Timestamp'])
        # Fill the label BEFORE forward-filling, via explicit assignment:
        # a chained in-place fillna is a no-op under pandas Copy-on-Write,
        # which would let the forward-fill propagate Label == 1 to every
        # row following a failure.
        merged['Label'] = merged['Label'].fillna(0)
        merged = merged.ffill()
        turbine_frames.append(merged)

    # Concatenate once per component instead of growing a frame in the loop.
    processed[comp] = pd.concat(turbine_frames) if turbine_frames else pd.DataFrame()

In [9]:
# Sanity check: no processed table may contain missing values. The message
# names the component and the offending columns so a failure is actionable.
for comp, data in processed.items():
    null_counts = data.isna().sum()
    assert null_counts.sum() == 0, (
        f"{comp}: missing values remain in columns "
        f"{null_counts[null_counts > 0].to_dict()}"
    )

In [13]:
# Write each component's processed table to "<save_data_path><component>_processed.csv",
# without the index column.
save_data_path = "./data/"
for comp, data in processed.items():
    data.to_csv(f"{save_data_path}{comp}_processed.csv", index=False)