In [21]:
# Numerical, dataframe and plotting libraries.
import numpy as np
import pandas as pd
import polars as pl
import matplotlib.pyplot as plt
# Standard-library helpers.
import os
import zipfile
import warnings
# NOTE(review): with no category or module argument this silences every warning
# for the rest of the kernel session, deprecation notices included — consider
# narrowing the filter to the specific warning that was noisy.
warnings.filterwarnings("ignore")
# Domain packages; `data.data_processing` is aliased as `get` and is what the
# cells below use to load the wind-farm data.
import wtphm
import data.data_processing as get

In [2]:
# Load IPython's autoreload extension and select mode 2, so that edits to
# imported modules (e.g. data.data_processing) are picked up on the next cell
# run without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# List the names exposed by the data_processing module (imported as `get`).
print(dir(get))

['Guck_et_al', 'SCADA', 'WTPHM', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'get_vibration', 'os', 'partial', 'pd', 'pl']


For the anomaly detection data sets, each file in `Wind Farm {}/datasets` represents an individual turbine.

In [18]:
# Instantiate the Guck_et_al loader from data.data_processing.
# NOTE(review): "All" presumably selects every wind farm — confirm in the loader.
guck = get.Guck_et_al("All")
# collect=False is passed here; a later cell calls .collect() on one of the
# returned frames, so the entries are treated as lazy polars frames.
all_wind_farms = guck.get_data(collect=False)

In [19]:
# Show the keys of the mapping returned by get_data (one per wind farm).
all_wind_farms.keys()

dict_keys(['A', 'B', 'C'])

In [20]:
# Bind each wind farm's entry from the mapping to its own variable.
wind_farm_a, wind_farm_b, wind_farm_c = (
    all_wind_farms[farm_key] for farm_key in ('A', 'B', 'C')
)

In [25]:
# Filter one dataset of wind farm A while it is still lazy, then materialise it.
# Lower bound applied to the average wind speed; the feature description table
# in this notebook lists wind_speed_3 in m/s.
MIN_WIND_SPEED_AVG = 5

lazy_df = wind_farm_a['dataset_10']
# The filter only extends the lazy query; it is executed by .collect().
result = lazy_df.filter(pl.col('wind_speed_3_avg') > MIN_WIND_SPEED_AVG).collect()
# End on the expression (instead of print) so the notebook renders the frame
# with its rich display.
result.head()

shape: (5, 86)
┌────────────┬──────────┬─────┬────────────┬───┬────────────┬────────────┬────────────┬────────────┐
│ time_stamp ┆ asset_id ┆ id  ┆ train_test ┆ … ┆ sensor_52_ ┆ sensor_52_ ┆ sensor_52_ ┆ sensor_53_ │
│ ---        ┆ ---      ┆ --- ┆ ---        ┆   ┆ max        ┆ min        ┆ std        ┆ avg        │
│ datetime[μ ┆ i64      ┆ i64 ┆ str        ┆   ┆ ---        ┆ ---        ┆ ---        ┆ ---        │
│ s]         ┆          ┆     ┆            ┆   ┆ f64        ┆ f64        ┆ f64        ┆ f64        │
╞════════════╪══════════╪═════╪════════════╪═══╪════════════╪════════════╪════════════╪════════════╡
│ 2013-10-10 ┆ 10       ┆ 22  ┆ train      ┆ … ┆ 12.8       ┆ 10.8       ┆ 0.5        ┆ 28.0       │
│ 12:20:00   ┆          ┆     ┆            ┆   ┆            ┆            ┆            ┆            │
│ 2013-10-10 ┆ 10       ┆ 23  ┆ train      ┆ … ┆ 13.5       ┆ 11.1       ┆ 0.6        ┆ 28.0       │
│ 12:30:00   ┆          ┆     ┆            ┆   ┆            ┆            ┆  

In [14]:
# Preview the first rows of dataset_10 from wind farm A.
# The data is loaded with collect=False, so .head() on its own would only
# extend the lazy query instead of returning rows. .lazy() leaves a LazyFrame
# unchanged and converts an eager DataFrame, so this cell renders the table in
# either case when the notebook is run top to bottom.
wind_farm_a['dataset_10'].lazy().head().collect()

time_stamp,asset_id,id,train_test,status_type_id,sensor_0_avg,sensor_1_avg,sensor_2_avg,wind_speed_3_avg,wind_speed_4_avg,wind_speed_3_max,wind_speed_3_min,wind_speed_3_std,sensor_5_avg,sensor_5_max,sensor_5_min,sensor_5_std,sensor_6_avg,sensor_7_avg,sensor_8_avg,sensor_9_avg,sensor_10_avg,sensor_11_avg,sensor_12_avg,sensor_13_avg,sensor_14_avg,sensor_15_avg,sensor_16_avg,sensor_17_avg,sensor_18_avg,sensor_18_max,sensor_18_min,sensor_18_std,sensor_19_avg,sensor_20_avg,sensor_21_avg,sensor_22_avg,…,power_29_avg,power_29_max,power_29_min,power_29_std,power_30_avg,power_30_max,power_30_min,power_30_std,sensor_31_avg,sensor_31_max,sensor_31_min,sensor_31_std,sensor_32_avg,sensor_33_avg,sensor_34_avg,sensor_35_avg,sensor_36_avg,sensor_37_avg,sensor_38_avg,sensor_39_avg,sensor_40_avg,sensor_41_avg,sensor_42_avg,sensor_43_avg,sensor_44,sensor_45,sensor_46,sensor_47,sensor_48,sensor_49,sensor_50,sensor_51,sensor_52_avg,sensor_52_max,sensor_52_min,sensor_52_std,sensor_53_avg
datetime[μs],i64,i64,str,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2013-10-10 08:40:00,10,0,"""train""",0,20.0,265.7,19.4,2.6,2.6,4.3,1.0,0.4,24.0,24.1,24.0,0.0,30.0,35.0,44.0,35.0,31.0,39.0,39.0,30.0,32.0,34.0,34.0,35.0,203.7,224.1,166.3,13.5,24.0,31.0,32.0,0.6,…,0.000976,0.017073,0.0,0.003171,-0.004,-0.003317,-0.013902,0.001707,-0.005707,-10.5,-25.4,2.1,401.6,399.7,398.1,31.0,32.0,32.0,69.0,70.0,64.0,49.0,246.3,29.0,-1371.0,0.0,0.0,-1946.0,0.0,0.0,-1371.0,-1946.0,1.8,2.0,0.0,0.2,23.0
2013-10-10 08:50:00,10,1,"""train""",0,20.0,244.9,-11.8,2.6,2.6,4.9,0.7,0.5,24.0,24.1,24.0,0.0,30.0,35.0,43.0,35.0,31.0,39.0,39.0,30.0,32.0,34.0,34.0,34.0,200.2,224.1,168.0,15.0,24.0,31.0,32.0,0.6,…,0.000878,0.01322,0.0,0.002732,-0.003854,-0.003268,-0.012098,0.001756,-0.005415,-10.5,-15.4,0.9,400.3,397.8,396.6,31.0,32.0,32.0,69.0,70.0,64.0,49.0,256.8,29.0,-1309.0,0.0,0.0,-1850.0,0.0,0.0,-1309.0,-1850.0,1.8,2.0,1.5,0.1,23.0
2013-10-10 09:00:00,10,2,"""train""",0,20.0,299.5,42.7,2.5,2.5,4.2,0.6,0.5,24.0,24.0,21.6,0.1,30.0,35.0,42.0,35.0,31.0,38.0,39.0,30.0,31.0,34.0,34.0,34.0,202.8,224.4,148.2,23.6,24.0,30.0,31.0,0.6,…,0.0,0.007415,0.0,0.000244,-0.004,-0.003268,-0.012341,0.001902,-0.005415,-10.6,-15.4,0.9,400.0,398.3,396.5,31.0,32.0,32.0,69.0,70.0,64.0,49.0,256.8,28.0,-1349.0,0.0,0.0,-1851.0,0.0,0.0,-1349.0,-1851.0,1.6,2.0,0.0,0.7,23.0
2013-10-10 09:10:00,10,3,"""train""",0,20.0,280.2,23.5,2.5,2.5,3.8,0.6,0.4,24.0,24.1,24.0,0.0,30.0,35.0,41.0,35.0,31.0,38.0,39.0,30.0,31.0,34.0,34.0,34.0,198.3,212.4,170.4,12.0,24.0,30.0,31.0,0.6,…,0.0,0.0,0.0,0.0,-0.003854,-0.003268,-0.012439,0.00161,-0.005317,-10.4,-15.3,0.8,398.3,397.0,395.0,31.0,32.0,32.0,69.0,70.0,64.0,49.0,256.8,28.0,-1326.0,0.0,0.0,-1824.0,0.0,0.0,-1326.0,-1824.0,1.7,1.9,1.5,0.1,23.0
2013-10-10 09:20:00,10,4,"""train""",0,20.0,281.1,24.3,2.7,2.7,4.2,0.9,0.3,24.0,24.0,24.0,0.0,30.0,35.0,40.0,35.0,31.0,38.0,39.0,30.0,31.0,33.0,34.0,34.0,217.4,233.4,205.2,7.9,24.0,30.0,31.0,0.6,…,4.9e-05,0.007951,0.0,0.000439,-0.003854,-0.003268,-0.012439,0.001707,-0.005366,-10.4,-15.3,0.9,399.1,397.9,395.9,31.0,32.0,32.0,69.0,70.0,64.0,49.0,256.8,28.0,-1324.0,0.0,0.0,-1838.0,0.0,0.0,-1324.0,-1838.0,1.9,2.1,1.8,0.1,23.0


In [15]:
# Preview the event table of wind farm A (label, start/end and description).
# NOTE(review): if this entry is lazy when loaded with collect=False, .head()
# alone will not render rows on a fresh top-to-bottom run — confirm.
wind_farm_a['event_info'].head()

event_id,event_label,event_start,event_start_id,event_end,event_end_id,event_description
i64,str,datetime[μs],i64,datetime[μs],i64,str
68,"""anomaly""",2015-07-29 13:20:00,52063,2015-08-12 13:10:00,54076,"""Transformer failure"""
22,"""anomaly""",2021-08-11 09:50:00,51888,2021-08-18 10:00:00,52892,"""Hydraulic group"""
72,"""anomaly""",2021-10-09 08:40:00,52497,2021-10-16 08:40:00,53505,"""Gearbox failure"""
73,"""anomaly""",2023-06-09 11:40:00,52745,2023-06-16 11:40:00,53753,"""Hydraulic group"""
0,"""anomaly""",2022-08-05 06:10:00,52436,2022-08-19 06:10:00,54447,"""Generator bearing failure"""


In [17]:
# Preview the sensor metadata of wind farm A (name, statistic, description, unit).
# NOTE(review): if this entry is lazy when loaded with collect=False, .head()
# alone will not render rows on a fresh top-to-bottom run — confirm.
wind_farm_a['feature_description'].head()

sensor_name,statistics_type,description,unit,is_angle,is_counter
str,str,str,str,bool,bool
"""sensor_0""","""average""","""Ambient temperature""","""°C""",False,False
"""sensor_1""","""average""","""Wind absolute direction""","""°""",True,False
"""sensor_2""","""average""","""Wind relative direction""","""°""",True,False
"""wind_speed_3""","""maximum,minimum,average,std_de…","""Windspeed""","""m/s""",False,False
"""wind_speed_4""","""average""","""Estimated windspeed""","""m/s""",False,False
