In [1]:
import ipywidgets as widgets
import numpy as np
import os
import pandas as pd

# Statistics plan: loader configuration name -> sheet name -> columns.
# The loop below assigns each top-level key to `config_select.value` and passes
# every (column, sheet) pair to `dfloader.add_targets(column, sheet_name=sheet)`.
# Key order is significant: it determines the order of the displayed tables.
configs = {
    "knmi": {
        "0": [
            "Relative atmospheric humidity",
            "Sunshine duration",
            "Global Radiation",
            "Horizontal visibility",
            "Temperature",
        ],
    },
    "factory zero": {
        "alklimaHeatPump": ["flow_temp", "return_temp", "op_mode"],
        "co2sensor": ["co2", "voc"],
        "ventilation": ["outdoor_temp", "room_temp"],
        "smartMeter": ["power"],
        "energyHeatpump": ["power"],
        "solar": ["power"],
    },
}

devNullOutput = widgets.Output()
datasets = next(os.walk("data"), (None, None, []))[2]
results = pd.DataFrame(columns=[
    "Target",
    "Filename",
    "Kurtosis",
    "Skewness",
    "Variance"
])

for conf, sheets in configs.items():
    if conf == "knmi":
        files = [it for it in datasets if ".csv" in it]
    else:
        files = [it for it in datasets if ".xlsx" in it]
    for sheet, columns in sheets.items():
        for path in files:
            # Load the data
            with devNullOutput:
                %run helpers/load_data.ipynb
                config_select.value = conf
                file_select.value = path
                for column in columns:
                    dfloader.add_targets(column, sheet_name=sheet)
            for target in dfloader.targets:
                results = results.append({
                    "Target": target,
                    "Filename": path.split('/')[-1],
                    "Kurtosis": dfloader.df[target].kurtosis(),
                    "Skewness": dfloader.df[target].skew(),
                    "Variance": dfloader.df[target].var(),
                }, ignore_index=True, sort=False)

        results.sort_values(["Target", "Variance"], inplace=True)
        targets = results["Target"].unique()
        for target in targets:
            if any([column in target for column in columns]):
                subdf = results[results["Target"] == target].copy()
                subdf.set_index("Filename", inplace=True)
                display(subdf)

results.to_csv("datasets-stats.csv")


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
277 Lauweroog.csv,Global Radiation,2.411192,1.806076,5236.101872
270 Leeuwarden.csv,Global Radiation,2.281001,1.778832,5440.728923
260 De Bilt.csv,Global Radiation,2.316581,1.7817,5480.606728
269 Lelystad.csv,Global Radiation,2.102313,1.732935,5509.07508
273 Marknesse.csv,Global Radiation,2.045927,1.727645,5516.276015
344 Rotterdam.csv,Global Radiation,2.227166,1.764216,5536.486495
319 Westdorpe.csv,Global Radiation,2.179594,1.748252,5553.808114
240 Schiphol.csv,Global Radiation,2.301225,1.781072,5587.06502
370 Eindhoven.csv,Global Radiation,2.038965,1.720753,5643.03493
215 Voorschoten.csv,Global Radiation,2.216134,1.769378,5747.096504


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
310 Vlissingen.csv,Horizontal visibility,5.723379,-2.004014,178.406185
240 Schiphol.csv,Horizontal visibility,5.657232,-1.992856,180.385335
380 Maastricht.csv,Horizontal visibility,5.914903,-2.157363,198.207492
344 Rotterdam.csv,Horizontal visibility,5.528981,-2.066078,202.306269
370 Eindhoven.csv,Horizontal visibility,4.864109,-1.946009,219.936995
242 Vlieland.csv,Horizontal visibility,4.183326,-1.866474,220.5204
235 De Kooy.csv,Horizontal visibility,4.712058,-1.985172,221.891638
269 Lelystad.csv,Horizontal visibility,4.486636,-1.893136,224.120262
270 Leeuwarden.csv,Horizontal visibility,4.66324,-1.936743,224.603956
319 Westdorpe.csv,Horizontal visibility,4.673964,-1.965474,232.693634


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
242 Vlieland.csv,Relative atmospheric humidity,-0.091373,-0.611143,136.082506
235 De Kooy.csv,Relative atmospheric humidity,0.208701,-0.697365,146.491385
277 Lauweroog.csv,Relative atmospheric humidity,0.464874,-0.813875,164.119877
310 Vlissingen.csv,Relative atmospheric humidity,0.389863,-0.806885,169.171195
215 Voorschoten.csv,Relative atmospheric humidity,0.399248,-0.883834,192.12768
270 Leeuwarden.csv,Relative atmospheric humidity,0.732393,-1.035249,194.019054
257 Wijk aan Zee.csv,Relative atmospheric humidity,0.720209,-0.887483,195.974118
330 Hoek van Holland.csv,Relative atmospheric humidity,0.547653,-0.875818,212.489813
240 Schiphol.csv,Relative atmospheric humidity,0.197263,-0.876259,220.859013
344 Rotterdam.csv,Relative atmospheric humidity,0.269973,-0.889676,221.329244


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
277 Lauweroog.csv,Sunshine duration,-0.018336,1.301396,13.546725
344 Rotterdam.csv,Sunshine duration,-0.097856,1.273432,13.711049
370 Eindhoven.csv,Sunshine duration,-0.108439,1.267229,13.728232
319 Westdorpe.csv,Sunshine duration,-0.114079,1.26599,13.752027
270 Leeuwarden.csv,Sunshine duration,-0.131184,1.262467,13.77598
260 De Bilt.csv,Sunshine duration,-0.101631,1.274298,13.78526
240 Schiphol.csv,Sunshine duration,-0.165993,1.24943,13.968691
380 Maastricht.csv,Sunshine duration,-0.146066,1.26171,14.054592
215 Voorschoten.csv,Sunshine duration,-0.19327,1.240857,14.070599
273 Marknesse.csv,Sunshine duration,-0.209619,1.233938,14.09922


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
242 Vlieland.csv,Temperature,-0.341077,0.061516,3641.581894
235 De Kooy.csv,Temperature,-0.315997,0.116297,3854.637512
310 Vlissingen.csv,Temperature,-0.450651,0.158997,3880.684656
330 Hoek van Holland.csv,Temperature,-0.186742,0.174108,3999.480353
257 Wijk aan Zee.csv,Temperature,-0.085047,0.141761,4060.446933
215 Voorschoten.csv,Temperature,-0.224403,0.170466,4319.492326
277 Lauweroog.csv,Temperature,-0.388548,0.116627,4356.882854
270 Leeuwarden.csv,Temperature,-0.184904,0.177662,4550.405649
344 Rotterdam.csv,Temperature,-0.223448,0.211647,4616.819543
240 Schiphol.csv,Temperature,-0.276347,0.212119,4633.858085


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100.xlsx,alklimaHeatPump_flow_temp,3.90719,1.269625,56.198506
025.xlsx,alklimaHeatPump_flow_temp,3.595548,1.385641,67.986711
060.xlsx,alklimaHeatPump_flow_temp,5.179285,1.825447,72.078213
058.xlsx,alklimaHeatPump_flow_temp,3.712097,1.482725,73.70909
041.xlsx,alklimaHeatPump_flow_temp,5.763603,2.097663,74.527722
072.xlsx,alklimaHeatPump_flow_temp,3.034413,1.316247,79.464301
021.xlsx,alklimaHeatPump_flow_temp,2.667842,1.362471,79.573183
055.xlsx,alklimaHeatPump_flow_temp,4.833582,1.936356,82.084329
099.xlsx,alklimaHeatPump_flow_temp,2.752413,1.416585,85.055249
039.xlsx,alklimaHeatPump_flow_temp,1.983665,1.217769,87.87854


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
055.xlsx,alklimaHeatPump_op_mode,6.89721,2.201308,0.953265
115.xlsx,alklimaHeatPump_op_mode,1.643632,1.018718,0.989938
041.xlsx,alklimaHeatPump_op_mode,8.665749,2.627587,1.017741
040.xlsx,alklimaHeatPump_op_mode,4.189058,1.547901,1.028655
078.xlsx,alklimaHeatPump_op_mode,11.818055,3.266558,1.052573
072.xlsx,alklimaHeatPump_op_mode,3.140042,1.432543,1.126741
021.xlsx,alklimaHeatPump_op_mode,5.466161,2.073084,1.14727
058.xlsx,alklimaHeatPump_op_mode,3.089438,1.449617,1.202635
056.xlsx,alklimaHeatPump_op_mode,3.226402,1.413058,1.216212
099.xlsx,alklimaHeatPump_op_mode,3.582762,1.603526,1.276359


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100.xlsx,alklimaHeatPump_return_temp,6.957032,1.809408,34.39653
025.xlsx,alklimaHeatPump_return_temp,5.827448,1.849007,49.006644
060.xlsx,alklimaHeatPump_return_temp,5.913337,1.893119,51.208977
072.xlsx,alklimaHeatPump_return_temp,4.270218,1.578042,56.558401
058.xlsx,alklimaHeatPump_return_temp,4.452434,1.564483,57.91937
021.xlsx,alklimaHeatPump_return_temp,3.523568,1.493152,59.564849
099.xlsx,alklimaHeatPump_return_temp,3.406986,1.512186,60.986766
041.xlsx,alklimaHeatPump_return_temp,5.284135,1.991299,61.196776
055.xlsx,alklimaHeatPump_return_temp,4.895434,1.964042,62.231943
037.xlsx,alklimaHeatPump_return_temp,2.588741,1.008791,62.581856


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
051.xlsx,co2sensor_co2,0.597312,0.642918,4024.652547
105.xlsx,co2sensor_co2,2.450164,1.108046,4991.147817
055.xlsx,co2sensor_co2,-0.317036,0.290282,6387.971801
042.xlsx,co2sensor_co2,1.856234,1.213582,6561.357549
100.xlsx,co2sensor_co2,1.934123,1.139904,6823.3848
060.xlsx,co2sensor_co2,11.143047,1.576942,7257.952867
041.xlsx,co2sensor_co2,2.81572,1.023411,8088.214956
072.xlsx,co2sensor_co2,0.423756,0.62783,8701.616909
025.xlsx,co2sensor_co2,10.533676,1.882787,8707.439419
057.xlsx,co2sensor_co2,1.904149,1.168515,9057.533215


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
040.xlsx,co2sensor_voc,467.807493,19.137892,0.117325
072.xlsx,co2sensor_voc,8.548821,1.367034,0.585398
105.xlsx,co2sensor_voc,16.433925,2.39737,1.198088
037.xlsx,co2sensor_voc,13.214636,2.152354,1.403934
054.xlsx,co2sensor_voc,11.63494,2.444956,1.497003
058.xlsx,co2sensor_voc,38.756504,4.789658,2.168363
115.xlsx,co2sensor_voc,2.891402,1.091204,2.632821
057.xlsx,co2sensor_voc,8.955377,2.587503,2.810242
041.xlsx,co2sensor_voc,3.873378,1.543356,2.845977
039.xlsx,co2sensor_voc,1.394088,0.833221,3.28056


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
039.xlsx,ventilation_outdoor_temp,-0.639663,0.204735,22.534877
025.xlsx,ventilation_outdoor_temp,-0.347598,0.219849,24.736473
040.xlsx,ventilation_outdoor_temp,-0.558514,0.087928,42.799373
078.xlsx,ventilation_outdoor_temp,-0.367829,0.270978,45.456896
021.xlsx,ventilation_outdoor_temp,-0.253805,0.323639,46.196899
057.xlsx,ventilation_outdoor_temp,-0.131137,0.35393,47.401185
041.xlsx,ventilation_outdoor_temp,-0.068019,0.422037,48.079551
058.xlsx,ventilation_outdoor_temp,-0.021302,0.345537,48.127257
055.xlsx,ventilation_outdoor_temp,-0.24236,0.271153,48.577618
105.xlsx,ventilation_outdoor_temp,-0.219287,0.30268,49.463048


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
039.xlsx,ventilation_room_temp,-0.77514,0.416183,2.183257
025.xlsx,ventilation_room_temp,2.447603,0.987795,2.707731
105.xlsx,ventilation_room_temp,0.045391,0.406506,5.681954
056.xlsx,ventilation_room_temp,0.797087,0.937936,6.197076
041.xlsx,ventilation_room_temp,-0.07026,0.733098,7.055284
115.xlsx,ventilation_room_temp,0.210272,0.677394,7.724185
042.xlsx,ventilation_room_temp,-0.352413,0.623569,8.008638
057.xlsx,ventilation_room_temp,0.002568,0.674181,8.17102
037.xlsx,ventilation_room_temp,-0.299647,0.562922,8.521195
058.xlsx,ventilation_room_temp,-0.111472,0.379618,8.721601


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
021.xlsx,smartMeter_power,3.349327,-1.079316,1524748.0
025.xlsx,smartMeter_power,3.111696,-1.072309,2048204.0
058.xlsx,smartMeter_power,2.061667,-1.254969,2169659.0
100.xlsx,smartMeter_power,1.710314,-1.346381,2268810.0
105.xlsx,smartMeter_power,1.887806,-1.000449,2313417.0
054.xlsx,smartMeter_power,1.73615,-1.008938,2318747.0
041.xlsx,smartMeter_power,1.694302,-1.070226,2319793.0
055.xlsx,smartMeter_power,1.730527,-1.042393,2383481.0
078.xlsx,smartMeter_power,2.386689,-0.014369,2440257.0
099.xlsx,smartMeter_power,1.429675,-1.084015,2496833.0


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100.xlsx,energyHeatpump_power,11.722939,3.026017,138409.82238
058.xlsx,energyHeatpump_power,11.335755,3.232636,168396.41007
060.xlsx,energyHeatpump_power,20.939029,4.33283,173615.713245
025.xlsx,energyHeatpump_power,8.834837,2.795472,182586.788519
041.xlsx,energyHeatpump_power,12.517088,3.642728,200613.629329
021.xlsx,energyHeatpump_power,8.242703,2.849148,202827.128082
072.xlsx,energyHeatpump_power,7.885552,2.804696,218411.847017
037.xlsx,energyHeatpump_power,10.260419,3.049668,220313.903306
055.xlsx,energyHeatpump_power,9.702649,3.192566,230075.139724
105.xlsx,energyHeatpump_power,7.137346,2.832425,249233.599847


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
021.xlsx,smartMeter_power,3.349327,-1.079316,1524748.0
025.xlsx,smartMeter_power,3.111696,-1.072309,2048204.0
058.xlsx,smartMeter_power,2.061667,-1.254969,2169659.0
100.xlsx,smartMeter_power,1.710314,-1.346381,2268810.0
105.xlsx,smartMeter_power,1.887806,-1.000449,2313417.0
054.xlsx,smartMeter_power,1.73615,-1.008938,2318747.0
041.xlsx,smartMeter_power,1.694302,-1.070226,2319793.0
055.xlsx,smartMeter_power,1.730527,-1.042393,2383481.0
078.xlsx,smartMeter_power,2.386689,-0.014369,2440257.0
099.xlsx,smartMeter_power,1.429675,-1.084015,2496833.0


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100.xlsx,energyHeatpump_power,11.722939,3.026017,138409.82238
058.xlsx,energyHeatpump_power,11.335755,3.232636,168396.41007
060.xlsx,energyHeatpump_power,20.939029,4.33283,173615.713245
025.xlsx,energyHeatpump_power,8.834837,2.795472,182586.788519
041.xlsx,energyHeatpump_power,12.517088,3.642728,200613.629329
021.xlsx,energyHeatpump_power,8.242703,2.849148,202827.128082
072.xlsx,energyHeatpump_power,7.885552,2.804696,218411.847017
037.xlsx,energyHeatpump_power,10.260419,3.049668,220313.903306
055.xlsx,energyHeatpump_power,9.702649,3.192566,230075.139724
105.xlsx,energyHeatpump_power,7.137346,2.832425,249233.599847


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
021.xlsx,smartMeter_power,3.349327,-1.079316,1524748.0
025.xlsx,smartMeter_power,3.111696,-1.072309,2048204.0
058.xlsx,smartMeter_power,2.061667,-1.254969,2169659.0
100.xlsx,smartMeter_power,1.710314,-1.346381,2268810.0
105.xlsx,smartMeter_power,1.887806,-1.000449,2313417.0
054.xlsx,smartMeter_power,1.73615,-1.008938,2318747.0
041.xlsx,smartMeter_power,1.694302,-1.070226,2319793.0
055.xlsx,smartMeter_power,1.730527,-1.042393,2383481.0
078.xlsx,smartMeter_power,2.386689,-0.014369,2440257.0
099.xlsx,smartMeter_power,1.429675,-1.084015,2496833.0


Unnamed: 0_level_0,Target,Kurtosis,Skewness,Variance
Filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
105.xlsx,solar_power,2.920271,-1.908463,1674978.0
058.xlsx,solar_power,2.68671,-1.85365,1718501.0
057.xlsx,solar_power,2.864807,-1.898034,1743540.0
041.xlsx,solar_power,2.059979,-1.718703,1838564.0
056.xlsx,solar_power,2.290262,-1.772097,1840389.0
021.xlsx,solar_power,2.30608,-1.778787,1840801.0
042.xlsx,solar_power,2.035643,-1.71218,1844287.0
078.xlsx,solar_power,2.262501,-1.761568,1847439.0
040.xlsx,solar_power,2.135573,-1.736437,1847825.0
039.xlsx,solar_power,2.253608,-1.765694,1870385.0
