In [18]:
import os
import numpy as np
import pandas as pd
import xarray as xr
import tempfile
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from datetime import datetime


In [19]:
from station.data_submission import DataSubmission
from station.station import StationData


from station.dat_to_nc_all_var import DatToNcAllVar


In [20]:
# Set up the converter for the Vienna station data.
# NOTE(review): the argument meanings noted below are inferred from their names
# and from the recorded output of later cells — confirm against DatToNcAllVar.
converter = DatToNcAllVar(
    name="Vienna_AllVar",
    directory="measurements/Vienna",  # presumably the folder the input files are read from
    target_directory="station_data_as_nc",  # recorded run output reports saving to station_data_as_nc/vienna_allvar.nc
    hourly=True,  # presumably requests hourly aggregation — TODO confirm
    keep_original=True  # presumably keeps the unprocessed frame (read later as converter.original_df)
)


In [21]:
# Run the conversion. The recorded output shows an extraction progress bar over
# 3714 items followed by "Saving to station_data_as_nc/vienna_allvar.nc".
# NOTE(review): the run also echoes a pd.to_numeric(..., errors='ignore') line,
# which looks like a warning raised inside the converter — confirm and address there.
converter.execute()

Extracting Vienna_AllVar...


100%|██████████| 3714/3714 [00:30<00:00, 123.56it/s]


Saving to station_data_as_nc/vienna_allvar.nc


  self.dataframe[col] = pd.to_numeric(self.dataframe[col], errors='ignore')


In [25]:
# Unprocessed frame exposed by the converter; the recorded preview shows one row
# per minute with -999.99 in columns that have no reading.
raw_df = converter.original_df

# Plain-text preview of the first rows.
print(raw_df.head())

# Frame exposed by the converter as `dataframe` (presumably the processed
# result of execute() — confirm against DatToNcAllVar).
processed_df = converter.dataframe

print(processed_df.head())


                     bmp180_temp  bmp180_pres  bmp180_slp  bmp180_alt  \
datetime                                                                
2017-04-27 00:00:00      -999.99      -999.99     -999.99     -999.99   
2017-04-27 00:01:00      -999.99      -999.99     -999.99     -999.99   
2017-04-27 00:02:00      -999.99      -999.99     -999.99     -999.99   
2017-04-27 00:03:00      -999.99      -999.99     -999.99     -999.99   
2017-04-27 00:04:00      -999.99      -999.99     -999.99     -999.99   

                     bmp280_temp  bmp280_pres  bmp280_slp  bmp280_alt  \
datetime                                                                
2017-04-27 00:00:00         8.38       993.06     1012.37       159.0   
2017-04-27 00:01:00         8.37       993.08     1012.40       159.0   
2017-04-27 00:02:00         8.36       993.03     1012.35       159.0   
2017-04-27 00:03:00         8.35       993.02     1012.34       159.0   
2017-04-27 00:04:00         8.33       993.04     

In [26]:
# Persist the unprocessed Vienna frame as CSV (index written as the first column).
# NOTE(review): assumes the station_data_raw_original/ directory already exists.
raw_df.to_csv("station_data_raw_original/vienna_raw.csv")

In [27]:
# Persist the converter's `dataframe` for Vienna as CSV (index written as the first column).
# NOTE(review): assumes the station_data_raw_original/ directory already exists.
processed_df.to_csv("station_data_raw_original/vienna_processed.csv")

In [68]:
def counting_tipping_values(station_data, raworprocessed, missing_value=None):
    """Print how often the ``tipping`` column is zero, non-zero, and present.

    Parameters
    ----------
    station_data : pandas.DataFrame
        Frame containing a ``tipping`` column.
    raworprocessed : str
        Label echoed on the first output line (e.g. ``"raw"`` or ``"processed"``).
    missing_value : float, optional
        Sentinel that marks a missing reading (the raw station frames use
        ``-999.99``). When given, entries equal to it are treated like NaN and
        therefore excluded from every count. The default ``None`` keeps the
        original behaviour, where only NaN counts as missing.

    Returns
    -------
    None
        The summary is written to stdout.

    Raises
    ------
    KeyError
        If ``station_data`` has no ``tipping`` column.
    """
    tipping = station_data['tipping']
    if missing_value is not None:
        # Turn sentinel entries into NaN so they are not reported as non-zero rain.
        tipping = tipping.mask(tipping == missing_value)

    is_present = tipping.notna()

    # Count the number of times tipping is 0.0 (NaN never compares equal to 0.0)
    tipping_zero_count = int((tipping == 0.0).sum())

    # Count the number of times tipping is non-zero and not NaN
    tipping_non_zero_count = int(((tipping != 0.0) & is_present).sum())

    # Total count of rows where tipping is not NaN
    tipping_total_count = int(is_present.sum())

    # Print the counts
    print("Raw or processed:", raworprocessed)
    print(f"Tipping equals 0.0: {tipping_zero_count}")
    print(f"Tipping is non-zero and not NaN: {tipping_non_zero_count}")
    print(f"Total number of tipping measurements (non-NaN): {tipping_total_count}")

    # Without any valid measurement the percentages are undefined; report that
    # instead of failing with ZeroDivisionError.
    if tipping_total_count == 0:
        print("No non-NaN tipping measurements; percentages are undefined.")
        return

    # Calculate percentages
    tipping_zero_percentage = (tipping_zero_count / tipping_total_count) * 100
    tipping_non_zero_percentage = (tipping_non_zero_count / tipping_total_count) * 100

    print(f"Percentage of tipping values equal to 0.0: {tipping_zero_percentage:.2f}%")
    print(f"Percentage of tipping values non-zero: {tipping_non_zero_percentage:.2f}%")

# Marshall

## Basic

In [69]:
# Load the Marshall raw and processed frames from CSV; the first CSV column becomes the index.
# NOTE(review): no cell shown here writes the marshall_*.csv files — they must already exist on disk.
# NOTE(review): the statistics recorded for Marshall further down (describe() tables and
# tipping counts) are identical to those recorded in the Vienna section — verify that
# these files were not generated from Vienna data.
# NOTE(review): the index is read back as plain strings (info() reports `Index`, not
# `DatetimeIndex`); consider parse_dates=True if time-based operations are needed.
marshall_raw_df = pd.read_csv("station_data_raw_original/marshall_raw.csv", index_col=0)
marshall_processed_df = pd.read_csv("station_data_raw_original/marshall_processed.csv", index_col=0)

In [70]:
# Summary statistics of the processed Marshall frame, shown as the cell's output.
marshall_processed_df.describe()


Unnamed: 0,bmp180_temp,bmp180_pres,bmp180_slp,bmp180_alt,bmp280_temp,bmp280_pres,bmp280_slp,bmp280_alt,bme_temp,bme_pres,...,htu_temp,htu_hum,mcp9808,tipping,vis_light,ir_light,uv_light,wind_dir,wind_speed,tas
count,0.0,0.0,0.0,0.0,17070.0,17072.0,17072.0,0.0,0.0,0.0,...,27336.0,29608.0,12585.0,32076.0,18879.0,18506.0,10961.0,30829.0,16382.0,30578.0
mean,,,,,13.291198,996.852766,1015.925743,,,,...,13.14369,68.725581,12.045101,0.074629,344.47047,1133.584243,79.520208,209.613783,0.849525,285.976937
std,,,,,8.730016,21.877956,22.364367,,,,...,8.538697,17.703144,8.926236,2.801776,158.352829,1670.567697,99.1603,105.766542,0.633719,8.628116
min,,,,,-10.915,600.49,611.47,,,,...,-11.0,12.1,-11.2,0.0,0.0,0.0,0.0,0.013044,0.0015,262.106667
25%,,,,,6.09,993.315,1012.13,,,,...,6.2,56.3,4.65,0.0,261.0,253.0,12.0,122.217641,0.348042,278.845
50%,,,,,13.0825,998.255,1017.15,,,,...,12.85,70.3,11.2,0.0,264.5,303.5,36.0,230.813037,0.75,285.55
75%,,,,,20.04,1003.01,1022.27,,,,...,19.6,82.75,19.1,0.0,342.5,1079.75,110.0,299.266364,1.224407,292.771875
max,,,,,37.91,1023.065,1043.63,,,,...,37.7,99.8,37.1,354.4,1094.0,8779.0,448.0,359.995453,4.215,310.721667


In [71]:
# Summary statistics of the raw Marshall frame.
# NOTE(review): the recorded min of every column is -999.99, so the means and
# quantiles are dominated by that placeholder rather than by real readings.
marshall_raw_df.describe()

Unnamed: 0,bmp180_temp,bmp180_pres,bmp180_slp,bmp180_alt,bmp280_temp,bmp280_pres,bmp280_slp,bmp280_alt,bme_temp,bme_pres,...,bme_hum,htu_temp,htu_hum,mcp9808,tipping,vis_light,ir_light,uv_light,wind_dir,wind_speed
count,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,...,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0
mean,-999.99,-999.99,-999.99,-999.99,-568.8633,-165.1387,-157.3345,-508.9579,-999.99,-999.99,...,-999.99,-310.0451,-288.3837,-664.3431,-280.0229,-422.4009,-93.6824,-548.4988,-148.7072,-628.9503
std,4.661161e-12,4.661161e-12,4.661161e-12,4.661161e-12,501.5931,973.5705,982.7422,572.7031,4.661161e-12,4.661161e-12,...,4.661161e-12,471.7284,504.3525,476.1447,449.0081,671.8095,1513.093,521.2565,539.794,483.401
min,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,...,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99
25%,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,...,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99
50%,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,...,-999.99,5.8,56.2,-999.99,0.0,-999.99,-999.99,-999.99,112.8,-999.99
75%,-999.99,-999.99,-999.99,-999.99,11.63,995.62,1014.45,159.0,-999.99,-999.99,...,-999.99,15.5,76.3,3.5,0.0,262.0,254.0,3.0,256.5,0.36
max,-999.99,-999.99,-999.99,-999.99,81.47,1087.58,1107.61,159.0,-999.99,-999.99,...,-999.99,81.4,99.9,124.7,28.0,1630.0,13541.0,736.0,360.0,98.24


In [72]:
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() appends a stray "None" line per frame.
marshall_processed_df.info()
marshall_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 44554 entries, 2017-04-24 07:00:00 to 2022-05-23 23:00:00
Data columns (total 23 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   bmp180_temp  0 non-null      float64
 1   bmp180_pres  0 non-null      float64
 2   bmp180_slp   0 non-null      float64
 3   bmp180_alt   0 non-null      float64
 4   bmp280_temp  17070 non-null  float64
 5   bmp280_pres  17072 non-null  float64
 6   bmp280_slp   17072 non-null  float64
 7   bmp280_alt   0 non-null      float64
 8   bme_temp     0 non-null      float64
 9   bme_pres     0 non-null      float64
 10  bme_slp      0 non-null      float64
 11  bme_alt      0 non-null      float64
 12  bme_hum      0 non-null      float64
 13  htu_temp     27336 non-null  float64
 14  htu_hum      29608 non-null  float64
 15  mcp9808      12585 non-null  float64
 16  tipping      32076 non-null  float64
 17  vis_light    18879 non-null  float64
 18  ir_light     18506 

In [73]:
# Tipping zero / non-zero breakdown for the raw Marshall frame.
# NOTE(review): the raw frame's tipping column has a recorded min of -999.99, so
# placeholder readings are counted here as "non-zero" measurements.
counting_tipping_values(marshall_raw_df, "raw")

Raw or processed: raw
Tipping equals 0.0: 1918917
Tipping is non-zero and not NaN: 754290
Total number of tipping measurements (non-NaN): 2673207
Percentage of tipping values equal to 0.0: 71.78%
Percentage of tipping values non-zero: 28.22%


In [74]:
# Tipping zero / non-zero breakdown for the processed Marshall frame.
counting_tipping_values(marshall_processed_df, "processed")

Raw or processed: processed
Tipping equals 0.0: 30236
Tipping is non-zero and not NaN: 1840
Total number of tipping measurements (non-NaN): 32076
Percentage of tipping values equal to 0.0: 94.26%
Percentage of tipping values non-zero: 5.74%


# Vienna

## Basic

In [75]:
# Reload the Vienna frames from the same CSV paths the earlier to_csv cells wrote;
# the first CSV column becomes the index.
# NOTE(review): the index is read back as plain strings (info() reports `Index`, not
# `DatetimeIndex`); consider parse_dates=True if time-based operations are needed.
vienna_raw_df = pd.read_csv("station_data_raw_original/vienna_raw.csv", index_col=0)
vienna_processed_df = pd.read_csv("station_data_raw_original/vienna_processed.csv", index_col=0)

In [76]:
# Summary statistics of the processed Vienna frame, shown as the cell's output.
vienna_processed_df.describe()

Unnamed: 0,bmp180_temp,bmp180_pres,bmp180_slp,bmp180_alt,bmp280_temp,bmp280_pres,bmp280_slp,bmp280_alt,bme_temp,bme_pres,...,htu_temp,htu_hum,mcp9808,tipping,vis_light,ir_light,uv_light,wind_dir,wind_speed,tas
count,0.0,0.0,0.0,0.0,17070.0,17072.0,17072.0,0.0,0.0,0.0,...,27336.0,29608.0,12585.0,32076.0,18879.0,18506.0,10961.0,30829.0,16382.0,30578.0
mean,,,,,13.291198,996.852766,1015.925743,,,,...,13.14369,68.725581,12.045101,0.074629,344.47047,1133.584243,79.520208,209.613783,0.849525,285.976937
std,,,,,8.730016,21.877956,22.364367,,,,...,8.538697,17.703144,8.926236,2.801776,158.352829,1670.567697,99.1603,105.766542,0.633719,8.628116
min,,,,,-10.915,600.49,611.47,,,,...,-11.0,12.1,-11.2,0.0,0.0,0.0,0.0,0.013044,0.0015,262.106667
25%,,,,,6.09,993.315,1012.13,,,,...,6.2,56.3,4.65,0.0,261.0,253.0,12.0,122.217641,0.348042,278.845
50%,,,,,13.0825,998.255,1017.15,,,,...,12.85,70.3,11.2,0.0,264.5,303.5,36.0,230.813037,0.75,285.55
75%,,,,,20.04,1003.01,1022.27,,,,...,19.6,82.75,19.1,0.0,342.5,1079.75,110.0,299.266364,1.224407,292.771875
max,,,,,37.91,1023.065,1043.63,,,,...,37.7,99.8,37.1,354.4,1094.0,8779.0,448.0,359.995453,4.215,310.721667


In [77]:
# Summary statistics of the raw Vienna frame.
# NOTE(review): the recorded min of every column is -999.99, so the means and
# quantiles are dominated by that placeholder rather than by real readings.
vienna_raw_df.describe()

Unnamed: 0,bmp180_temp,bmp180_pres,bmp180_slp,bmp180_alt,bmp280_temp,bmp280_pres,bmp280_slp,bmp280_alt,bme_temp,bme_pres,...,bme_hum,htu_temp,htu_hum,mcp9808,tipping,vis_light,ir_light,uv_light,wind_dir,wind_speed
count,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,...,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0,2673207.0
mean,-999.99,-999.99,-999.99,-999.99,-568.8633,-165.1387,-157.3345,-508.9579,-999.99,-999.99,...,-999.99,-310.0451,-288.3837,-664.3431,-280.0229,-422.4009,-93.6824,-548.4988,-148.7072,-628.9503
std,4.661161e-12,4.661161e-12,4.661161e-12,4.661161e-12,501.5931,973.5705,982.7422,572.7031,4.661161e-12,4.661161e-12,...,4.661161e-12,471.7284,504.3525,476.1447,449.0081,671.8095,1513.093,521.2565,539.794,483.401
min,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,...,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99
25%,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,...,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99
50%,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,...,-999.99,5.8,56.2,-999.99,0.0,-999.99,-999.99,-999.99,112.8,-999.99
75%,-999.99,-999.99,-999.99,-999.99,11.63,995.62,1014.45,159.0,-999.99,-999.99,...,-999.99,15.5,76.3,3.5,0.0,262.0,254.0,3.0,256.5,0.36
max,-999.99,-999.99,-999.99,-999.99,81.47,1087.58,1107.61,159.0,-999.99,-999.99,...,-999.99,81.4,99.9,124.7,28.0,1630.0,13541.0,736.0,360.0,98.24


In [78]:
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() appends a stray "None" line per frame.
vienna_processed_df.info()
vienna_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 44554 entries, 2017-04-25 00:00:00 to 2022-05-23 23:00:00
Data columns (total 23 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   bmp180_temp  0 non-null      float64
 1   bmp180_pres  0 non-null      float64
 2   bmp180_slp   0 non-null      float64
 3   bmp180_alt   0 non-null      float64
 4   bmp280_temp  17070 non-null  float64
 5   bmp280_pres  17072 non-null  float64
 6   bmp280_slp   17072 non-null  float64
 7   bmp280_alt   0 non-null      float64
 8   bme_temp     0 non-null      float64
 9   bme_pres     0 non-null      float64
 10  bme_slp      0 non-null      float64
 11  bme_alt      0 non-null      float64
 12  bme_hum      0 non-null      float64
 13  htu_temp     27336 non-null  float64
 14  htu_hum      29608 non-null  float64
 15  mcp9808      12585 non-null  float64
 16  tipping      32076 non-null  float64
 17  vis_light    18879 non-null  float64
 18  ir_light     18506 

In [79]:
# Tipping zero / non-zero breakdown for the raw Vienna frame.
# NOTE(review): the raw frame's tipping column has a recorded min of -999.99, so
# placeholder readings are counted here as "non-zero" measurements.
counting_tipping_values(vienna_raw_df, "raw")

Raw or processed: raw
Tipping equals 0.0: 1918917
Tipping is non-zero and not NaN: 754290
Total number of tipping measurements (non-NaN): 2673207
Percentage of tipping values equal to 0.0: 71.78%
Percentage of tipping values non-zero: 28.22%


In [80]:
# Tipping zero / non-zero breakdown for the processed Vienna frame.
counting_tipping_values(vienna_processed_df, "processed")

Raw or processed: processed
Tipping equals 0.0: 30236
Tipping is non-zero and not NaN: 1840
Total number of tipping measurements (non-NaN): 32076
Percentage of tipping values equal to 0.0: 94.26%
Percentage of tipping values non-zero: 5.74%


# Barbados

In [81]:
# Load the Barbados raw and processed frames from CSV; the first CSV column becomes the index.
# NOTE(review): no cell shown here writes the barbados_*.csv files — they must already exist on disk.
# NOTE(review): the index is read back as plain strings (info() reports `Index`, not
# `DatetimeIndex`); consider parse_dates=True if time-based operations are needed.
barbados_raw_df = pd.read_csv("station_data_raw_original/barbados_raw.csv", index_col=0)
barbados_processed_df = pd.read_csv("station_data_raw_original/barbados_processed.csv", index_col=0)


## Basic

In [82]:
# Summary statistics of the processed Barbados frame, shown as the cell's output.
barbados_processed_df.describe()

Unnamed: 0,bmp180_temp,bmp180_pres,bmp180_slp,bmp180_alt,bmp280_temp,bmp280_pres,bmp280_slp,bmp280_alt,bme_temp,bme_pres,...,htu_temp,htu_hum,mcp9808,tipping,vis_light,ir_light,uv_light,wind_dir,wind_speed,tas
count,0.0,0.0,0.0,0.0,19275.0,19304.0,19304.0,0.0,0.0,0.0,...,19539.0,15417.0,17389.0,29008.0,22612.0,21491.0,13230.0,0.0,26956.0,24780.0
mean,,,,,26.363526,984.414959,1015.581027,,,,...,22.188743,85.960735,25.575539,0.193285,419.594529,1990.336373,147.866062,,3.065972,297.753908
std,,,,,1.96642,11.590826,11.95317,,,,...,6.201811,8.186707,2.048191,1.34119,228.350229,2400.179046,129.135906,,2.429038,2.330054
min,,,,,-21.155,648.6,669.32,,,,...,1.5,55.9,0.0,0.0,0.0,0.0,0.0,,0.001,251.995
25%,,,,,24.975,983.83,1014.975,,,,...,21.9,80.15,24.2,0.0,260.0,253.5,29.0,,1.963417,296.56125
50%,,,,,26.16,985.0,1016.21,,,,...,24.4,86.9,25.4,0.0,266.5,499.0,124.5,,2.895667,297.826667
75%,,,,,27.66,986.125,1017.37,,,,...,26.2,92.55,27.1,0.0,536.625,3262.25,227.0,,3.824583,299.106667
max,,,,,33.41,990.78,1021.94,,,,...,31.1,99.8,32.2,66.2,1221.0,10258.5,516.5,,74.617241,303.87


In [83]:
# Summary statistics of the raw Barbados frame.
# NOTE(review): the recorded min of every column is -999.99, so the means and
# quantiles are distorted by that placeholder rather than reflecting real readings.
barbados_raw_df.describe()

Unnamed: 0,bmp180_temp,bmp180_pres,bmp180_slp,bmp180_alt,bmp280_temp,bmp280_pres,bmp280_slp,bmp280_alt,bme_temp,bme_pres,...,bme_hum,htu_temp,htu_hum,mcp9808,tipping,vis_light,ir_light,uv_light,wind_dir,wind_speed
count,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,...,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0,1817508.0
mean,-999.99,-999.99,-999.99,-999.99,-212.3054,484.2709,506.9669,-21.88401,-999.99,-999.99,...,-999.99,-196.7659,-436.3836,-260.3876,-43.56546,91.18536,1256.991,-164.6803,-762.592,-41.9657
std,2.501111e-12,2.501111e-12,2.501111e-12,2.501111e-12,433.2454,822.5395,835.3257,537.9648,2.501111e-12,2.501111e-12,...,2.501111e-12,419.785,542.6432,459.2763,204.1328,637.4778,2569.836,474.4023,425.4855,207.2575
min,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,...,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99,-999.99
25%,-999.99,-999.99,-999.99,-999.99,23.12,648.6,669.32,274.0,-999.99,-999.99,...,-999.99,9.3,-999.99,-999.99,0.0,259.0,252.0,1.0,-999.99,1.24
50%,-999.99,-999.99,-999.99,-999.99,24.85,983.62,1014.75,274.0,-999.99,-999.99,...,-999.99,23.5,69.7,24.2,0.0,261.0,254.0,2.0,-999.99,2.55
75%,-999.99,-999.99,-999.99,-999.99,26.7,985.47,1016.69,274.0,-999.99,-999.99,...,-999.99,25.3,87.5,25.9,0.0,430.0,1988.0,92.0,-999.99,3.65
max,-999.99,-999.99,-999.99,-999.99,84.41,1084.97,1119.66,274.0,-999.99,-999.99,...,-999.99,124.3,99.9,123.9,12.6,1960.0,14052.0,984.0,0.0,99.98


In [84]:
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() appends a stray "None" line per frame.
barbados_processed_df.info()
barbados_raw_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 30293 entries, 2020-06-08 15:00:00 to 2023-11-21 23:00:00
Data columns (total 23 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   bmp180_temp  0 non-null      float64
 1   bmp180_pres  0 non-null      float64
 2   bmp180_slp   0 non-null      float64
 3   bmp180_alt   0 non-null      float64
 4   bmp280_temp  19275 non-null  float64
 5   bmp280_pres  19304 non-null  float64
 6   bmp280_slp   19304 non-null  float64
 7   bmp280_alt   0 non-null      float64
 8   bme_temp     0 non-null      float64
 9   bme_pres     0 non-null      float64
 10  bme_slp      0 non-null      float64
 11  bme_alt      0 non-null      float64
 12  bme_hum      0 non-null      float64
 13  htu_temp     19539 non-null  float64
 14  htu_hum      15417 non-null  float64
 15  mcp9808      17389 non-null  float64
 16  tipping      29008 non-null  float64
 17  vis_light    22612 non-null  float64
 18  ir_light     21491 

In [85]:
# Tipping zero / non-zero breakdown for the raw Barbados frame.
# NOTE(review): the raw frame's tipping column has a recorded min of -999.99, so
# placeholder readings are counted here as "non-zero" measurements.
counting_tipping_values(barbados_raw_df, "raw")

Raw or processed: raw
Tipping equals 0.0: 1721851
Tipping is non-zero and not NaN: 95657
Total number of tipping measurements (non-NaN): 1817508
Percentage of tipping values equal to 0.0: 94.74%
Percentage of tipping values non-zero: 5.26%


In [86]:
# Tipping zero / non-zero breakdown for the processed Barbados frame.
counting_tipping_values(barbados_processed_df, "processed")

Raw or processed: processed
Tipping equals 0.0: 26140
Tipping is non-zero and not NaN: 2868
Total number of tipping measurements (non-NaN): 29008
Percentage of tipping values equal to 0.0: 90.11%
Percentage of tipping values non-zero: 9.89%
