In [1]:
from powderalert.ml_logic.preprocessor import define_X, preprocess
from powderalert.ml_logic.data import clean_data

Coordinates: 47.2400016784668°N, 11.84000015258789°E
Elevation: 1818.0 m asl
Timezone: None (None)
UTC Offset: 0 seconds
✅ Prediction data fetched successfully
Shape: (96, 23)
date                          datetime64[ns, UTC]
temperature_2m                            float32
relative_humidity_2m                      float32
dew_point_2m                              float32
precipitation                             float32
rain                                      float32
snowfall                                  float32
snow_depth                                float32
weather_code                              float32
pressure_msl                              float32
surface_pressure                          float32
cloud_cover                               float32
cloud_cover_low                           float32
cloud_cover_mid                           float32
cloud_cover_high                          float32
et0_fao_evapotranspiration                float32
vapour_pressure_deficit 

In [2]:
import openmeteo_requests
import matplotlib as plt
import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 47.26580883196723,
	"longitude": 11.84457426992035,
	"start_date": "2009-01-01",
	"end_date": "2024-01-01",
	"hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "precipitation", "rain", "snowfall", "snow_depth", "weather_code", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "et0_fao_evapotranspiration", "vapour_pressure_deficit", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "wind_gusts_10m", "soil_temperature_0_to_7cm", "soil_temperature_7_to_28cm", "soil_temperature_28_to_100cm", "soil_temperature_100_to_255cm", "soil_moisture_0_to_7cm", "soil_moisture_7_to_28cm", "soil_moisture_28_to_100cm", "soil_moisture_100_to_255cm", "sunshine_duration"],
	"models": "best_match"
}
responses = openmeteo.weather_api(url, params=params)


# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")


Coordinates 47.27592086791992°N 12.058823585510254°E
Elevation 1818.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


In [3]:
hourly = response.Hourly()

# Assign variables sequentially, ensuring alignment with their indices
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(3).ValuesAsNumpy()
hourly_rain = hourly.Variables(4).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(5).ValuesAsNumpy()
hourly_snow_depth = hourly.Variables(6).ValuesAsNumpy()
hourly_weather_code = hourly.Variables(7).ValuesAsNumpy()
hourly_pressure_msl = hourly.Variables(8).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(9).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(10).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(11).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(12).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(13).ValuesAsNumpy()
hourly_et0_fao_evapotranspiration = hourly.Variables(14).ValuesAsNumpy()
hourly_vapour_pressure_deficit = hourly.Variables(15).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(16).ValuesAsNumpy()
hourly_wind_speed_100m = hourly.Variables(17).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(18).ValuesAsNumpy()
hourly_wind_direction_100m = hourly.Variables(19).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(20).ValuesAsNumpy()
hourly_sunshine_duration = hourly.Variables(21).ValuesAsNumpy()

# Create the DataFrame
hourly_data = {
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left"
    )
}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["dew_point_2m"] = hourly_dew_point_2m
hourly_data["precipitation"] = hourly_precipitation
hourly_data["rain"] = hourly_rain
hourly_data["snowfall"] = hourly_snowfall
hourly_data["snow_depth"] = hourly_snow_depth
hourly_data["weather_code"] = hourly_weather_code
hourly_data["pressure_msl"] = hourly_pressure_msl
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
hourly_data["et0_fao_evapotranspiration"] = hourly_et0_fao_evapotranspiration
hourly_data["vapour_pressure_deficit"] = hourly_vapour_pressure_deficit
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
hourly_data["sunshine_duration"] = hourly_sunshine_duration

hourly_dataframe = pd.DataFrame(data=hourly_data)
hourly_dataframe


Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snowfall,snow_depth,weather_code,pressure_msl,...,cloud_cover_mid,cloud_cover_high,et0_fao_evapotranspiration,vapour_pressure_deficit,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,wind_gusts_10m,sunshine_duration
0,2009-01-01 00:00:00+00:00,-10.842501,72.968170,-14.742500,0.0,0.0,0.00,0.92,3.0,1025.199951,...,95.0,24.0,0.001225,0.072607,6.989935,11.275530,191.888641,196.699326,29.879999,-0.1425
1,2009-01-01 01:00:00+00:00,-10.642500,73.911522,-14.392500,0.0,0.0,0.00,0.92,3.0,1025.199951,...,95.0,46.0,0.000000,0.071200,5.860375,8.557102,190.619598,202.249069,25.559999,-0.1425
2,2009-01-01 02:00:00+00:00,-10.492500,74.547844,-14.142500,0.1,0.0,0.07,0.92,71.0,1025.000000,...,89.0,51.0,0.000000,0.070298,5.154416,6.696387,192.094742,216.253922,20.160000,-0.1925
3,2009-01-01 03:00:00+00:00,-10.442499,75.476273,-13.942499,0.1,0.0,0.07,0.92,71.0,1025.300049,...,92.0,75.0,0.000000,0.068005,3.758510,4.896529,196.699326,252.897186,15.119999,-0.1925
4,2009-01-01 04:00:00+00:00,-10.542500,78.596596,-13.542500,0.2,0.0,0.14,0.93,71.0,1025.500000,...,92.0,81.0,0.000000,0.058886,2.099143,5.860375,239.036301,317.489594,14.759999,-0.1925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131491,2024-01-01 19:00:00+00:00,-13.821000,86.608582,-15.571000,0.0,0.0,0.00,1.23,3.0,1017.099976,...,10.0,94.0,0.000000,0.028286,5.154415,9.000000,155.224884,216.869980,26.639999,0.6790
131492,2024-01-01 20:00:00+00:00,-14.571000,86.166550,-16.371000,0.0,0.0,0.00,1.23,1.0,1017.400024,...,21.0,6.0,0.000000,0.027474,6.151683,10.464797,159.443878,206.564987,25.919998,0.6790
131493,2024-01-01 21:00:00+00:00,-14.571000,84.035316,-16.671001,0.0,0.0,0.00,1.23,3.0,1017.099976,...,24.0,92.0,0.000000,0.031705,6.618519,11.874544,157.619827,194.036270,25.559999,0.6790
131494,2024-01-01 22:00:00+00:00,-13.721000,80.381676,-16.371000,0.0,0.0,0.00,1.23,3.0,1016.900024,...,2.0,100.0,0.000000,0.041773,6.763786,12.429127,154.798904,190.007919,26.280001,0.6790


In [4]:
hourly_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131496 entries, 0 to 131495
Data columns (total 23 columns):
 #   Column                      Non-Null Count   Dtype              
---  ------                      --------------   -----              
 0   date                        131496 non-null  datetime64[ns, UTC]
 1   temperature_2m              131496 non-null  float32            
 2   relative_humidity_2m        131496 non-null  float32            
 3   dew_point_2m                131496 non-null  float32            
 4   precipitation               131496 non-null  float32            
 5   rain                        131496 non-null  float32            
 6   snowfall                    131496 non-null  float32            
 7   snow_depth                  131496 non-null  float32            
 8   weather_code                131496 non-null  float32            
 9   pressure_msl                131496 non-null  float32            
 10  surface_pressure            131496 non-null 

In [5]:
df = hourly_dataframe
df.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snowfall,snow_depth,weather_code,pressure_msl,...,cloud_cover_mid,cloud_cover_high,et0_fao_evapotranspiration,vapour_pressure_deficit,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,wind_gusts_10m,sunshine_duration
0,2009-01-01 00:00:00+00:00,-10.842501,72.96817,-14.7425,0.0,0.0,0.0,0.92,3.0,1025.199951,...,95.0,24.0,0.001225,0.072607,6.989935,11.27553,191.888641,196.699326,29.879999,-0.1425
1,2009-01-01 01:00:00+00:00,-10.6425,73.911522,-14.3925,0.0,0.0,0.0,0.92,3.0,1025.199951,...,95.0,46.0,0.0,0.0712,5.860375,8.557102,190.619598,202.249069,25.559999,-0.1425
2,2009-01-01 02:00:00+00:00,-10.4925,74.547844,-14.1425,0.1,0.0,0.07,0.92,71.0,1025.0,...,89.0,51.0,0.0,0.070298,5.154416,6.696387,192.094742,216.253922,20.16,-0.1925
3,2009-01-01 03:00:00+00:00,-10.442499,75.476273,-13.942499,0.1,0.0,0.07,0.92,71.0,1025.300049,...,92.0,75.0,0.0,0.068005,3.75851,4.896529,196.699326,252.897186,15.119999,-0.1925
4,2009-01-01 04:00:00+00:00,-10.5425,78.596596,-13.5425,0.2,0.0,0.14,0.93,71.0,1025.5,...,92.0,81.0,0.0,0.058886,2.099143,5.860375,239.036301,317.489594,14.759999,-0.1925


In [6]:
df.shape

(131496, 23)

In [8]:
cleaned_df = clean_data(df)
preprocessed_df = preprocess(cleaned_df)

✅ Data cleaned
✅ Processed data, with shape (131496, 28)


In [9]:
cleaned_df.head(2)

Unnamed: 0_level_0,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snowfall,snow_depth,weather_code,pressure_msl,surface_pressure,...,wind_direction_10m,wind_direction_100m,wind_gusts_10m,sunshine_duration,hour_sin,hour_cos,day_of_week_sin,day_of_week_cos,month_sin,month_cos
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-01 00:00:00,-10.842501,72.96817,-14.7425,0.0,0.0,0.0,0.92,3.0,1025.199951,813.257202,...,191.888641,196.699326,29.879999,-0.1425,0.0,1.0,0.433884,-0.900969,0.0,1.0
2009-01-01 01:00:00,-10.6425,73.911522,-14.3925,0.0,0.0,0.0,0.92,3.0,1025.199951,813.397522,...,190.619598,202.249069,25.559999,-0.1425,0.258819,0.965926,0.433884,-0.900969,0.0,1.0


In [None]:
preprocessed_df.head(2)

Unnamed: 0,weather_code_encoded,hour_sin,hour_cos,day_of_week_sin,day_of_week_cos,month_sin,month_cos,cloud_cover,cloud_cover_high,cloud_cover_low,...,snowfall,sunshine_duration,surface_pressure,temperature_2m,vapour_pressure_deficit,wind_direction_100m,wind_direction_10m,wind_gusts_10m,wind_speed_100m,wind_speed_10m
0,3.0,2.6192090000000003e-17,1.414214,0.614121,-1.273928,0.004523,1.419691,-10.842501,72.96817,-14.7425,...,95.0,24.0,0.001225,0.072607,6.989935,11.27553,191.888641,196.699326,29.879999,-0.1425
1,3.0,0.3660254,1.366025,0.614121,-1.273928,0.004523,1.419691,-10.6425,73.911522,-14.3925,...,95.0,46.0,0.0,0.0712,5.860375,8.557102,190.619598,202.249069,25.559999,-0.1425


In [9]:
preprocessed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131496 entries, 0 to 131495
Data columns (total 28 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   weather_code_encoded        131496 non-null  float64
 1   hour_sin                    131496 non-null  float64
 2   hour_cos                    131496 non-null  float64
 3   day_of_week_sin             131496 non-null  float64
 4   day_of_week_cos             131496 non-null  float64
 5   month_sin                   131496 non-null  float64
 6   month_cos                   131496 non-null  float64
 7   cloud_cover                 131496 non-null  float64
 8   cloud_cover_high            131496 non-null  float64
 9   cloud_cover_low             131496 non-null  float64
 10  cloud_cover_mid             131496 non-null  float64
 11  dew_point_2m                131496 non-null  float64
 12  et0_fao_evapotranspiration  131496 non-null  float64
 13  precipitation 

In [11]:
df.to_csv('openmeteo_api_NEW_train_dataset.csv', index=True)

In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 131496 entries, 0 to 131495
Data columns (total 23 columns):
 #   Column                      Non-Null Count   Dtype         
---  ------                      --------------   -----         
 0   date                        131496 non-null  datetime64[ns]
 1   temperature_2m              131496 non-null  float32       
 2   relative_humidity_2m        131496 non-null  float32       
 3   dew_point_2m                131496 non-null  float32       
 4   precipitation               131496 non-null  float32       
 5   rain                        131496 non-null  float32       
 6   snowfall                    131496 non-null  float32       
 7   snow_depth                  131496 non-null  float32       
 8   weather_code                131496 non-null  float32       
 9   pressure_msl                131496 non-null  float32       
 10  surface_pressure            131496 non-null  float32       
 11  cloud_cover                 131496 non-

In [10]:
clean_data.head(2)

Unnamed: 0_level_0,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snowfall,snow_depth,weather_code,pressure_msl,surface_pressure,...,cloud_cover_mid,cloud_cover_high,et0_fao_evapotranspiration,vapour_pressure_deficit,wind_speed_10m,wind_speed_100m,wind_direction_10m,wind_direction_100m,wind_gusts_10m,sunshine_duration
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-01-01 00:00:00,-10.842501,72.96817,-14.7425,0.0,0.0,0.0,0.92,3.0,1025.199951,813.257202,...,95.0,24.0,0.001225,0.072607,6.989935,11.27553,191.888641,196.699326,29.879999,-0.1425
2009-01-01 01:00:00,-10.6425,73.911522,-14.3925,0.0,0.0,0.0,0.92,3.0,1025.199951,813.397522,...,95.0,46.0,0.0,0.0712,5.860375,8.557102,190.619598,202.249069,25.559999,-0.1425


In [9]:
df = pd.read_csv('/Users/torstenwrigley/code/MadMax1995bb/powder_alert2.0/raw_data/openmeteo_api_zentralstation.csv')
df

Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,precipitation,rain,snowfall,snow_depth,weather_code,...,wind_gusts_10m,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm,sunshine_duration
0,2009-01-01 00:00:00+00:00,-10.842501,72.968170,-14.742500,-15.432775,0.0,0.0,0.00,0.92,3.0,...,29.880000,-0.1425,0.2575,1.1575,2.6575,0.21,0.214,0.209,0.269,0.0
1,2009-01-01 01:00:00+00:00,-10.642500,73.911520,-14.392500,-15.048215,0.0,0.0,0.00,0.92,3.0,...,25.560000,-0.1425,0.2575,1.1075,2.6575,0.21,0.214,0.209,0.269,0.0
2,2009-01-01 02:00:00+00:00,-10.492500,74.547844,-14.142500,-14.780781,0.1,0.0,0.07,0.92,71.0,...,20.160000,-0.1925,0.2575,1.1075,2.6575,0.21,0.214,0.209,0.269,0.0
3,2009-01-01 03:00:00+00:00,-10.442499,75.476270,-13.942499,-14.515449,0.1,0.0,0.07,0.92,71.0,...,15.119999,-0.1925,0.2575,1.1075,2.6575,0.21,0.214,0.209,0.269,0.0
4,2009-01-01 04:00:00+00:00,-10.542500,78.596596,-13.542500,-14.349460,0.2,0.0,0.14,0.93,71.0,...,14.759999,-0.1925,0.2575,1.1075,2.6575,0.21,0.214,0.209,0.269,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131491,2024-01-01 19:00:00+00:00,-13.821000,86.608580,-15.571000,-18.189102,0.0,0.0,0.00,1.23,3.0,...,26.640000,0.6790,0.9290,1.7790,3.6290,0.24,0.246,0.238,0.267,0.0
131492,2024-01-01 20:00:00+00:00,-14.571000,86.166550,-16.371000,-19.125450,0.0,0.0,0.00,1.23,1.0,...,25.919998,0.6790,0.9290,1.7790,3.6290,0.24,0.246,0.238,0.267,0.0
131493,2024-01-01 21:00:00+00:00,-14.571000,84.035320,-16.671001,-19.208189,0.0,0.0,0.00,1.23,3.0,...,25.560000,0.6790,0.9290,1.7790,3.6290,0.24,0.246,0.238,0.267,0.0
131494,2024-01-01 22:00:00+00:00,-13.721000,80.381676,-16.371000,-18.364521,0.0,0.0,0.00,1.23,3.0,...,26.280000,0.6790,0.9290,1.7790,3.6290,0.24,0.246,0.238,0.267,0.0
