In [1]:
import os

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

from powderalert.ml_logic.data import clean_data
from powderalert.ml_logic.preprocessor import define_X, preprocess

In [2]:
# Set up the Open-Meteo API client with an on-disk HTTP cache and retry on error.
# The forecast endpoint returns a window relative to "now" (past_days / forecast_days),
# so cached responses have to expire: with a never-expiring cache (expire_after = -1)
# every later run would keep receiving the window captured by the first run.
# NOTE(review): entries already written to `.cache` by an earlier never-expiring run
# may keep their old expiry - delete the `.cache` file once if stale data shows up.
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here.
# The order of the names in "hourly" matters: the response exposes the variables
# by position, in the same order as they were requested.
url = "https://api.open-meteo.com/v1/forecast"
params = {
    "latitude": 47.26580883196723,
    "longitude": 11.84457426992035,
    # Only the 31 days before today are requested; no forecast days.
    "past_days": 31,
    "forecast_days": 0,
    "hourly": [
        "temperature_2m",
        "relative_humidity_2m",
        "dew_point_2m",
        "precipitation",
        "rain",
        "snowfall",
        "snow_depth",
        "weather_code",
        "pressure_msl",
        "surface_pressure",
        "cloud_cover",
        "cloud_cover_low",
        "cloud_cover_mid",
        "cloud_cover_high",
        "et0_fao_evapotranspiration",
        "vapour_pressure_deficit",
        "wind_speed_10m",
        "wind_speed_100m",
        "wind_direction_10m",
        "wind_direction_100m",
        "wind_gusts_10m",
        "soil_temperature_0_to_7cm",
        "soil_temperature_7_to_28cm",
        "soil_temperature_28_to_100cm",
        "soil_temperature_100_to_255cm",
        "soil_moisture_0_to_7cm",
        "soil_moisture_7_to_28cm",
        "soil_moisture_28_to_100cm",
        "soil_moisture_100_to_255cm",
        "sunshine_duration",
    ],
    "models": "best_match",
}

responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")


Coordinates 47.2400016784668°N 11.84000015258789°E
Elevation 1818.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


In [3]:
# Process hourly data into a DataFrame with one row per timestamp.
# The response exposes the variables by position only, in the order they were
# requested, so the column names are taken straight from the request list instead
# of being repeated by hand (which would silently mislabel columns whenever the
# request list is edited).
hourly = response.Hourly()
hourly_variable_names = params["hourly"]

# Fail loudly if the response does not carry exactly the requested variables.
if hourly.VariablesLength() != len(hourly_variable_names):
    raise ValueError(
        f"Requested {len(hourly_variable_names)} hourly variables "
        f"but the response contains {hourly.VariablesLength()}"
    )

# Timestamps: from Time() up to (but excluding) TimeEnd(), stepping by Interval()
# seconds, expressed in UTC.
hourly_data = {"date": pd.date_range(
    start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
    end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
    freq = pd.Timedelta(seconds = hourly.Interval()),
    inclusive = "left"
)}

# One column per requested variable, in request order. The individual arrays
# remain available afterwards as hourly_dataframe["<variable name>"].
for position, variable_name in enumerate(hourly_variable_names):
    hourly_data[variable_name] = hourly.Variables(position).ValuesAsNumpy()

hourly_dataframe = pd.DataFrame(data = hourly_data)
hourly_dataframe

Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,snowfall,snow_depth,weather_code,pressure_msl,...,wind_gusts_10m,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm,sunshine_duration
0,2024-11-15 00:00:00+00:00,-2.374,77.0,-5.856881,0.0,0.0,0.0,0.01,1.0,1028.300049,...,24.480000,,,,,,,,,0.0
1,2024-11-15 01:00:00+00:00,-1.974,73.0,-6.169195,0.0,0.0,0.0,0.01,2.0,1027.400024,...,24.480000,,,,,,,,,0.0
2,2024-11-15 02:00:00+00:00,-2.274,76.0,-5.931446,0.0,0.0,0.0,0.01,0.0,1028.000000,...,21.599998,,,,,,,,,0.0
3,2024-11-15 03:00:00+00:00,-2.224,78.0,-5.541404,0.0,0.0,0.0,0.01,1.0,1027.000000,...,19.799999,,,,,,,,,0.0
4,2024-11-15 04:00:00+00:00,-2.524,80.0,-5.500170,0.0,0.0,0.0,0.01,0.0,1026.699951,...,22.680000,,,,,,,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,2024-12-15 19:00:00+00:00,-3.974,71.0,-8.457881,0.0,0.0,0.0,0.23,3.0,1035.099976,...,40.320000,,,,,,,,,0.0
740,2024-12-15 20:00:00+00:00,-3.824,77.0,-7.265340,0.0,0.0,0.0,0.23,71.0,1035.699951,...,40.680000,,,,,,,,,0.0
741,2024-12-15 21:00:00+00:00,-2.874,80.0,-5.841569,0.0,0.0,0.0,0.23,71.0,1035.699951,...,42.480000,,,,,,,,,0.0
742,2024-12-15 22:00:00+00:00,-2.424,82.0,-5.076656,0.0,0.0,0.0,0.23,71.0,1035.500000,...,41.399998,,,,,,,,,0.0


In [4]:
# Print the column names, non-null counts and dtypes of the hourly API frame.
hourly_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 744 entries, 0 to 743
Data columns (total 31 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   date                           744 non-null    datetime64[ns, UTC]
 1   temperature_2m                 744 non-null    float32            
 2   relative_humidity_2m           744 non-null    float32            
 3   dew_point_2m                   744 non-null    float32            
 4   precipitation                  744 non-null    float32            
 5   rain                           744 non-null    float32            
 6   snowfall                       744 non-null    float32            
 7   snow_depth                     744 non-null    float32            
 8   weather_code                   744 non-null    float32            
 9   pressure_msl                   744 non-null    float32            
 10  surface_pressure          

In [21]:
# Location of the prediction CSV. The directory defaults to the original local
# checkout, but can be overridden with the POWDERALERT_RAW_DATA_DIR environment
# variable so the notebook is not tied to one user's machine.
raw_data_dir = os.environ.get(
    "POWDERALERT_RAW_DATA_DIR",
    "/Users/torstenwrigley/code/MadMax1995bb/powder_alert2.0/raw_data",
)
file_path = os.path.join(raw_data_dir, "prediction_dataset_last48_next24.csv")

# NOTE(review): the loaded frame contains an "Unnamed: 0" column, which looks like a
# saved index - confirm whether downstream code expects it before dropping it.
train_dataset = pd.read_csv(file_path)

In [22]:
# Print the column names, non-null counts and dtypes of the frame loaded from the CSV.
train_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 32 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Unnamed: 0                     120 non-null    int64  
 1   weather_code_encoded           120 non-null    float64
 2   cloud_cover                    120 non-null    float64
 3   cloud_cover_high               120 non-null    float64
 4   cloud_cover_low                120 non-null    float64
 5   cloud_cover_mid                120 non-null    float64
 6   dew_point_2m                   120 non-null    float64
 7   et0_fao_evapotranspiration     120 non-null    float64
 8   precipitation                  120 non-null    float64
 9   pressure_msl                   120 non-null    float64
 10  rain                           120 non-null    float64
 11  relative_humidity_2m           120 non-null    float64
 12  snow_depth                     120 non-null    flo

In [19]:
# Pass the hourly API frame through the project's clean_data helper
# (imported from powderalert.ml_logic.data); its transformations are not visible
# in this notebook.
# NOTE(review): this cell's execution count (19) is lower than that of the cell
# above it (21) - confirm the notebook still works with Restart Kernel -> Run All.
predict_df = clean_data(hourly_dataframe)

✅ Data cleaned


In [20]:
# Print the index range, column names, non-null counts and dtypes of the cleaned frame.
predict_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 120 entries, 2024-12-10 00:00:00 to 2024-12-14 23:00:00
Data columns (total 30 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   temperature_2m                 120 non-null    float32
 1   relative_humidity_2m           120 non-null    float32
 2   dew_point_2m                   120 non-null    float32
 3   precipitation                  120 non-null    float32
 4   rain                           120 non-null    float32
 5   snowfall                       120 non-null    float32
 6   snow_depth                     120 non-null    float32
 7   weather_code                   120 non-null    float32
 8   pressure_msl                   120 non-null    float32
 9   surface_pressure               120 non-null    float32
 10  cloud_cover                    120 non-null    float32
 11  cloud_cover_low                120 non-null    float32
 12  cloud_cover_m