In [1]:
import pandas as pd
import requests
from datetime import datetime, timedelta
import pickle 
import numpy as np

def interpolate_to_5_minutes(data):
    original_times = data['minutely_15']['time']
    original_precipitation = data['minutely_15']['precipitation']

    num_intervals = len(original_times) * 3
    new_times = []
    new_precipitation = []

    for i in range(len(original_times)):
        time = datetime.fromisoformat(original_times[i])
        for j in range(3):
            new_time = time + timedelta(minutes=5 * j)
            new_times.append(new_time.isoformat())
            new_precipitation.append(original_precipitation[i] / 3)

    rainfall_2_hours = []
    for i in range(len(new_times)):
        start_index = max(i - 24, 0)
        total_rainfall = sum(new_precipitation[start_index:i+1])
        rainfall_2_hours.append(total_rainfall)

    return new_times, new_precipitation, rainfall_2_hours\


def iterative_forecasting_with_initialization_and_scaling(model, scaler, X, num_forecasts, num_lags=6):
    """
    Perform iterative forecasting using the model's own predictions to create lagged features.
    The first prediction is made without scaling, and subsequent predictions are made with scaled features.

    Parameters:
    model (Model): The trained prediction model.
    scaler (StandardScaler): Scaler used for transforming the features.
    X (DataFrame): Feature set with the initial row having zeros for lagged features.
    num_forecasts (int): Number of future time steps to forecast.
    num_lags (int): Number of lagged features.

    Returns:
    DataFrame: DataFrame with the predictions appended.
    """

    # Initialize the lagged features in the first row with zeros
    for lag in range(1, num_lags + 1):
        X[f'percentage_previous_{lag}'] = 0

    # Store the initial features to update them iteratively
    current_features = X.iloc[0].copy()

    # First prediction without scaling
    model_input = current_features.values.reshape(1, -1)
    next_prediction = model.predict(model_input)[0]

    # Update the lagged features for the first prediction
    for lag in range(num_lags - 1, 0, -1):
        current_features[f'percentage_previous_{lag + 1}'] = current_features[f'percentage_previous_{lag}']
    current_features['percentage_previous_1'] = next_prediction

    # Add the updated features and prediction to the DataFrame
    X.loc[len(X)] = current_features

    # Perform subsequent predictions with scaling
    for i in range(1, num_forecasts):
        # Update the lagged features with the new prediction
        for lag in range(num_lags - 1, 0, -1):
            current_features[f'percentage_previous_{lag + 1}'] = current_features[f'percentage_previous_{lag}']
        current_features['percentage_previous_1'] = next_prediction

        # Scale all features for the next prediction
        scaled_features = scaler.transform(current_features.values.reshape(1, -1))

        # Predict the next target with scaled features
        next_prediction = model.predict(scaled_features)[0]

        # Update the current features with the latest prediction
        current_features = X.iloc[i].copy() if i < len(X) else current_features.copy()
        current_features.iloc[-num_lags:] = scaled_features[0, -num_lags:]

        # Add the updated features and prediction to the DataFrame
        if i < len(X):
            X.iloc[i] = current_features
        else:
            X.loc[len(X)] = current_features

    return X


In [2]:
# Fetch data from the API
url = 'https://api.open-meteo.com/v1/forecast?latitude=51.55202&longitude=4.586668&minutely_15=precipitation&forecast_days=3'
response = requests.get(url)
data = response.json() if response.status_code == 200 else {}

# Prepare the DataFrame from API data
if data:
    new_times, new_precipitation, rainfall_2_hours = interpolate_to_5_minutes(data)

    api_df = pd.DataFrame({
        'timestamp': pd.to_datetime(new_times),
        'day': pd.to_datetime(new_times).day,
        'month': pd.to_datetime(new_times).month,
        'dayofweek': pd.to_datetime(new_times).dayofweek,
        'hour': pd.to_datetime(new_times).hour,
        'rainfall_current': new_precipitation,
        'rainfall_previous_2_hours': rainfall_2_hours
    })
else:
    print("Failed to retrieve or process API data")

# Load model and scaler from pickle files
with open('model_randomforest_14.pkl', 'rb') as file:
    model = pickle.load(file)

with open('scaler.pkl', 'rb') as file:
    scaler = pickle.load(file)
    
# Prepare initial features for forecasting
X_initial = api_df.drop(columns=['timestamp'])  # Adjust based on model's requirements
num_forecasts = 6  # Adjust based on how many predictions you want to make

# Perform iterative forecasting
api_df_with_predictions = iterative_forecasting_with_initialization_and_scaling(model, scaler, X_initial, num_forecasts)

# Show the last few rows with the predictions
api_df_with_predictions.tail(num_forecasts)



Unnamed: 0,day,month,dayofweek,hour,rainfall_current,rainfall_previous_2_hours,percentage_previous_1,percentage_previous_2,percentage_previous_3,percentage_previous_4,percentage_previous_5,percentage_previous_6
859,21.0,12.0,3.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
860,21.0,12.0,3.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
861,21.0,12.0,3.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
862,21.0,12.0,3.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
863,21.0,12.0,3.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
864,19.0,12.0,1.0,0.0,0.0,0.0,8.8005,0.0,0.0,0.0,0.0,0.0
