In [4]:
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta
from functions import get_energy
# Assuming functions.extract_features_for_timestamp and get_energy.get() are defined as in your code
def extract_features_for_timestamp(timestamp, df):
    return [timestamp.hour, timestamp.day, timestamp.month, timestamp.weekday()]
def xgb_quantile_grad_hess(quantile, y_true, y_pred):
    """
    Gradient and Hessian for quantile regression with XGBoost.
    """
    # Prediction error
    error = y_true - y_pred

    # Gradient
    grad = np.where(error > 0, -quantile, -(quantile - 1))

    # Hessian (second derivative)
    hess = np.ones_like(y_pred)

    return grad, hess

In [None]:
# Data preparation (similar to your LSTM model)
df = pd.DataFrame(get_energy.get())  # Or use your input_data

In [14]:
feature_columns = ['hour', 'day', 'month', 'week']
# extract features
df['hour'] = df.index.hour
df['day'] = df.index.day
df['month'] = df.index.month
df['week'] = df.index.weekday
X = df[feature_columns].values
y = df['gesamt'].values.reshape(-1, 1)
horizons = [36, 40, 44, 60, 64, 68]
# Scaling
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y)

# Quantiles to predict
quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]

# Train XGBoost models for each quantile
models = {}
for q in quantiles:
    model = xgb.XGBRegressor(objective=lambda y_true, y_pred: xgb_quantile_grad_hess(q, y_true, y_pred))
    model.fit(X_scaled, y_scaled)
    models[q] = model

# Prediction for a specific timestamp
date_str = datetime.now()
future_timestamps = [date_str + timedelta(hours=h) for h in horizons]
prediction_inputs = [extract_features_for_timestamp(ts, df) for ts in future_timestamps]
prediction_inputs_scaled = scaler_X.transform(prediction_inputs)

# Generating predictions
predictions = {}
for q, model in models.items():
    pred_scaled = model.predict(prediction_inputs_scaled)
    pred_original_scale = scaler_y.inverse_transform(pred_scaled.reshape(-1, 1))
    predictions[q] = pred_original_scale

# Format predictions into a DataFrame similar to LSTM model's output
# ...


In [22]:
# Assuming 'predictions' is a dictionary with quantiles as keys and arrays of predictions as values

# Static base forecast date
base_forecast_date = "2024-01-12"

# Horizons used for prediction
horizons = [36, 40, 44, 60, 64, 68]

# Initialize DataFrame for static information
df_static = pd.DataFrame({
    'forecast_date': [base_forecast_date] * len(horizons),
    'target': ['energy'] * len(horizons),
    'horizon': [f'{h} hour' for h in horizons]
})

# Initialize DataFrame for quantile predictions
df_predictions = pd.DataFrame()

# Adding quantile predictions with correctly formatted headers
quantile_column_names = [f'q{q}' for q in quantiles]  # Adjusted for correct column naming
for col_name, q in zip(quantile_column_names, quantiles):
    df_predictions[col_name] = predictions[q].flatten()

# Concatenate the static and dynamic parts
final_df = pd.concat([df_static.reset_index(drop=True), df_predictions], axis=1)

# Displaying the final DataFrame
print(final_df)


  forecast_date  target  horizon     q0.025      q0.25       q0.5      q0.75  \
0    2024-01-12  energy  36 hour  41.588631  45.891453  50.111164  54.336388   
1    2024-01-12  energy  40 hour  43.430901  45.711517  48.828651  52.813740   
2    2024-01-12  energy  44 hour  54.361748  55.856518  58.676788  61.316986   
3    2024-01-12  energy  60 hour  44.453987  48.294262  50.058281  52.012207   
4    2024-01-12  energy  64 hour  61.760460  68.863976  70.727318  71.559601   
5    2024-01-12  energy  68 hour  65.513321  73.505501  75.737915  75.942764   

      q0.975  
0  53.313656  
1  54.355484  
2  63.376110  
3  54.552269  
4  71.579018  
5  77.449783  


In [23]:
final_df

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2024-01-12,energy,36 hour,41.588631,45.891453,50.111164,54.336388,53.313656
1,2024-01-12,energy,40 hour,43.430901,45.711517,48.828651,52.81374,54.355484
2,2024-01-12,energy,44 hour,54.361748,55.856518,58.676788,61.316986,63.37611
3,2024-01-12,energy,60 hour,44.453987,48.294262,50.058281,52.012207,54.552269
4,2024-01-12,energy,64 hour,61.76046,68.863976,70.727318,71.559601,71.579018
5,2024-01-12,energy,68 hour,65.513321,73.505501,75.737915,75.942764,77.449783


In [26]:
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from datetime import datetime, timedelta
from functions import get_energy

def energy_forecast(date_str, input_data=None):
    # Helper function to extract features for a given timestamp
    def extract_features_for_timestamp(timestamp, df):
        return [timestamp.hour, timestamp.day, timestamp.month, timestamp.weekday()]

    # Gradient and Hessian for quantile regression with XGBoost
    def xgb_quantile_grad_hess(quantile, y_true, y_pred):
        error = y_true - y_pred
        grad = np.where(error > 0, -quantile, -(quantile - 1))
        hess = np.ones_like(y_pred)
        return grad, hess

    # Load data
    if input_data is None:
        df = pd.DataFrame(get_energy.get())
    else:
        df = pd.DataFrame(input_data)

    # Set index as datetime if not already
    if not isinstance(df.index, pd.DatetimeIndex):
        df['date_time'] = pd.to_datetime(df['date_time'])
        df.set_index('date_time', inplace=True)

    # Extract features
    feature_columns = ['hour', 'day', 'month', 'week']
    df['hour'] = df.index.hour
    df['day'] = df.index.day
    df['month'] = df.index.month
    df['week'] = df.index.weekday

    # Prepare data for model
    X = df[feature_columns].values
    y = df['gesamt'].values.reshape(-1, 1)

    # Scaling
    scaler_X = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)

    scaler_y = StandardScaler()
    y_scaled = scaler_y.fit_transform(y)

    # Train models for each quantile
    quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
    models = {}
    for q in quantiles:
        model = xgb.XGBRegressor(objective=lambda y_true, y_pred: xgb_quantile_grad_hess(q, y_true, y_pred))
        model.fit(X_scaled, y_scaled)
        models[q] = model

    # Generate predictions for future timestamps
    horizons = [36, 40, 44, 60, 64, 68]
    base_date = datetime.strptime(date_str, '%Y-%m-%d')
    future_timestamps = [base_date + timedelta(hours=h) for h in horizons]
    prediction_inputs = [extract_features_for_timestamp(ts, df) for ts in future_timestamps]
    prediction_inputs_scaled = scaler_X.transform(prediction_inputs)

    predictions = {}
    for q in quantiles:
        model = xgb.XGBRegressor(objective=lambda y_true, y_pred: xgb_quantile_grad_hess(q, y_true, y_pred))
        model.fit(X_scaled, y_scaled)
        pred_scaled = model.predict(prediction_inputs_scaled)
        # Check if predictions are valid (not NaN)
        if np.isnan(pred_scaled).any():
            print(f"Warning: NaN predictions for quantile {q}")
        else:
            pred_original_scale = scaler_y.inverse_transform(pred_scaled.reshape(-1, 1))
            predictions[q] = pred_original_scale.flatten()  # Flatten the predictions

    # Formatting predictions into DataFrame
    if predictions:
        df_predictions = pd.DataFrame(predictions)
        # Correctly format column names
        df_predictions.columns = [f'q{str(q).replace("0.", "0")}' for q in quantiles]
        final_df = pd.concat([df_static.reset_index(drop=True), df_predictions], axis=1)
    else:
        print("Error: No valid predictions were generated.")
        final_df = pd.DataFrame()

    return final_df
# Example usage
final_df = energy_forecast("2024-01-12")
print(final_df)


  energydata = pd.concat([energydata, pd.DataFrame(rawdata, columns=col_names)])
100%|██████████| 264/264 [00:30<00:00,  8.55it/s]


  forecast_date  target  horizon      q0025       q025        q05       q075  \
0    2024-01-12  energy  36 hour  57.466991  58.654144  61.410053  63.266396   
1    2024-01-12  energy  40 hour  55.390163  56.997665  59.159149  60.022156   
2    2024-01-12  energy  44 hour  54.215191  55.719337  57.671482  59.756744   
3    2024-01-12  energy  60 hour  53.756184  57.790195  60.209015  60.499798   
4    2024-01-12  energy  64 hour  51.393444  56.076744  59.266396  57.996037   
5    2024-01-12  energy  68 hour  53.085033  57.167255  59.830135  59.274750   

       q0975  
0  66.219917  
1  64.365433  
2  62.567345  
3  62.589691  
4  60.572102  
5  61.945896  


In [27]:
final_df

Unnamed: 0,forecast_date,target,horizon,q0025,q025,q05,q075,q0975
0,2024-01-12,energy,36 hour,57.466991,58.654144,61.410053,63.266396,66.219917
1,2024-01-12,energy,40 hour,55.390163,56.997665,59.159149,60.022156,64.365433
2,2024-01-12,energy,44 hour,54.215191,55.719337,57.671482,59.756744,62.567345
3,2024-01-12,energy,60 hour,53.756184,57.790195,60.209015,60.499798,62.589691
4,2024-01-12,energy,64 hour,51.393444,56.076744,59.266396,57.996037,60.572102
5,2024-01-12,energy,68 hour,53.085033,57.167255,59.830135,59.27475,61.945896


In [7]:
def energy_forecast(input_data=None, date_str=None):
    # Helper function to extract features for a given timestamp
    def extract_features_for_timestamp(timestamp, df):
        return [timestamp.hour, timestamp.day, timestamp.month, timestamp.weekday()]

    # Gradient and Hessian for quantile regression with XGBoost
    def xgb_quantile_grad_hess(quantile, y_true, y_pred):
        error = y_true - y_pred
        grad = np.where(error > 0, -quantile, -(quantile - 1))
        hess = np.ones_like(y_pred)
        return grad, hess

    # Load data
    if input_data is None:
        df = pd.DataFrame(get_energy.get())
    else:
        df = pd.DataFrame(input_data)

    # Set index as datetime if not already
    if not isinstance(df.index, pd.DatetimeIndex):
        df['date_time'] = pd.to_datetime(df['date_time'])
        df.set_index('date_time', inplace=True)

    # Extract features
    feature_columns = ['hour', 'day', 'month', 'week']
    df['hour'] = df.index.hour
    df['day'] = df.index.day
    df['month'] = df.index.month
    df['week'] = df.index.weekday

    # Prepare data for model
    X = df[feature_columns].values
    y = df['gesamt'].values.reshape(-1, 1)

    # Scaling
    scaler_X = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)

    scaler_y = StandardScaler()
    y_scaled = scaler_y.fit_transform(y)

    # Train models for each quantile
    quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
    models = {}
    for q in quantiles:
        model = xgb.XGBRegressor(objective=lambda y_true, y_pred: xgb_quantile_grad_hess(q, y_true, y_pred))
        model.fit(X_scaled, y_scaled)
        models[q] = model

    # Generate predictions for future timestamps
    horizons = [36, 40, 44, 60, 64, 68]
    base_date = datetime.strptime(date_str, '%Y-%m-%d')
    future_timestamps = [base_date + timedelta(hours=h) for h in horizons]
    prediction_inputs = [extract_features_for_timestamp(ts, df) for ts in future_timestamps]
    prediction_inputs_scaled = scaler_X.transform(prediction_inputs)

    predictions = {}
    for q in quantiles:
        model = xgb.XGBRegressor(objective=lambda y_true, y_pred: xgb_quantile_grad_hess(q, y_true, y_pred))
        model.fit(X_scaled, y_scaled)
        pred_scaled = model.predict(prediction_inputs_scaled)
        # Check if predictions are valid (not NaN)
        if np.isnan(pred_scaled).any():
            print(f"Warning: NaN predictions for quantile {q}")
        else:
            pred_original_scale = scaler_y.inverse_transform(pred_scaled.reshape(-1, 1))
            predictions[q] = pred_original_scale.flatten()  # Flatten the predictions

    # Formatting predictions into DataFrame
    # Initialize DataFrame for static information
    df_static = pd.DataFrame({
        'forecast_date': [base_date] * len(horizons),
        'target': ['energy'] * len(horizons),
        'horizon': [f'{h} hour' for h in horizons]
    })
    if predictions:
        df_predictions = pd.DataFrame(predictions)
        # Correctly format column names
        df_predictions.columns = [f'q{q}' for q in quantiles]
        final_df = pd.concat([df_static.reset_index(drop=True), df_predictions], axis=1)
    else:
        print("Error: No valid predictions were generated.")
        final_df = pd.DataFrame()

    return final_df

In [8]:
final_df = energy_forecast(date_str="2024-01-12")

  energydata = pd.concat([energydata, pd.DataFrame(rawdata, columns=col_names)])
100%|██████████| 264/264 [00:28<00:00,  9.34it/s]


In [9]:
final_df

Unnamed: 0,forecast_date,target,horizon,q0.025,q0.25,q0.5,q0.75,q0.975
0,2024-01-12,energy,36 hour,57.910706,62.421505,60.986145,63.87986,66.519417
1,2024-01-12,energy,40 hour,55.416237,59.61105,59.106674,60.321102,64.163994
2,2024-01-12,energy,44 hour,53.826439,57.689701,58.173233,57.753426,62.82711
3,2024-01-12,energy,60 hour,54.585552,55.60535,56.879314,60.770435,62.274242
4,2024-01-12,energy,64 hour,51.848927,53.309471,56.164444,57.86697,60.45657
5,2024-01-12,energy,68 hour,53.426029,54.252979,56.069691,57.669212,62.152622


In [2]:
final_df = energy_forecast(date_str="2024-01-12")

  energydata = pd.concat([energydata, pd.DataFrame(rawdata, columns=col_names)])
100%|██████████| 264/264 [00:31<00:00,  8.39it/s]


In [3]:
final_df

Unnamed: 0,forecast_date,target,horizon,0
0,2024-01-12,energy,36 hour,q0.025
1,2024-01-12,energy,40 hour,q0.25
2,2024-01-12,energy,44 hour,q0.5
3,2024-01-12,energy,60 hour,q0.75
4,2024-01-12,energy,64 hour,q0.975
5,2024-01-12,energy,68 hour,
