In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [3]:
# Load the data
df = pd.read_csv("/home/astha/minor_pro/Network-Prediction-Analysis/src/data/Data1.csv")
df['PERIOD_START_TIME'] = pd.to_datetime(df['PERIOD_START_TIME'])


In [7]:
df.head

<bound method NDFrame.head of        PERIOD_START_TIME        pl        pd       pdv
0    2022-07-04 06:00:00  0.003754  0.975612  0.018548
1    2022-07-04 06:15:00  0.003019  0.976934  0.019894
2    2022-07-04 06:30:00  0.005802  0.977411  0.020060
3    2022-07-04 06:45:00  0.004035  0.977569  0.021209
4    2022-07-04 07:00:00  0.005641  0.981013  0.024581
...                  ...       ...       ...       ...
5888 2022-10-01 22:45:00  0.004487  0.744733  0.044438
5889 2022-10-01 23:00:00  0.001543  0.732024  0.031747
5890 2022-10-01 23:15:00  0.001963  0.728793  0.028551
5891 2022-10-01 23:30:00  0.008098  0.738851  0.038595
5892 2022-10-01 23:45:00  0.006501  0.734670  0.036419

[5893 rows x 4 columns]>

In [21]:
# Create lagged features for 'pl', 'pd', and 'pdv'
for lag in range(1, 4):
    for col in ['pl', 'pd', 'pdv']:
        lag_df[f'{col}_lag{lag}'] = lag_df[col].shift(lag)

lag_df.dropna(inplace=True)

# Define test date
test_date = pd.to_datetime("2022-09-27").date()

# Split the lagged dataframe
train_lag = lag_df[lag_df['PERIOD_START_TIME'].dt.date < test_date]
test_lag = lag_df[lag_df['PERIOD_START_TIME'].dt.date == test_date]

In [24]:
# --- Predict with XGBoost ---
result_dict = {'PERIOD_START_TIME': test_lag['PERIOD_START_TIME'].values}

# Dictionary to store scalers for inverse transformation
scalers_xgb = {}

# Iterate over each column to be predicted
for col in ['pl', 'pd', 'pdv']:
    # Prepare training data
    X_train = train_lag[[f'{col}_lag1', f'{col}_lag2', f'{col}_lag3']]
    y_train = train_lag[col].values.reshape(-1, 1)

    # Create and fit a scaler for the target variable
    scaler = MinMaxScaler()
    y_train_scaled = scaler.fit_transform(y_train)
    scalers_xgb[col] = scaler

    # Prepare test data for prediction
    X_test = test_lag[[f'{col}_lag1', f'{col}_lag2', f'{col}_lag3']]
    
    # Initialize and train the XGBoost model
    model = XGBRegressor(
        objective='reg:tweedie',
        tweedie_variance_power=1.5,
        learning_rate=0.1,
        max_depth=4,
        n_estimators=100,
        random_state=42
    )
    model.fit(X_train, y_train_scaled)

    # Make scaled predictions
    y_pred_scaled = model.predict(X_test).reshape(-1, 1)

    # Inverse transform the predictions to the original scale
    y_pred_original = scaler.inverse_transform(y_pred_scaled).flatten()

    # Clip any negative values that might occur
    y_pred_original = np.clip(y_pred_original, 0, None)

    # Store the predictions in the result dictionary
    result_dict[f'{col}_xgb'] = y_pred_original

# Save to CSV
result_df = pd.DataFrame(result_dict)
output_file = '../data/xgb_result.csv'
result_df.to_csv(output_file, index=False)
print(f"Forecast results saved to {output_file}")

Forecast results saved to ../data/xgb_result.csv


In [25]:
xgb = pd.read_csv("/home/astha/minor_pro/Network-Prediction-Analysis/src/data/xgb_result.csv")
xgb.head()

Unnamed: 0,PERIOD_START_TIME,pl_xgb,pd_xgb,pdv_xgb
0,2022-09-27 06:00:00,0.004698,0.728813,0.031685
1,2022-09-27 06:15:00,0.005167,0.719052,0.02559
2,2022-09-27 06:30:00,0.002859,0.729058,0.025098
3,2022-09-27 06:45:00,0.001849,0.719651,0.025397
4,2022-09-27 07:00:00,0.001514,0.719646,0.025171
