50:50

In [1]:
import pandas as pd
import pmdarima as pm
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Read the combined measurement + survey table from disk
df = pd.read_csv('phase_1_data_with_survey.csv')

# Combine the calendar date and the hour offset into a single timestamp per
# row, then use that timestamp as the frame's index
timestamps = pd.to_datetime(df['Date']) + pd.to_timedelta(df['Hour'], unit='h')
df = df.assign(Datetime=timestamps).set_index('Datetime')

# Turn the categorical survey answers into numbers.
# home_size gets integer labels from LabelEncoder; the two Yes/No questions
# are mapped to 1/0 (any other answer becomes NaN through Series.map).
label_encoder = LabelEncoder()
yes_no_codes = {'Yes': 1, 'No': 0}
df['home_size_encoded'] = label_encoder.fit_transform(df['home_size'])
for survey_col in ('electric_car', 'electrically_heated'):
    df[f'{survey_col}_encoded'] = df[survey_col].map(yes_no_codes)

# Columns handed to the model as exogenous regressors
exog_vars = [
    'Temperature',
    'home_size_encoded',
    'electric_car_encoded',
    'electrically_heated_encoded',
]

def fit_forecast_arima(df, split_ratio=0.5):
    """Fit one seasonal ARIMA model per ID and forecast its held-out tail.

    For every distinct value in ``df['ID']`` the matching rows are sorted by
    index and split positionally at ``split_ratio``. ``auto_arima`` selects a
    seasonal model (period 24) on the first part, using the columns listed in
    the module-level ``exog_vars`` as regressors, and the remaining rows are
    forecast with the same regressors.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``ID`` and ``Demand_kWh`` plus every column named
        in ``exog_vars``. The index is used as the time axis.
    split_ratio : float, default 0.5
        Fraction of each ID's rows (after sorting) used for training.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID``, ``Datetime``, ``Actual_Demand`` and
        ``Predicted_Demand``; one row per forecast observation, indexed by
        the test timestamps. An empty frame with those columns is returned
        when no ID could be forecast.
    """
    import gc  # local import: only used for the per-ID cleanup below

    result_columns = ['ID', 'Datetime', 'Actual_Demand', 'Predicted_Demand']
    forecasts = []

    for i, id_ in enumerate(df['ID'].unique(), start=1):
        print(f"{i} Processing ID: {id_}")
        df_id = df[df['ID'] == id_].sort_index()

        # Chronological split: first part trains, the rest is forecast
        split_index = int(len(df_id) * split_ratio)
        train = df_id.iloc[:split_index]
        test = df_id.iloc[split_index:]
        if train.empty or test.empty:
            # Nothing to fit on, or nothing to forecast: skip this ID instead
            # of letting the model search fail on an empty series.
            print(f"Skipping ID {id_}: not enough rows for a train/test split")
            continue

        # Fit the ARIMA model.
        # `X` is the keyword pmdarima documents for exogenous regressors
        # (`exogenous` is the older, deprecated spelling).
        # NOTE(review): regressors that never vary within one ID cannot be
        # told apart from the intercept - confirm they are useful here.
        model = auto_arima(train['Demand_kWh'],
                           X=train[exog_vars],
                           seasonal=True,
                           m=24,
                           stepwise=True,
                           suppress_warnings=True,
                           trace=False)

        # Forecast exactly as many steps as there are held-out rows
        forecast = model.predict(n_periods=len(test), X=test[exog_vars])

        # Pair forecasts with actuals by position. Converting both to plain
        # arrays keeps pandas from aligning on a forecast index that may not
        # match the test timestamps.
        forecast_df = pd.DataFrame({
            'ID': id_,
            'Datetime': test.index,
            'Actual_Demand': test['Demand_kWh'].to_numpy(),
            'Predicted_Demand': np.asarray(forecast),
        }, index=test.index)
        forecasts.append(forecast_df)

        # Drop the fitted model before the next search starts so its arrays
        # can be reclaimed; this lowers peak memory over many IDs.
        del model, forecast
        gc.collect()

    if not forecasts:
        # pd.concat raises on an empty list; return an empty, labelled frame
        return pd.DataFrame(columns=result_columns)
    return pd.concat(forecasts)

# Produce the per-ID forecasts for the held-out part of every series
results_df = fit_forecast_arima(df)

# Error metrics pooled over all IDs
y_true = results_df['Actual_Demand']
y_pred = results_df['Predicted_Demand']
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mse)

print(f"Overall MSE: {mse}")
print(f"Overall MAE: {mae}")
print(f"Overall RMSE: {rmse}")

# Split the timestamp back into Date / Hour, keep only the export columns,
# and write them to disk without the index
results_df = results_df.assign(
    Date=results_df['Datetime'].dt.date,
    Hour=results_df['Datetime'].dt.hour,
)[['ID', 'Date', 'Hour', 'Actual_Demand', 'Predicted_Demand']]
results_df.to_csv('sarima_phase_1_with_survey_5050.csv', index=False)

print("Forecast results have been written to sarima_phase_1_with_survey_5050.csv")


1 Processing ID: Exp_62
2 Processing ID: Exp_111
3 Processing ID: Exp_567
4 Processing ID: Exp_500
5 Processing ID: Exp_222
6 Processing ID: Exp_587
7 Processing ID: Exp_17
8 Processing ID: Exp_462
9 Processing ID: Exp_271
10 Processing ID: Exp_49
11 Processing ID: Exp_451
12 Processing ID: Exp_502
13 Processing ID: Exp_86
14 Processing ID: Exp_612
15 Processing ID: Exp_12
16 Processing ID: Exp_208
17 Processing ID: Exp_529
18 Processing ID: Exp_56
19 Processing ID: Exp_93
20 Processing ID: Exp_446
21 Processing ID: Exp_643
22 Processing ID: Exp_191
23 Processing ID: Exp_445
24 Processing ID: Exp_420
25 Processing ID: Exp_103
26 Processing ID: Exp_693
27 Processing ID: Exp_367
28 Processing ID: Exp_325
29 Processing ID: Exp_182
30 Processing ID: Exp_124
31 Processing ID: Exp_272
32 Processing ID: Exp_332
33 Processing ID: Exp_732
34 Processing ID: Exp_737
35 Processing ID: Exp_23
36 Processing ID: Exp_460
Overall MSE: 1.1033474644178984
Overall MAE: 0.7095453306251626
Overall RMSE: 1.050403477

In [1]:
import pandas as pd
import pmdarima as pm
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# Read the combined measurement + survey table from disk
df = pd.read_csv('phase_1_data_with_survey.csv')

# Combine the calendar date and the hour offset into a single timestamp per
# row, then use that timestamp as the frame's index
timestamps = pd.to_datetime(df['Date']) + pd.to_timedelta(df['Hour'], unit='h')
df = df.assign(Datetime=timestamps).set_index('Datetime')

# Turn the categorical survey answers into numbers.
# home_size gets integer labels from LabelEncoder; the two Yes/No questions
# are mapped to 1/0 (any other answer becomes NaN through Series.map).
label_encoder = LabelEncoder()
yes_no_codes = {'Yes': 1, 'No': 0}
df['home_size_encoded'] = label_encoder.fit_transform(df['home_size'])
for survey_col in ('electric_car', 'electrically_heated'):
    df[f'{survey_col}_encoded'] = df[survey_col].map(yes_no_codes)

# Columns handed to the model as exogenous regressors
exog_vars = [
    'Temperature',
    'home_size_encoded',
    'electric_car_encoded',
    'electrically_heated_encoded',
]

def fit_forecast_arima(df, split_ratio=0.8):
    """Fit one seasonal ARIMA model per ID and forecast its held-out tail.

    For every distinct value in ``df['ID']`` the matching rows are sorted by
    index and split positionally at ``split_ratio``. ``auto_arima`` selects a
    seasonal model (period 24) on the first part, using the columns listed in
    the module-level ``exog_vars`` as regressors, and the remaining rows are
    forecast with the same regressors.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``ID`` and ``Demand_kWh`` plus every column named
        in ``exog_vars``. The index is used as the time axis.
    split_ratio : float, default 0.8
        Fraction of each ID's rows (after sorting) used for training.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID``, ``Datetime``, ``Actual_Demand`` and
        ``Predicted_Demand``; one row per forecast observation, indexed by
        the test timestamps. An empty frame with those columns is returned
        when no ID could be forecast.
    """
    import gc  # local import: only used for the per-ID cleanup below

    result_columns = ['ID', 'Datetime', 'Actual_Demand', 'Predicted_Demand']
    forecasts = []

    for i, id_ in enumerate(df['ID'].unique(), start=1):
        print(f"{i} Processing ID: {id_}")
        df_id = df[df['ID'] == id_].sort_index()

        # Chronological split: first part trains, the rest is forecast
        split_index = int(len(df_id) * split_ratio)
        train = df_id.iloc[:split_index]
        test = df_id.iloc[split_index:]
        if train.empty or test.empty:
            # Nothing to fit on, or nothing to forecast: skip this ID instead
            # of letting the model search fail on an empty series.
            print(f"Skipping ID {id_}: not enough rows for a train/test split")
            continue

        # Fit the ARIMA model.
        # `X` is the keyword pmdarima documents for exogenous regressors
        # (`exogenous` is the older, deprecated spelling).
        # NOTE(review): regressors that never vary within one ID cannot be
        # told apart from the intercept - confirm they are useful here.
        model = auto_arima(train['Demand_kWh'],
                           X=train[exog_vars],
                           seasonal=True,
                           m=24,
                           stepwise=True,
                           suppress_warnings=True,
                           trace=False)

        # Forecast exactly as many steps as there are held-out rows
        forecast = model.predict(n_periods=len(test), X=test[exog_vars])

        # Pair forecasts with actuals by position. Converting both to plain
        # arrays keeps pandas from aligning on a forecast index that may not
        # match the test timestamps.
        forecast_df = pd.DataFrame({
            'ID': id_,
            'Datetime': test.index,
            'Actual_Demand': test['Demand_kWh'].to_numpy(),
            'Predicted_Demand': np.asarray(forecast),
        }, index=test.index)
        forecasts.append(forecast_df)

        # Drop the fitted model before the next search starts so its arrays
        # can be reclaimed. A previous run of this loop ended in MemoryError
        # part-way through the IDs; this is a mitigation, not a guarantee.
        del model, forecast
        gc.collect()

    if not forecasts:
        # pd.concat raises on an empty list; return an empty, labelled frame
        return pd.DataFrame(columns=result_columns)
    return pd.concat(forecasts)

# Run the forecasting function (this cell's definition defaults to an
# 80/20 train/test split)
results_df = fit_forecast_arima(df)

# Calculate overall error metrics, pooled over all IDs
mse = mean_squared_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
mae = mean_absolute_error(results_df['Actual_Demand'], results_df['Predicted_Demand'])
rmse = np.sqrt(mse)

print(f"Overall MSE: {mse}")
print(f"Overall MAE: {mae}")
print(f"Overall RMSE: {rmse}")

# Write the results to a file: split the timestamp back into Date / Hour
# and keep only the export columns
results_df['Date'] = results_df['Datetime'].dt.date
results_df['Hour'] = results_df['Datetime'].dt.hour
results_df = results_df[['ID', 'Date', 'Hour', 'Actual_Demand', 'Predicted_Demand']]

# Keep the path in one place so the confirmation message always names the
# file that was actually written (it previously named the 5050 file).
output_path = 'sarima_phase_1_with_survey_8020.csv'
results_df.to_csv(output_path, index=False)

print(f"Forecast results have been written to {output_path}")


1 Processing ID: Exp_62
2 Processing ID: Exp_111
3 Processing ID: Exp_567
4 Processing ID: Exp_500
5 Processing ID: Exp_222
6 Processing ID: Exp_587
7 Processing ID: Exp_17
8 Processing ID: Exp_462
9 Processing ID: Exp_271
10 Processing ID: Exp_49
11 Processing ID: Exp_451
12 Processing ID: Exp_502
13 Processing ID: Exp_86
14 Processing ID: Exp_612
15 Processing ID: Exp_12
16 Processing ID: Exp_208
17 Processing ID: Exp_529
18 Processing ID: Exp_56
19 Processing ID: Exp_93
20 Processing ID: Exp_446
21 Processing ID: Exp_643
22 Processing ID: Exp_191
23 Processing ID: Exp_445
24 Processing ID: Exp_420
25 Processing ID: Exp_103


MemoryError: Unable to allocate 30.9 MiB for an array with shape (53, 53, 1441) and data type float64