In [1]:
from sklearn import metrics
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation


In [15]:
class Dataset:
    def __init__(self, filename):
        self.data = pd.read_csv(filename)
        self.data["timestamp"] = pd.to_datetime(
            self.data["timestamp"], format="%Y/%m/%d %H:%M:%S"
        )

    def fill_na(self, df):
        imp = IterativeImputer(estimator=ExtraTreesRegressor(), random_state=0)
        cant_fill = ["timestamp", "value"]
        can_fill = [i for i in df.columns if i not in cant_fill]
        imp.fit(df[can_fill])
        df[can_fill] = imp.transform(df[can_fill])
        return df

    def __getitem__(self, device_id):
        df = self.data[self.data["device_id"] == device_id]
        df = df.drop(["device_id", "lat_long", "holiday"], axis=1)
        return self.fill_na(df)

    def split(self, df, forecast_days):
        train_size = len(df) - forecast_days * 24 * 6
        df.columns = ["ds", "y"] + list(df.columns[2:])
        train_data = df[:train_size]
        x, y = df.drop(["y"], axis=1), pd.DataFrame(df.iloc[train_size:, 1])
        return train_data, x[train_size:], y


In [16]:
dataset = Dataset("data/full_features.csv")
poweru = dataset["poweru"]
poweru




Unnamed: 0,timestamp,value,pv_output,solar_radiation,temperature,Precip,humidity,day_of_week,working_day,week_of_month
61286,2021-10-04 15:17:15,530.5,16.403339,187.828227,13.982297,0.069066,74.493700,0.0,1.0,1.0
61287,2021-10-04 15:19:37,543.0,16.403339,187.828227,13.982297,0.069066,74.493700,0.0,1.0,1.0
61288,2021-10-04 15:29:38,520.7,16.403339,187.828227,13.982297,0.069066,74.493700,0.0,1.0,1.0
61289,2021-10-04 15:31:33,520.7,16.403339,187.828227,13.982297,0.069066,74.493700,0.0,1.0,1.0
61290,2021-10-04 15:39:38,491.5,16.403339,187.828227,13.982297,0.069066,74.493700,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
87835,2022-04-17 23:18:50,194.3,0.000000,0.000000,13.560000,0.038462,84.923077,6.0,0.0,3.0
87836,2022-04-17 23:28:50,181.8,0.000000,0.000000,13.560000,0.038462,84.923077,6.0,0.0,3.0
87837,2022-04-17 23:38:50,209.7,0.000000,0.000000,13.560000,0.038462,84.923077,6.0,0.0,3.0
87838,2022-04-17 23:48:50,209.7,0.000000,0.000000,13.560000,0.038462,84.923077,6.0,0.0,3.0


In [17]:
features = list(poweru.columns[2:])
features


['pv_output',
 'solar_radiation',
 'temperature',
 'Precip',
 'humidity',
 'day_of_week',
 'working_day',
 'week_of_month']

In [18]:
train_data, x_test, y_test = dataset.split(poweru, 7)


In [21]:
class Model:
    def __init__(self, features, changepoint_prior_scale):
        self.model = Prophet(changepoint_prior_scale=changepoint_prior_scale)
        for feature in features:
            self.model.add_regressor(feature)

    def train(self, x):
        self.model.fit(x)

    def predict(self, x):
        return self.model.predict(x)

    def eval(self, label, pred):
        mae = metrics.mean_absolute_error(label, pred["yhat"])
        rmse = metrics.mean_absolute_error(label, pred["yhat"])
        mape = metrics.mean_absolute_percentage_error(label, pred["yhat"])
        return mae, rmse, mape


In [22]:
prophet_model = Model(features, 0.1)
prophet_model.train(train_data)
y_pred = prophet_model.predict(x_test)
mae, rmse, mape = prophet_model.eval(y_test, y_pred)


17:26:07 - cmdstanpy - INFO - Chain [1] start processing
17:26:58 - cmdstanpy - INFO - Chain [1] done processing


In [23]:
print("MAE: {}".format(mae))
print("RMSE: {}".format(rmse))
print("MAPE: {}".format(mape))


MAE: 44.74326051014192
RMSE: 44.74326051014192
MAPE: 0.16991838792012506
