# Electricity Demand Forecasting

## <a id='Table_Of_Contents'>Table of Contents</a>

1. [Import Modules](#import)<br>
2. [Data Import](#read_data)<br>
    1. [Clean Data Frame](#clean_data)<br>
    2. [Data Split](#data_split)<br>




### 1. Import Modules <a id='import'></a>

In [3]:
import pandas as pd
import numpy as np
import re

import json
import datetime


from statsmodels.graphics.tsaplots import plot_acf,plot_pacf 
from statsmodels.tsa.seasonal import seasonal_decompose

from sklearn.preprocessing import StandardScaler

#from pmdarima import auto_arima

from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse


import warnings

from dateutil import parser

import matplotlib.pyplot as plt

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.tsa.statespace.mlemodel

### 2. Data Import <a id='read_data'></a>

In [4]:
# Location of the demand CSV loaded by pd.read_csv below. The path is relative,
# so it is resolved against the directory the notebook is run from.
data_file = '../data/elec_demand.csv'

In [11]:
# Load the raw CSV, taking its first column as the row index, then force the
# 'demand' column to floating point so later arithmetic is not done on ints.
full_df = (
    pd.read_csv(filepath_or_buffer=data_file, index_col=0)
    .astype(dtype={'demand': 'float'})
)

#### 2.A Clean the data frame <a id='clean_data'></a>

In [12]:
# Re-index the frame by timestamp:
#   1. parse every ISO-8601 string in the 'time' column with dateutil,
#   2. install the parsed values as a DatetimeIndex,
#   3. remove the now-redundant 'time' column,
#   4. sort the rows by that index.
# NOTE: this cell consumes the 'time' column, so running it a second time on
# the same frame raises a KeyError.
dt_time = list(map(parser.isoparse, full_df['time']))
full_df.index = pd.DatetimeIndex(dt_time)
del full_df['time']
full_df.sort_index(inplace=True)

# Print the index range, columns and dtypes as a quick sanity check.
full_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 378 entries, 2020-03-09 00:00:00+00:00 to 2020-03-24 17:00:00+00:00
Data columns (total 1 columns):
demand    378 non-null float64
dtypes: float64(1)
memory usage: 5.9 KB


In [17]:
# Make sure the index is a DatetimeIndex before using its datetime accessors.
full_df.index = pd.DatetimeIndex(full_df.index)

# Store the calendar date of every timestamp as a plain datetime.date object
# (an object-dtype column); it is the key for the daily aggregation.
# The date is taken in the index's own timezone.
full_df['date'] = full_df.index.date

# Print the updated column summary.
full_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 378 entries, 2020-03-09 00:00:00+00:00 to 2020-03-24 17:00:00+00:00
Data columns (total 2 columns):
demand    378 non-null float64
date      378 non-null object
dtypes: float64(1), object(1)
memory usage: 8.9+ KB


In [18]:
# Preview the first five rows to confirm the new 'date' column.
full_df.head(n=5)

Unnamed: 0,demand,date
2020-03-09 00:00:00+00:00,24260.0,2020-03-09
2020-03-09 01:00:00+00:00,25786.0,2020-03-09
2020-03-09 02:00:00+00:00,28496.0,2020-03-09
2020-03-09 03:00:00+00:00,30754.0,2020-03-09
2020-03-09 04:00:00+00:00,30688.0,2020-03-09


In [21]:
# Collapse the rows to one per calendar date by summing within each 'date'
# group, then persist the daily totals for later use.
# NOTE(review): the info() output captured above ends at 17:00 on the final
# day, so that day's total presumably covers fewer hours than the others;
# confirm whether it should be excluded before modelling.
day_demand = full_df.groupby(by='date').agg('sum')
day_demand.to_csv(path_or_buf='../data/new_demand.csv')