# Ontario Power - Load Data

## Setup

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Base URL of the folder holding the raw yearly demand CSV files.
# The trailing slash is required: file names are appended to this string directly.
input_data_path = (
    'https://raw.githubusercontent.com/'
    'CommonSenseMachineLearning/OntarioPower/master/'
    'raw_data/demand_original_csv/'
)

## Load Data
- Two versions of the data-loading code are provided below (v1 and v2); both produce the same combined DataFrame. After discussion, only one will be kept.

### v1 - Basic Data Load

In [26]:
# v1 - simple version: one explicit read per yearly file (easy to write, read and debug).
# skiprows=3 makes pandas skip the first three lines of each file before parsing.
demand_2015 = pd.read_csv(f'{input_data_path}PUB_DemandZonal_2015.csv', skiprows=3)
demand_2016 = pd.read_csv(f'{input_data_path}PUB_DemandZonal_2016.csv', skiprows=3)
demand_2017 = pd.read_csv(f'{input_data_path}PUB_DemandZonal_2017.csv', skiprows=3)
demand_2018 = pd.read_csv(f'{input_data_path}PUB_DemandZonal_2018.csv', skiprows=3)
demand_2019 = pd.read_csv(f'{input_data_path}PUB_DemandZonal_2019.csv', skiprows=3)
demand_2020 = pd.read_csv(f'{input_data_path}PUB_DemandZonal_2020.csv', skiprows=3)

# Stack the yearly frames in chronological order.
# Each frame keeps its own row labels, so index values repeat across years.
demand_v1 = pd.concat(
    objs=[
        demand_2015,
        demand_2016,
        demand_2017,
        demand_2018,
        demand_2019,
        demand_2020,
    ]
)

# Report the combined size, then display the frame as the cell output.
print('Final shape:', demand_v1.shape)

demand_v1

Final shape: (49968, 15)


Unnamed: 0,Date,Hour,Ontario Demand,Northwest,Northeast,Ottawa,East,Toronto,Essa,Bruce,Southwest,Niagara,West,Zone Total,Diff
0,2015-01-01,1,14960,604,1314,1026,935,5317,1040,57,2881,411,1346,14932,-28
1,2015-01-01,2,14476,597,1282,988,911,5135,995,56,2784,388,1289,14425,-51
2,2015-01-01,3,13979,592,1280,966,900,4976,957,56,2679,372,1249,14026,47
3,2015-01-01,4,13670,590,1301,954,903,4851,933,55,2576,358,1221,13741,71
4,2015-01-01,5,13567,585,1313,954,904,4789,929,57,2543,353,1217,13644,77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6139,2020-09-12,20,14692,497,1088,911,957,5438,923,74,2854,471,1708,14922,229
6140,2020-09-12,21,14244,495,1057,862,901,5259,863,68,2735,444,1657,14341,97
6141,2020-09-12,22,13472,480,1033,807,840,5034,798,67,2560,415,1575,13609,137
6142,2020-09-12,23,12603,465,993,748,778,4756,728,67,2431,382,1462,12811,208


### v2 - Flexible, Scalable, Complex Data Load

In [31]:
# v2 - loop version: build each yearly file name, load it, and combine everything.
#
# The yearly frames are collected in a list and concatenated ONCE after the loop.
# Calling pd.concat inside the loop (starting from an empty DataFrame) re-copies
# all rows accumulated so far on every iteration and, in recent pandas versions,
# emits a FutureWarning about concatenating empty entries.
yearly_frames = []

# load data files for 2015 to 2020 (range's upper bound is exclusive)
for year in range(2015, 2021):

    # build file name for this year
    file_name = f'PUB_DemandZonal_{year}.csv'
    print(year, 'file:', file_name)

    # load file into a temp DataFrame; skiprows=3 skips the first three lines of the file
    temp_demand = pd.read_csv(input_data_path + file_name, skiprows=3)
    print('shape:', temp_demand.shape, '\n')

    # keep the yearly frame for the single concat below
    yearly_frames.append(temp_demand)

# combine all years in one pass; each frame keeps its own row labels, as before
demand_v2 = pd.concat(yearly_frames)

# preview final data
print('Final shape:', demand_v2.shape)
demand_v2

2015 file: PUB_DemandZonal_2015.csv
shape: (8760, 15) 

2016 file: PUB_DemandZonal_2016.csv
shape: (8784, 15) 

2017 file: PUB_DemandZonal_2017.csv
shape: (8760, 15) 

2018 file: PUB_DemandZonal_2018.csv
shape: (8760, 15) 

2019 file: PUB_DemandZonal_2019.csv
shape: (8760, 15) 

2020 file: PUB_DemandZonal_2020.csv
shape: (6144, 15) 

Final shape: (49968, 15)


Unnamed: 0,Date,Hour,Ontario Demand,Northwest,Northeast,Ottawa,East,Toronto,Essa,Bruce,Southwest,Niagara,West,Zone Total,Diff
0,2015-01-01,1,14960,604,1314,1026,935,5317,1040,57,2881,411,1346,14932,-28
1,2015-01-01,2,14476,597,1282,988,911,5135,995,56,2784,388,1289,14425,-51
2,2015-01-01,3,13979,592,1280,966,900,4976,957,56,2679,372,1249,14026,47
3,2015-01-01,4,13670,590,1301,954,903,4851,933,55,2576,358,1221,13741,71
4,2015-01-01,5,13567,585,1313,954,904,4789,929,57,2543,353,1217,13644,77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6139,2020-09-12,20,14692,497,1088,911,957,5438,923,74,2854,471,1708,14922,229
6140,2020-09-12,21,14244,495,1057,862,901,5259,863,68,2735,444,1657,14341,97
6141,2020-09-12,22,13472,480,1033,807,840,5034,798,67,2560,415,1575,13609,137
6142,2020-09-12,23,12603,465,993,748,778,4756,728,67,2431,382,1462,12811,208
