# Ontario Power - Load Data

## Setup

In [43]:
import pandas as pd
import numpy as np

In [44]:
# Base URL of the folder holding the yearly zonal-demand CSV files.
# The demand-loading cells append a file name directly to this string,
# so the trailing slash is required.
input_data_path = (
    'https://raw.githubusercontent.com/CommonSenseMachineLearning/OntarioPower/master/raw_data/demand_original_csv/'
)

## Load Data
- Two versions of each loader are provided: v1 (explicit, one `read_csv` call per file) and v2 (loop-based). After discussion, only one will be kept.

### Load Demand Data

#### v1 - Basic Data Load

In [38]:
# Simple version: every yearly file is named explicitly, then loaded in order.
demand_files = (
    'PUB_DemandZonal_2015.csv',
    'PUB_DemandZonal_2016.csv',
    'PUB_DemandZonal_2017.csv',
    'PUB_DemandZonal_2018.csv',
    'PUB_DemandZonal_2019.csv',
    'PUB_DemandZonal_2020.csv',
)

# skiprows=3 drops the first three lines of each file before parsing.
# The per-year names are kept so each year can still be inspected on its own.
(demand_2015, demand_2016, demand_2017,
 demand_2018, demand_2019, demand_2020) = [
    pd.read_csv(input_data_path + csv_name, skiprows=3)
    for csv_name in demand_files
]

# Stack the six yearly frames, oldest first, into a single DataFrame.
demand_v1 = pd.concat(
    [demand_2015, demand_2016, demand_2017, demand_2018, demand_2019, demand_2020]
)

print('Final shape:', demand_v1.shape)

demand_v1

Final shape: (49968, 15)


Unnamed: 0,Date,Hour,Ontario Demand,Northwest,Northeast,Ottawa,East,Toronto,Essa,Bruce,Southwest,Niagara,West,Zone Total,Diff
0,2015-01-01,1,14960,604,1314,1026,935,5317,1040,57,2881,411,1346,14932,-28
1,2015-01-01,2,14476,597,1282,988,911,5135,995,56,2784,388,1289,14425,-51
2,2015-01-01,3,13979,592,1280,966,900,4976,957,56,2679,372,1249,14026,47
3,2015-01-01,4,13670,590,1301,954,903,4851,933,55,2576,358,1221,13741,71
4,2015-01-01,5,13567,585,1313,954,904,4789,929,57,2543,353,1217,13644,77
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6139,2020-09-12,20,14692,497,1088,911,957,5438,923,74,2854,471,1708,14922,229
6140,2020-09-12,21,14244,495,1057,862,901,5259,863,68,2735,444,1657,14341,97
6141,2020-09-12,22,13472,480,1033,807,840,5034,798,67,2560,415,1575,13609,137
6142,2020-09-12,23,12603,465,993,748,778,4756,728,67,2431,382,1462,12811,208


#### v2 - Flexible, Scalable, Complex Data Load

In [None]:
# Flexible version: load one zonal-demand CSV per year and stack them.
#
# The yearly frames are collected in a list and concatenated ONCE after the
# loop. Calling pd.concat inside the loop on an ever-growing DataFrame copies
# every previously loaded row again on each pass.
demand_frames = []

# load data files for 2015 to 2020 (the end of range() is exclusive)
for year in range(2015, 2021):

    # build file name
    file_name = f'PUB_DemandZonal_{year}.csv'
    print(year, 'file:', file_name)

    # load file into temp DataFrame; skiprows=3 drops the first three lines
    # of the file before parsing
    temp_demand = pd.read_csv(input_data_path + file_name, skiprows=3)
    print('shape:', temp_demand.shape, '\n')

    # keep the yearly frame for the single concat below
    demand_frames.append(temp_demand)

# Stack all years. Each file's own row index is kept, so index labels
# repeat across years; use positional access (.iloc) to address rows.
demand_v2 = pd.concat(demand_frames)

# preview final data
print('Final shape:', demand_v2.shape)
demand_v2

In [40]:
# Preview ten rows by position (rows 12000..12009); the index labels shown
# are the per-file labels kept by the concat, not these positions.
demand_v2.iloc[12000:12010]


Unnamed: 0,Date,Hour,Ontario Demand,Northwest,Northeast,Ottawa,East,Toronto,Essa,Bruce,Southwest,Niagara,West,Zone Total,Diff
3240,2016-05-15,1,11340,392,1066,554,876,4144,690,98,2091,350,1086,11348,9
3241,2016-05-15,2,11128,388,1070,536,858,4007,674,99,2025,344,1076,11077,-51
3242,2016-05-15,3,10956,390,1058,517,843,3946,667,99,1994,337,1058,10910,-46
3243,2016-05-15,4,10900,381,1078,519,856,3931,671,100,1999,343,1058,10936,36
3244,2016-05-15,5,11101,370,1103,529,883,3975,688,101,2037,341,1083,11111,10
3245,2016-05-15,6,11260,374,1110,541,890,4015,714,103,2094,340,1092,11274,14
3246,2016-05-15,7,11778,366,1134,577,905,4218,763,109,2229,370,1121,11792,14
3247,2016-05-15,8,12395,371,1166,638,923,4548,822,117,2357,407,1146,12494,99
3248,2016-05-15,9,12808,373,1196,754,836,4841,861,120,2420,422,1100,12921,113
3249,2016-05-15,10,12985,367,1187,760,812,5014,851,121,2430,422,1141,13105,120


### Load Supply Data

In [55]:
# Base URL of the folder holding the yearly power-supply CSV files.
# NOTE: this rebinds `input_data_path`, the same name the demand-loading cells
# read. After this cell runs, re-running a demand cell will build URLs under
# the supply folder unless the demand path cell is run again first.
input_data_path = (
    'https://raw.githubusercontent.com/CommonSenseMachineLearning/OntarioPower/master/raw_data/supply_converted_to_csv/'
)

#### v1 - Basic Data Load

In [56]:
# Simple version: every yearly supply file is named explicitly, then loaded in order.
supply_files = (
    '2015_Power_Supply.csv',
    '2016_Power_Supply.csv',
    '2017_Power_Supply.csv',
    '2018_Power_Supply.csv',
    '2019_Power_Supply.csv',
    '2020_Power_Supply.csv',
)

# The per-year names are kept so each year can still be inspected on its own.
(supply_2015, supply_2016, supply_2017,
 supply_2018, supply_2019, supply_2020) = [
    pd.read_csv(input_data_path + csv_name)
    for csv_name in supply_files
]

# Stack the six yearly frames, oldest first, into a single DataFrame.
supply_v1 = pd.concat(
    [supply_2015, supply_2016, supply_2017, supply_2018, supply_2019, supply_2020]
)

print("Final shape:", supply_v1.shape)

supply_v1

Final shape: (300096, 11)


Unnamed: 0,docID,DocTitle,DocRevision,DocConfClass,CreatedAt,DeliveryYear,Day,Hour,Fuel,OutputQuality,Output
0,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,NUCLEAR,0,11564.0
1,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,GAS,0,957.0
2,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,HYDRO,0,3173.0
3,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,WIND,0,2504.0
4,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,SOLAR,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
37147,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,GAS,-1,414.0
37148,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,HYDRO,0,3069.0
37149,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,WIND,0,653.0
37150,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,SOLAR,0,0.0


#### v2 - Flexible, Scalable, Complex Supply Data Load

In [75]:
# Complex version: load one power-supply CSV per year and stack them.
#
# The yearly frames are collected in a list and concatenated ONCE after the
# loop. Calling pd.concat inside the loop on an ever-growing DataFrame copies
# every previously loaded row again on each pass.

# 1) Create an empty list to collect the yearly DataFrames
supply_frames = []

# 2) Load data files for 2015 to 2020 (the end of range() is exclusive)
for year in range(2015, 2021):

    # 3) Build file name
    file_name = f'{year}_Power_Supply.csv'
    print(year, 'file:', file_name)

    # 4) Load file into temp DataFrame
    temp_supply = pd.read_csv(input_data_path + file_name)
    print('shape:', temp_supply.shape, '\n')

    # 5) Keep the yearly frame for the single concat below
    supply_frames.append(temp_supply)

# Stack all years. Each file's own row index is kept, so index labels
# repeat across years; use positional access (.iloc) to address rows.
supply_v2 = pd.concat(supply_frames)

# 6) Preview final data
print('Final shape:', supply_v2.shape)
supply_v2

2015 file: 2015_Power_Supply.csv
shape: (52560, 11) 

2016 file: 2016_Power_Supply.csv
shape: (52704, 11) 

2017 file: 2017_Power_Supply.csv
shape: (52560, 11) 

2018 file: 2018_Power_Supply.csv
shape: (52560, 11) 

2019 file: 2019_Power_Supply.csv
shape: (52560, 11) 

2020 file: 2020_Power_Supply.csv
shape: (37152, 11) 

Final shape: (300096, 11)


Unnamed: 0,docID,DocTitle,DocRevision,DocConfClass,CreatedAt,DeliveryYear,Day,Hour,Fuel,OutputQuality,Output
0,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,NUCLEAR,0,11564.0
1,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,GAS,0,957.0
2,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,HYDRO,0,3173.0
3,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,WIND,0,2504.0
4,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2016-01-01 6:42,2015,2015-01-01,1,SOLAR,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
37147,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,GAS,-1,414.0
37148,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,HYDRO,0,3069.0
37149,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,WIND,0,653.0
37150,GenOutputbyFuelHourly,Generator Output by Fuel Type Hourly Report,1,PUB,2020-09-15 6:50,2020,2020-09-14,24,SOLAR,0,0.0
