Import Modules

In [None]:
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, desc
from config import api_key
import numpy as np
import requests
import json

Import files as dataframes

In [None]:
# Load each reporting year's CSV into its own DataFrame.
# The 2014-15 file is read with pandas' default encoding; the two later files
# are read with an explicit single-byte encoding.
LEGACY_ENCODING = "ISO-8859-1"

fourteen_fiftenn_pd = pd.read_csv("draft_data/2014-15.csv")
seventeen_eighteen_pd = pd.read_csv(
    "draft_data/2017-18.csv",
    encoding=LEGACY_ENCODING,
)
twenty_twentyone = pd.read_csv(
    "draft_data/2020-21.csv",
    encoding=LEGACY_ENCODING,
)

Initial transformations to prepare the data for the latitude/longitude API calls

In [None]:
# The 2020-21 file spells the header "Facility name"; give it the same
# "Facility Name" label the other two frames are accessed by.
twenty_twentyone = twenty_twentyone.rename(
    columns={"Facility name": "Facility Name"}
)

In [None]:
# Keep only rows whose facility name is not the 'Corporate Total' label,
# i.e. discard the corporate total rows and keep everything else.
fourteen_fiftenn_pd = fourteen_fiftenn_pd.loc[
    fourteen_fiftenn_pd['Facility Name'].ne('Corporate Total')
]
seventeen_eighteen_pd = seventeen_eighteen_pd.loc[
    seventeen_eighteen_pd['Facility Name'].ne('Corporate Total')
]
twenty_twentyone = twenty_twentyone.loc[
    twenty_twentyone['Facility Name'].ne('Corporate Total')
]

In [None]:
# Discard the columns that are not used further on in this notebook.
# Each file labels these columns differently (capitalisation, embedded line
# breaks, spacing), so the labels are listed separately per year and must match
# the CSV headers exactly.
fourteen_fiftenn_pd = fourteen_fiftenn_pd.drop(
    columns=[
        "Type",
        "Scope 1 (t CO2-e)",
        "Scope 2 (t CO2-e)",
        "Total Emissions (t CO2-e)",
        "Emission  Intensity (t/Mwh)",
        "Grid Connected",
        "Grid",
        "Important Notes",
    ]
)
seventeen_eighteen_pd = seventeen_eighteen_pd.drop(
    columns=[
        "Type",
        "Total Scope 1 Emissions \n(t CO2-e)",
        "Total Scope 2 Emissions \n(t CO2-e)",
        "Total Emissions \n(t CO2-e)",
        "Emission Intensity \n(t CO2-e/ MWh)",
        "Grid Connected",
        "Grid",
    ]
)
twenty_twentyone = twenty_twentyone.drop(
    columns=[
        "Type",
        "Total scope 1 emissions \n(t CO2-e)",
        "Total scope 2 emissions \n(t CO2-e)",
        "Total emissions \n(t CO2-e)",
        "Emission intensity \n(t CO2-e/ MWh)",
        "Grid connected",
        "Grid",
    ]
)

In [None]:
# Tag every row with its reporting period so the plots can filter by year.
fourteen_fiftenn_pd = fourteen_fiftenn_pd.assign(Year='2014-2015')
seventeen_eighteen_pd = seventeen_eighteen_pd.assign(Year='2017-2018')
twenty_twentyone = twenty_twentyone.assign(Year='2020-2021')

API call for lats & lngs

In [None]:
# Geocode every 2014-15 facility with the Google Geocoding API.
# `params` carries the API key on every request; only the address changes.
params = {"key": api_key}
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Coordinates are collected in row order and attached as whole columns after
# the loop. Writing the string "NaN" cell-by-cell into a column that pandas
# created as float relies on silent upcasting, which pandas has deprecated.
lats = []
lngs = []

for _, row in fourteen_fiftenn_pd.iterrows():
    # Query is "<facility name>+<state>"; requests URL-encodes it.
    params['address'] = f"{row['Facility Name']}+{row['State']}"

    try:
        # A timeout stops one stalled request from hanging the notebook.
        # Do not print response.url in a public repo: it contains the API key.
        response = requests.get(base_url, params=params, timeout=10)
        coords = response.json()["results"][0]["geometry"]["location"]
        lat, lng = coords["lat"], coords["lng"]
    except (requests.exceptions.RequestException, ValueError, KeyError, IndexError):
        # Network failure, non-JSON body, or no match: mark the row with the
        # "NaN" string that the later row-dropping cell filters on.
        lat, lng = "NaN", "NaN"

    lats.append(lat)
    lngs.append(lng)

# Positional assignment: one value per row, in iteration order. On an empty
# frame this still creates the (empty) Lat/Lng columns.
fourteen_fiftenn_pd["Lat"] = lats
fourteen_fiftenn_pd["Lng"] = lngs

# Visualize to confirm lat lng appear
fourteen_fiftenn_pd.head()

In [None]:
# Geocode every 2017-18 facility with the Google Geocoding API.
# `params` carries the API key on every request; only the address changes.
params = {"key": api_key}
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Coordinates are collected in row order and attached as whole columns after
# the loop. Writing the string "NaN" cell-by-cell into a column that pandas
# created as float relies on silent upcasting, which pandas has deprecated.
lats = []
lngs = []

for _, row in seventeen_eighteen_pd.iterrows():
    # Query is "<facility name>+<state>"; requests URL-encodes it.
    params['address'] = f"{row['Facility Name']}+{row['State']}"

    try:
        # A timeout stops one stalled request from hanging the notebook.
        # Do not print response.url in a public repo: it contains the API key.
        response = requests.get(base_url, params=params, timeout=10)
        coords = response.json()["results"][0]["geometry"]["location"]
        lat, lng = coords["lat"], coords["lng"]
    except (requests.exceptions.RequestException, ValueError, KeyError, IndexError):
        # Network failure, non-JSON body, or no match: mark the row with the
        # "NaN" string that the later row-dropping cell filters on.
        lat, lng = "NaN", "NaN"

    lats.append(lat)
    lngs.append(lng)

# Positional assignment: one value per row, in iteration order. On an empty
# frame this still creates the (empty) Lat/Lng columns.
seventeen_eighteen_pd["Lat"] = lats
seventeen_eighteen_pd["Lng"] = lngs

# Show the first rows to confirm the coordinates were attached.
seventeen_eighteen_pd.head()

In [None]:
# Geocode every 2020-21 facility with the Google Geocoding API.
# `params` carries the API key on every request; only the address changes.
params = {"key": api_key}
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Coordinates are collected in row order and attached as whole columns after
# the loop. Writing the string "NaN" cell-by-cell into a column that pandas
# created as float relies on silent upcasting, which pandas has deprecated.
lats = []
lngs = []

for _, row in twenty_twentyone.iterrows():
    # Query is "<facility name>+<state>", the same form the 2014-15 and
    # 2017-18 cells send (this cell previously put spaces around the "+"),
    # so a facility is looked up with the same query text in every year.
    params['address'] = f"{row['Facility Name']}+{row['State']}"

    try:
        # A timeout stops one stalled request from hanging the notebook.
        # Do not print response.url in a public repo: it contains the API key.
        response = requests.get(base_url, params=params, timeout=10)
        coords = response.json()["results"][0]["geometry"]["location"]
        lat, lng = coords["lat"], coords["lng"]
    except (requests.exceptions.RequestException, ValueError, KeyError, IndexError):
        # Network failure, non-JSON body, or no match: mark the row with the
        # "NaN" string that the later row-dropping cell filters on.
        lat, lng = "NaN", "NaN"

    lats.append(lat)
    lngs.append(lng)

# Positional assignment: one value per row, in iteration order. On an empty
# frame this still creates the (empty) Lat/Lng columns.
twenty_twentyone["Lat"] = lats
twenty_twentyone["Lng"] = lngs

# Show the first rows to confirm the coordinates were attached.
twenty_twentyone.head()

In [None]:
# Discard facilities for which the geocoding step found no coordinates.
# Those rows were marked with the *string* 'NaN' (not a float NaN), so the
# filter compares against that exact text.
fourteen_fiftenn_pd = fourteen_fiftenn_pd.loc[
    fourteen_fiftenn_pd['Lat'].ne('NaN')
]

seventeen_eighteen_pd = seventeen_eighteen_pd.loc[
    seventeen_eighteen_pd['Lat'].ne('NaN')
]

twenty_twentyone = twenty_twentyone.loc[
    twenty_twentyone['Lat'].ne('NaN')
]

Clean up the populated dataframes

In [None]:
# Map the 2014-15 headers onto the shared snake-style column names.
# The keys must match the CSV headers exactly, including the trailing space in
# "Reporting Entity " and the "Mwh" capitalisation; labels that are not
# present are left untouched by rename().
columns_14_15 = {
    "Reporting Entity ": "Reporting_Entity",
    "Facility Name": "Facility_Name",
    "Electricity Production (GJ)": "Electricity_Production_GJ",
    "Electricity Production (Mwh)": "Electricity_Production_MWh",
    "Primary Fuel": "Primary_Fuel",
}
fourteen_fifteen_transformed = fourteen_fiftenn_pd.rename(columns=columns_14_15)

In [None]:
# Map the 2017-18 headers onto the shared snake-style column names.
# Labels that are not present in the frame are left untouched by rename().
columns_17_18 = {
    "Reporting Entity": "Reporting_Entity",
    "Facility Name": "Facility_Name",
    "Electricity Production (GJ)": "Electricity_Production_GJ",
    "Electricity Production (MWh)": "Electricity_Production_MWh",
    "Primary Fuel": "Primary_Fuel",
}
seventeen_eighteen_transformed = seventeen_eighteen_pd.rename(columns=columns_17_18)

In [None]:
# Map the 2020-21 headers (sentence-case in this file) onto the shared
# snake-style column names. Labels that are not present in the frame are left
# untouched by rename().
columns_20_21 = {
    "Reporting entity": "Reporting_Entity",
    "Facility Name": "Facility_Name",
    "Electricity production (GJ)": "Electricity_Production_GJ",
    "Electricity production (MWh)": "Electricity_Production_MWh",
    "Primary fuel": "Primary_Fuel",
}
twenty_twentyone_transformed = twenty_twentyone.rename(columns=columns_20_21)

Categorize renewables

In [None]:
# Fuel labels that count as renewable generation.
renewables = ['Hydro', 'Landfill Gas', 'Wind', 'Solar', 'Bagasse', 'Biofuel', 'Macadamia Nut Shells', 'Biogas']

# Add a boolean 'Renewable' column: True where Primary_Fuel is one of the
# labels above. The match is exact and case-sensitive.
# NOTE(review): the 2020-21 file uses sentence-case headers; confirm its fuel
# values use the same capitalisation as this list.
for fuel_frame in (
    fourteen_fifteen_transformed,
    seventeen_eighteen_transformed,
    twenty_twentyone_transformed,
):
    fuel_frame['Renewable'] = fuel_frame['Primary_Fuel'].isin(renewables)

Drop the leftover 'index' column

In [None]:
# Remove a column literally named 'index' from each frame, if it exists.
# None of the cells shown here create such a column, so a strict drop would
# raise KeyError on a fresh kernel unless the CSVs themselves contain one.
# errors='ignore' drops the column when present and is a no-op otherwise.
fourteen_fifteen_transformed = fourteen_fifteen_transformed.drop(
    columns=['index'], errors='ignore'
)
seventeen_eighteen_transformed = seventeen_eighteen_transformed.drop(
    columns=['index'], errors='ignore'
)
twenty_twentyone_transformed = twenty_twentyone_transformed.drop(
    columns=['index'], errors='ignore'
)

Export data as CSV

In [None]:
# Write each cleaned frame to its own CSV in ../data.
# to_csv is called with its defaults, so the DataFrame index is written as the
# first (unnamed) column.
# NOTE(review): confirm downstream readers expect that index column.
for csv_path, year_frame in (
    ('../data/14-15.csv', fourteen_fifteen_transformed),
    ('../data/17-18.csv', seventeen_eighteen_transformed),
    ('../data/20-21.csv', twenty_twentyone_transformed),
):
    year_frame.to_csv(csv_path)