Import modules

In [1]:
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, desc
# from config import api_key
import numpy as np
import requests
import json

Import CSVs

In [2]:
# Load the three transformed extracts (2014-15, 2017-18, 2020-21) into
# DataFrames, one per file, in the order the names are listed.
ff_transformed, se_transformed, tt_transformed = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../export_data/14-15.csv",
        "../export_data/17-18.csv",
        "../export_data/20-21.csv",
    )
)

Load into database

In [None]:
import pymongo

# Connection string for the MongoDB server on localhost, port 27017.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

# Handle to the 'energy_db' database used by the cells below.
db = client.get_database('energy_db')

In [None]:
# Collection handles, one per dataset, looked up by name on the database.
fourt_fift_energy_production = db["Fourteen_Fifteen_Energy_Production"]
sevt_eight_energy_production = db["Seventeen_Eighteen_Energy_Production"]
twe_twentyo_energy_production = db["Twenty_Twentyone_Energy_Production"]

# NOTE(review): nothing in the visible cells writes to these two collections —
# confirm they are still needed.
aus_population = db["Aus_Population"]
aus_income = db["Aus_Income"]

In [None]:
# Turn each DataFrame into a list of one-dict-per-row records and bulk-insert
# it into the matching collection.
# NOTE(review): the inserts are unconditional, so re-running this cell presumably
# adds the same rows again — confirm whether the collections should be emptied first.
ff_records = ff_transformed.to_dict(orient='records')
fourt_fift_energy_production.insert_many(ff_records)

se_records = se_transformed.to_dict(orient='records')
sevt_eight_energy_production.insert_many(se_records)

tt_records = tt_transformed.to_dict(orient='records')
twe_twentyo_energy_production.insert_many(tt_records)

Set up the data for a visualisation that shows each state's renewables growth.

In [40]:
# Narrow the 2014-15 data to the columns used for the per-state progression.
ff_transformed_progression = ff_transformed.loc[:, ["State", "Year", "Renewable"]]
ff_transformed_progression

Unnamed: 0,State,Year,Renewable
0,NSW,2014-2015,True
1,ACT,2014-2015,True
2,VIC,2014-2015,True
3,VIC,2014-2015,True
4,NSW,2014-2015,False
...,...,...,...
344,WA,2014-2015,False
345,WA,2014-2015,False
346,WA,2014-2015,False
347,WA,2014-2015,False


In [4]:
# Rows of the 2014-15 data whose Renewable flag is True.
ff_transformed_progression_renewable = ff_transformed_progression.loc[
    ff_transformed_progression["Renewable"] == True
]

# Count those rows per state and keep only the state and its count, named for
# the period. The chain returns a new frame at every step; renaming a column
# subset in place (as before) can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
ff_transformed_progression_renewable_grouped = (
    ff_transformed_progression_renewable
    .groupby(["State"], as_index=False)
    .count()
    .loc[:, ["State", "Renewable"]]
    .rename(columns={'Renewable': 'Renewable_Energy_Plants_2014_2015'})
)
ff_transformed_progression_renewable_grouped

Unnamed: 0,State,Renewable_Energy_Plants_2014_2015
0,ACT,4
1,NSW,38
2,NT,2
3,QLD,14
4,SA,20
5,TAS,35
6,VIC,41
7,WA,15


In [5]:
# Rows of the 2014-15 data whose Renewable flag is False.
ff_transformed_progression_nonrenewable = ff_transformed_progression.loc[
    ff_transformed_progression["Renewable"] == False
]

# Count those rows per state and keep only the state and its count, named for
# the period. The chain returns a new frame at every step; renaming a column
# subset in place (as before) can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
ff_transformed_progression_nonrenewable_grouped = (
    ff_transformed_progression_nonrenewable
    .groupby(["State"], as_index=False)
    .count()
    .loc[:, ["State", "Renewable"]]
    .rename(columns={'Renewable': 'Non_Renewable_Energy_Plants_2014_2015'})
)
ff_transformed_progression_nonrenewable_grouped

Unnamed: 0,State,Non_Renewable_Energy_Plants_2014_2015
0,NSW,32
1,NT,14
2,QLD,31
3,SA,15
4,TAS,3
5,VIC,22
6,WA,63


In [6]:
# Start the per-state progression table: 2014-15 renewable counts on the left,
# 2014-15 non-renewable counts joined on State (a single key; the previous
# ["State", "State"] listed the same key twice).
# The left join keeps exactly the states of the renewable frame; a state that is
# absent from the non-renewable frame gets NaN in the joined column.
energy_progression_merged = pd.merge(
    ff_transformed_progression_renewable_grouped,
    ff_transformed_progression_nonrenewable_grouped,
    how="left",
    on="State",
)

In [7]:
# Narrow the 2017-18 data to the columns used for the per-state progression.
se_transformed_progression = se_transformed.loc[:, ["State", "Year", "Renewable"]]
se_transformed_progression.head(2)

Unnamed: 0,State,Year,Renewable
0,SA,2017-2018,True
1,NSW,2017-2018,True


In [8]:
# Rows of the 2017-18 data whose Renewable flag is True.
se_transformed_progression_renewable = se_transformed_progression.loc[
    se_transformed_progression["Renewable"] == True
]

# Count those rows per state and keep only the state and its count, named for
# the period. The chain returns a new frame at every step; renaming a column
# subset in place (as before) can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
se_transformed_progression_renewable_grouped = (
    se_transformed_progression_renewable
    .groupby(["State"], as_index=False)
    .count()
    .loc[:, ["State", "Renewable"]]
    .rename(columns={'Renewable': 'Renewable_Energy_Plants_2017_2018'})
)
se_transformed_progression_renewable_grouped

Unnamed: 0,State,Renewable_Energy_Plants_2017_2018
0,ACT,3
1,NSW,45
2,NT,2
3,QLD,15
4,SA,25
5,TAS,38
6,VIC,47
7,WA,20


In [9]:
# Append the 2017-18 renewable counts to the progression table, joined on State
# (a single key; the previous ["State", "State"] listed the same key twice).
# Left join: the rows already in the table are kept, unmatched states get NaN.
energy_progression_merged = pd.merge(
    energy_progression_merged,
    se_transformed_progression_renewable_grouped,
    how="left",
    on="State",
)

In [10]:
# Rows of the 2017-18 data whose Renewable flag is False.
se_transformed_progression_nonrenewable = se_transformed_progression.loc[
    se_transformed_progression["Renewable"] == False
]

# Count those rows per state and keep only the state and its count, named for
# the period. The chain returns a new frame at every step; renaming a column
# subset in place (as before) can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
se_transformed_progression_nonrenewable_grouped = (
    se_transformed_progression_nonrenewable
    .groupby(["State"], as_index=False)
    .count()
    .loc[:, ["State", "Renewable"]]
    .rename(columns={'Renewable': 'Non_Renewable_Energy_Plants_2017_2018'})
)
se_transformed_progression_nonrenewable_grouped

Unnamed: 0,State,Non_Renewable_Energy_Plants_2017_2018
0,NSW,28
1,NT,65
2,QLD,32
3,SA,14
4,TAS,1
5,VIC,19
6,WA,71


In [11]:
# Append the 2017-18 non-renewable counts to the progression table, joined on
# State (a single key; the previous ["State", "State"] listed the same key twice).
# Left join: the rows already in the table are kept, unmatched states get NaN.
energy_progression_merged = pd.merge(
    energy_progression_merged,
    se_transformed_progression_nonrenewable_grouped,
    how="left",
    on="State",
)

In [12]:
# Narrow the 2020-21 data to the columns used for the per-state progression.
tt_transformed_progression = tt_transformed.loc[:, ["State", "Year", "Renewable"]]
tt_transformed_progression.head(2)

Unnamed: 0,State,Year,Renewable
0,SA,2020-2021,True
1,NSW,2020-2021,True


In [13]:
# Rows of the 2020-21 data whose Renewable flag is True.
tt_transformed_progression_renewable = tt_transformed_progression.loc[
    tt_transformed_progression["Renewable"] == True
]

# Count those rows per state and keep only the state and its count, named for
# the period. The chain returns a new frame at every step; renaming a column
# subset in place (as before) can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
tt_transformed_progression_renewable_grouped = (
    tt_transformed_progression_renewable
    .groupby(["State"], as_index=False)
    .count()
    .loc[:, ["State", "Renewable"]]
    .rename(columns={'Renewable': 'Renewable_Energy_Plants_2020_2021'})
)
tt_transformed_progression_renewable_grouped

Unnamed: 0,State,Renewable_Energy_Plants_2020_2021
0,ACT,5
1,NSW,64
2,NT,3
3,QLD,46
4,SA,32
5,TAS,39
6,VIC,65
7,WA,23


In [14]:
# Append the 2020-21 renewable counts to the progression table, joined on State
# (a single key; the previous ["State", "State"] listed the same key twice).
# Left join: the rows already in the table are kept, unmatched states get NaN.
energy_progression_merged = pd.merge(
    energy_progression_merged,
    tt_transformed_progression_renewable_grouped,
    how="left",
    on="State",
)

In [15]:
# Rows of the 2020-21 data whose Renewable flag is False.
tt_transformed_progression_nonrenewable = tt_transformed_progression.loc[
    tt_transformed_progression["Renewable"] == False
]

# Count those rows per state and keep only the state and its count, named for
# the period. The chain returns a new frame at every step; renaming a column
# subset in place (as before) can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
tt_transformed_progression_nonrenewable_grouped = (
    tt_transformed_progression_nonrenewable
    .groupby(["State"], as_index=False)
    .count()
    .loc[:, ["State", "Renewable"]]
    .rename(columns={'Renewable': 'Non_Renewable_Energy_Plants_2020_2021'})
)
tt_transformed_progression_nonrenewable_grouped

Unnamed: 0,State,Non_Renewable_Energy_Plants_2020_2021
0,NSW,22
1,NT,64
2,QLD,65
3,SA,17
4,TAS,1
5,VIC,20
6,WA,71


In [16]:
# Append the 2020-21 non-renewable counts, joined on State (a single key; the
# previous ["State", "State"] listed the same key twice), then replace the NaN
# left by the left joins with 0 for states that had no matching row.
# Columns that held NaN stay float after the fill, which is why they display as 0.0.
energy_progression_merged = pd.merge(
    energy_progression_merged,
    tt_transformed_progression_nonrenewable_grouped,
    how="left",
    on="State",
).fillna(0)
energy_progression_merged

Unnamed: 0,State,Renewable_Energy_Plants_2014_2015,Non_Renewable_Energy_Plants_2014_2015,Renewable_Energy_Plants_2017_2018,Non_Renewable_Energy_Plants_2017_2018,Renewable_Energy_Plants_2020_2021,Non_Renewable_Energy_Plants_2020_2021
0,ACT,4,0.0,3,0.0,5,0.0
1,NSW,38,32.0,45,28.0,64,22.0
2,NT,2,14.0,2,65.0,3,64.0
3,QLD,14,31.0,15,32.0,46,65.0
4,SA,20,15.0,25,14.0,32,17.0
5,TAS,35,3.0,38,1.0,39,1.0
6,VIC,41,22.0,47,19.0,65,20.0
7,WA,15,63.0,20,71.0,23,71.0


In [None]:
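# TODO: planned layout — one column each for
# (presumably "r" = renewable and "nr" = non-renewable; confirm):
# year
# ACT renewable
# ACT non-renewable
# NSW renewable
# NSW non-renewable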

Which state produces the most energy per capita?

Divided into renewable vs non-renewable.

In [18]:
# Read in CSV
population_summary = pd.read_csv("../data/state_population_summary.csv")

# format DF
population_summary.rename(columns={"Unnamed: 0": "State", "Total": "Population_Total"}, inplace=True)
population_summary = population_summary[["State", "Population_Total"]]

# Removing the Australia(b) total line in index 8
population_summary = population_summary.drop(8)

population_summary

Unnamed: 0,State,Population_Total
0,New South Wales,8072163
1,Victoria,6503491
2,Queensland,5156138
3,South Australia,1781516
4,Western Australia,2660026
5,Tasmania,557571
6,Northern Territory,232605
7,Australian Capital Territory,454499


In [19]:
# Replace full state names with their abbreviations so this frame can be joined
# to the energy tables, which key on the abbreviated State.
# Mapping by name (instead of assigning by hard-coded row position) keeps each
# abbreviation attached to the right state whatever the row order, and leaves
# already-abbreviated values untouched when the cell is re-run.
state_abbreviations = {
    "New South Wales": "NSW",
    "Victoria": "VIC",
    "Queensland": "QLD",
    "South Australia": "SA",
    "Western Australia": "WA",
    "Tasmania": "TAS",
    "Northern Territory": "NT",
    "Australian Capital Territory": "ACT",
}
population_summary["State"] = population_summary["State"].replace(state_abbreviations)

# Fail loudly if any name was not recognised, rather than producing NaN in a later join.
assert population_summary["State"].isin(list(state_abbreviations.values())).all(), \
    "unrecognised state name in population_summary"

population_summary

Unnamed: 0,State,Population_Total
0,NSW,8072163
1,VIC,6503491
2,QLD,5156138
3,SA,1781516
4,WA,2660026
5,TAS,557571
6,NT,232605
7,ACT,454499


In [20]:
# 2020-21 data reduced to the columns needed for production totals.
tt_eppc = tt_transformed[["State", "Electricity_Production_MWh", "Renewable"]]

# Renewable rows only, as State + production. Commas are removed from the text
# values (presumably thousands separators — confirm against the CSV), missing
# values become 0, and the production column is converted to numbers.
# Each step returns a new frame; the earlier in-place replace and column
# assignment on a chained selection can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
tt_eppc_renewable = (
    tt_eppc.loc[tt_eppc["Renewable"] == True, ["State", "Electricity_Production_MWh"]]
    .replace(',', '', regex=True)
    .fillna(0)
    .assign(
        Electricity_Production_MWh=lambda frame: pd.to_numeric(frame["Electricity_Production_MWh"])
    )
)

# Total renewable production per state.
tt_eppc_renewable_grouped = (
    tt_eppc_renewable
    .groupby(["State"], as_index=False)
    .sum()
    .rename(columns={'Electricity_Production_MWh': 'Renewable_Electricity_Production_MWh'})
)
tt_eppc_renewable_grouped

Unnamed: 0,State,Renewable_Electricity_Production_MWh
0,ACT,90253
1,NSW,13462111
2,NT,12510
3,QLD,4746457
4,SA,6607120
5,TAS,10157932
6,VIC,9572497
7,WA,3675688


In [21]:
# 2020-21 data reduced to the columns needed for production totals.
tt_eppc = tt_transformed[["State", "Electricity_Production_MWh", "Renewable"]]

# Non-renewable rows only, as State + production. Commas are removed from the
# text values (presumably thousands separators — confirm against the CSV),
# missing values become 0, and the production column is converted to numbers.
# Each step returns a new frame; the earlier in-place replace and column
# assignment on a chained selection can raise SettingWithCopyWarning on pandas
# versions without copy-on-write.
tt_eppc_non_renewable = (
    tt_eppc.loc[tt_eppc["Renewable"] == False, ["State", "Electricity_Production_MWh"]]
    .replace(',', '', regex=True)
    .fillna(0)
    .assign(
        Electricity_Production_MWh=lambda frame: pd.to_numeric(frame["Electricity_Production_MWh"])
    )
)

# Total non-renewable production per state.
tt_eppc_non_renewable_grouped = (
    tt_eppc_non_renewable
    .groupby(["State"], as_index=False)
    .sum()
    .rename(columns={'Electricity_Production_MWh': 'Non_Renewable_Electricity_Production_MWh'})
)
tt_eppc_non_renewable_grouped

Unnamed: 0,State,Non_Renewable_Electricity_Production_MWh
0,NSW,52599995
1,NT,2141689
2,QLD,59168403
3,SA,5318902
4,TAS,86661
5,VIC,35399396
6,WA,20464224


In [22]:
# Put renewable and non-renewable totals side by side, joined on State (a single
# key; the previous ["State", "State"] listed the same key twice).
# Left join on the renewable frame: a state with no non-renewable rows gets NaN,
# which is then replaced with 0.
eppc_merged = pd.merge(
    tt_eppc_renewable_grouped,
    tt_eppc_non_renewable_grouped,
    how="left",
    on="State",
).fillna(0)

eppc_merged

Unnamed: 0,State,Renewable_Electricity_Production_MWh,Non_Renewable_Electricity_Production_MWh
0,ACT,90253,0.0
1,NSW,13462111,52599995.0
2,NT,12510,2141689.0
3,QLD,4746457,59168403.0
4,SA,6607120,5318902.0
5,TAS,10157932,86661.0
6,VIC,9572497,35399396.0
7,WA,3675688,20464224.0


In [23]:
# Attach each state's total population, joined on State (a single key; the
# previous ["State", "State"] listed the same key twice).
# Left join: every state already in eppc_merged is kept.
eppc_merged = pd.merge(
    eppc_merged,
    population_summary,
    how="left",
    on="State",
)
eppc_merged

Unnamed: 0,State,Renewable_Electricity_Production_MWh,Non_Renewable_Electricity_Production_MWh,Population_Total
0,ACT,90253,0.0,454499
1,NSW,13462111,52599995.0,8072163
2,NT,12510,2141689.0,232605
3,QLD,4746457,59168403.0,5156138
4,SA,6607120,5318902.0,1781516
5,TAS,10157932,86661.0,557571
6,VIC,9572497,35399396.0,6503491
7,WA,3675688,20464224.0,2660026


In [24]:
# Add per-capita columns: each state's renewable and non-renewable production
# (MWh) divided by its total population.
eppc_merged['Renewable_MWh_Per_Capita'] = eppc_merged['Renewable_Electricity_Production_MWh'].div(
    eppc_merged['Population_Total']
)
eppc_merged['Non_Renewable_MWh_Per_Capita'] = eppc_merged['Non_Renewable_Electricity_Production_MWh'].div(
    eppc_merged['Population_Total']
)

eppc_merged

Unnamed: 0,State,Renewable_Electricity_Production_MWh,Non_Renewable_Electricity_Production_MWh,Population_Total,Renewable_MWh_Per_Capita,Non_Renewable_MWh_Per_Capita
0,ACT,90253,0.0,454499,0.198577,0.0
1,NSW,13462111,52599995.0,8072163,1.66772,6.516221
2,NT,12510,2141689.0,232605,0.053782,9.207407
3,QLD,4746457,59168403.0,5156138,0.920545,11.475333
4,SA,6607120,5318902.0,1781516,3.708707,2.985604
5,TAS,10157932,86661.0,557571,18.218186,0.155426
6,VIC,9572497,35399396.0,6503491,1.471901,5.443138
7,WA,3675688,20464224.0,2660026,1.381824,7.693242


In [25]:
# Visualisation frame: per-capita renewable and non-renewable production by state.
energy_production_per_capita = eppc_merged.loc[
    :, ["State", "Renewable_MWh_Per_Capita", "Non_Renewable_MWh_Per_Capita"]
]
energy_production_per_capita

Unnamed: 0,State,Renewable_MWh_Per_Capita,Non_Renewable_MWh_Per_Capita
0,ACT,0.198577,0.0
1,NSW,1.66772,6.516221
2,NT,0.053782,9.207407
3,QLD,0.920545,11.475333
4,SA,3.708707,2.985604
5,TAS,18.218186,0.155426
6,VIC,1.471901,5.443138
7,WA,1.381824,7.693242


Do states with a higher population have a higher percentage of renewable energy production?

State population vs the percentage of energy production that is renewable.

In [26]:
# Visualisation frame: total population next to renewable MWh per capita, by state.
# NOTE(review): the question above asks about the percentage of production that
# is renewable, but this frame carries renewable MWh per capita — confirm which
# metric is intended.
population_renewable_energy = eppc_merged.loc[
    :, ["State", "Population_Total", "Renewable_MWh_Per_Capita"]
]
population_renewable_energy

Unnamed: 0,State,Population_Total,Renewable_MWh_Per_Capita
0,ACT,454499,0.198577
1,NSW,8072163,1.66772
2,NT,232605,0.053782
3,QLD,5156138,0.920545
4,SA,1781516,3.708707
5,TAS,557571,18.218186
6,VIC,6503491,1.471901
7,WA,2660026,1.381824


Do states with a higher median income have a higher percentage of renewable energy production? State median income vs the percentage of energy production that is renewable.

In [30]:
# Load the state median-income summary and drop row label 8, the Australia(b)
# total line, so only the individual states and territories remain.
income_summary = pd.read_csv("../data/state_income_summary.csv").drop(8)

income_summary

Unnamed: 0,State,Median_Income
0,New South Wales,813
1,Victoria,803
2,Queensland,787
3,South Australia,734
4,Western Australia,848
5,Tasmania,701
6,Northern Territory,936
7,Australian Capital Territory,1203


In [31]:
# Change full state names to their abbreviations so this frame can be joined to
# the energy tables, which key on the abbreviated State.
# Mapping by name (instead of assigning by hard-coded row position) keeps each
# abbreviation attached to the right state whatever the row order, and leaves
# already-abbreviated values untouched when the cell is re-run.
state_abbreviations = {
    "New South Wales": "NSW",
    "Victoria": "VIC",
    "Queensland": "QLD",
    "South Australia": "SA",
    "Western Australia": "WA",
    "Tasmania": "TAS",
    "Northern Territory": "NT",
    "Australian Capital Territory": "ACT",
}
income_summary["State"] = income_summary["State"].replace(state_abbreviations)

# Fail loudly if any name was not recognised, rather than producing NaN in a later join.
assert income_summary["State"].isin(list(state_abbreviations.values())).all(), \
    "unrecognised state name in income_summary"

income_summary

Unnamed: 0,State,Median_Income
0,NSW,813
1,VIC,803
2,QLD,787
3,SA,734
4,WA,848
5,TAS,701
6,NT,936
7,ACT,1203


In [32]:
# Attach each state's median income to the population / renewable-per-capita
# frame, joined on State (a single key; the previous ["State", "State"] listed
# the same key twice). Left join: every state of the left frame is kept.
income_renewable_energy = pd.merge(
    population_renewable_energy,
    income_summary,
    how="left",
    on="State",
)
income_renewable_energy

Unnamed: 0,State,Population_Total,Renewable_MWh_Per_Capita,Median_Income
0,ACT,454499,0.198577,1203
1,NSW,8072163,1.66772,813
2,NT,232605,0.053782,936
3,QLD,5156138,0.920545,787
4,SA,1781516,3.708707,734
5,TAS,557571,18.218186,701
6,VIC,6503491,1.471901,803
7,WA,2660026,1.381824,848


In [33]:
# Keep only the columns needed for the income visualisation, in display order.
income_renewable_energy = income_renewable_energy.loc[
    :, ["State", "Median_Income", "Renewable_MWh_Per_Capita"]
]
income_renewable_energy

Unnamed: 0,State,Median_Income,Renewable_MWh_Per_Capita
0,ACT,1203,0.198577
1,NSW,813,1.66772
2,NT,936,0.053782
3,QLD,787,0.920545
4,SA,734,3.708707
5,TAS,701,18.218186
6,VIC,803,1.471901
7,WA,848,1.381824


# FINAL LIST OF ALL CUSTOM DATA FRAMES FOR VISUALISATIONS

In [34]:
# Median income and renewable MWh per capita, by state.
income_renewable_energy

Unnamed: 0,State,Median_Income,Renewable_MWh_Per_Capita
0,ACT,1203,0.198577
1,NSW,813,1.66772
2,NT,936,0.053782
3,QLD,787,0.920545
4,SA,734,3.708707
5,TAS,701,18.218186
6,VIC,803,1.471901
7,WA,848,1.381824


In [35]:
# Total population and renewable MWh per capita, by state.
population_renewable_energy

Unnamed: 0,State,Population_Total,Renewable_MWh_Per_Capita
0,ACT,454499,0.198577
1,NSW,8072163,1.66772
2,NT,232605,0.053782
3,QLD,5156138,0.920545
4,SA,1781516,3.708707
5,TAS,557571,18.218186
6,VIC,6503491,1.471901
7,WA,2660026,1.381824


In [36]:
# Renewable and non-renewable MWh per capita, by state.
energy_production_per_capita

Unnamed: 0,State,Renewable_MWh_Per_Capita,Non_Renewable_MWh_Per_Capita
0,ACT,0.198577,0.0
1,NSW,1.66772,6.516221
2,NT,0.053782,9.207407
3,QLD,0.920545,11.475333
4,SA,3.708707,2.985604
5,TAS,18.218186,0.155426
6,VIC,1.471901,5.443138
7,WA,1.381824,7.693242


In [37]:
# Renewable and non-renewable row counts per state for 2014-15, 2017-18 and 2020-21.
energy_progression_merged

Unnamed: 0,State,Renewable_Energy_Plants_2014_2015,Non_Renewable_Energy_Plants_2014_2015,Renewable_Energy_Plants_2017_2018,Non_Renewable_Energy_Plants_2017_2018,Renewable_Energy_Plants_2020_2021,Non_Renewable_Energy_Plants_2020_2021
0,ACT,4,0.0,3,0.0,5,0.0
1,NSW,38,32.0,45,28.0,64,22.0
2,NT,2,14.0,2,65.0,3,64.0
3,QLD,14,31.0,15,32.0,46,65.0
4,SA,20,15.0,25,14.0,32,17.0
5,TAS,35,3.0,38,1.0,39,1.0
6,VIC,41,22.0,47,19.0,65,20.0
7,WA,15,63.0,20,71.0,23,71.0


In [38]:
# Write each visualisation frame to its CSV file under ../export_data.
# to_csv is called with its defaults, so the row index is written as well.
exports = (
    (income_renewable_energy, '../export_data/income_renewable_energy.csv'),
    (population_renewable_energy, '../export_data/population_renewable_energy.csv'),
    (energy_production_per_capita, '../export_data/energy_production_per_capita.csv'),
    (energy_progression_merged, '../export_data/energy_progression_merged.csv'),
)
for frame, csv_path in exports:
    frame.to_csv(csv_path)