# Importing Modules

In [2]:
import pandas as pd
import numpy as np

# Data Preprocessing 

## Importing Data

In [5]:
# Labels assigned to the seven columns read from the workbook, in sheet order
colnames = [
    "Year",
    "Population",
    "GDP",
    "Energy Consumption",
    "Energy Consumption Per Capita",
    "Energy Intensity",
    "Energy Productivity",
]

# Read the "AUS" sheet: keep column positions 1 through 7 (0-based), skip the
# first 5 and the last 5 rows of the sheet, and relabel the columns with `colnames`
economics = pd.read_excel(
    "Data/australian_energy_statistics_2024_table_b.xlsx",
    sheet_name="AUS",
    usecols=list(range(1, 8)),
    skiprows=5,
    skipfooter=5,
    names=colnames,
)
economics

Unnamed: 0,Year,Population,GDP,Energy Consumption,Energy Consumption Per Capita,Energy Intensity,Energy Productivity
0,1960-61,10548267,308362,1336.600,126.713,4334.516,230.706
1,1961-62,10742291,312353,1365.800,127.142,4372.617,228.696
2,1962-63,10950379,331770,1432.200,130.790,4316.846,231.651
3,1963-64,11166702,354929,1531.400,137.140,4314.666,231.768
4,1964-65,11387665,376155,1625.300,142.725,4320.825,231.437
...,...,...,...,...,...,...,...
58,2018-19,25334826,2198775,6188.282,244.260,2814.423,355.313
59,2019-20,25649248,2191416,6001.048,233.966,2738.434,365.172
60,2020-21,25685412,2237695,5769.218,224.611,2578.197,387.868
61,2021-22,26014399,2333221,5765.137,221.613,2470.892,404.712


## Summary Statistics

In [7]:
# Report each column's dtype and non-null count; "deep" asks pandas to measure
# the real memory held by object columns instead of estimating it
economics.info(memory_usage = "deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Year                           63 non-null     object 
 1   Population                     63 non-null     int64  
 2   GDP                            63 non-null     int64  
 3   Energy Consumption             63 non-null     float64
 4   Energy Consumption Per Capita  63 non-null     float64
 5   Energy Intensity               63 non-null     float64
 6   Energy Productivity            63 non-null     float64
dtypes: float64(4), int64(2), object(1)
memory usage: 6.5 KB


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
economics.describe()

Unnamed: 0,Population,GDP,Energy Consumption,Energy Consumption Per Capita,Energy Intensity,Energy Productivity
count,63.0,63.0,63.0,63.0,63.0,63.0
mean,17754930.0,1123409.0,4042.955937,219.459698,3938.108032,262.253254
std,4548620.0,622775.3,1558.650397,41.247345,650.572407,51.923891
min,10548270.0,308362.0,1336.6,126.713,2447.234,207.437
25%,14112660.0,603086.5,2818.35,198.728,3499.379,227.0175
50%,17478640.0,926367.0,3982.7,227.861,4234.935,236.131
75%,21038410.0,1638237.0,5731.1605,253.057,4404.954,285.8415
max,26658950.0,2403614.0,6188.282,274.827,4820.75,408.625


## Converting the Fiscal Year column

In [10]:
# define a function to convert the fiscal years to calendar year

def convert_fiscal_year(x: str) -> int:
    """Convert a fiscal-year label to the calendar year in which it ends.

    The first four characters (after trimming surrounding whitespace) are read
    as the starting year and one is added, e.g. "1960-61" -> 1961.

    Parameters
    ----------
    x : str
        Fiscal-year label whose first four characters are the starting year.

    Returns
    -------
    int
        Starting year plus one.

    Raises
    ------
    TypeError
        If ``x`` is not a string (for example a year that was already converted).
    ValueError
        If the first four characters are not a valid integer.
    """
    if not isinstance(x, str):
        raise TypeError(
            f"fiscal year must be a string such as '1960-61', got {type(x).__name__}"
        )
    # Trim first so stray spreadsheet whitespace cannot shift the 4-character slice
    start_year = int(x.strip()[:4])
    return start_year + 1

# apply the function to year column
# Guarded so this cell can be re-run safely: once "Year" already holds integers
# (or is no longer a column) the conversion is skipped instead of raising.
if "Year" in economics.columns and not pd.api.types.is_integer_dtype(economics["Year"]):
    economics["Year"] = economics["Year"].apply(convert_fiscal_year)

economics

Unnamed: 0,Year,Population,GDP,Energy Consumption,Energy Consumption Per Capita,Energy Intensity,Energy Productivity
0,1961,10548267,308362,1336.600,126.713,4334.516,230.706
1,1962,10742291,312353,1365.800,127.142,4372.617,228.696
2,1963,10950379,331770,1432.200,130.790,4316.846,231.651
3,1964,11166702,354929,1531.400,137.140,4314.666,231.768
4,1965,11387665,376155,1625.300,142.725,4320.825,231.437
...,...,...,...,...,...,...,...
58,2019,25334826,2198775,6188.282,244.260,2814.423,355.313
59,2020,25649248,2191416,6001.048,233.966,2738.434,365.172
60,2021,25685412,2237695,5769.218,224.611,2578.197,387.868
61,2022,26014399,2333221,5765.137,221.613,2470.892,404.712


In [11]:
# Use "Year" as the row index. Reassign instead of mutating in place, and skip
# the step when the frame is already indexed by "Year" so that re-running this
# cell does not raise a KeyError.
if economics.index.name != "Year":
    economics = economics.set_index("Year")

In [12]:
# Display the frame to check the result of the re-indexing step
economics

Unnamed: 0_level_0,Population,GDP,Energy Consumption,Energy Consumption Per Capita,Energy Intensity,Energy Productivity
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1961,10548267,308362,1336.600,126.713,4334.516,230.706
1962,10742291,312353,1365.800,127.142,4372.617,228.696
1963,10950379,331770,1432.200,130.790,4316.846,231.651
1964,11166702,354929,1531.400,137.140,4314.666,231.768
1965,11387665,376155,1625.300,142.725,4320.825,231.437
...,...,...,...,...,...,...
2019,25334826,2198775,6188.282,244.260,2814.423,355.313
2020,25649248,2191416,6001.048,233.966,2738.434,365.172
2021,25685412,2237695,5769.218,224.611,2578.197,387.868
2022,26014399,2333221,5765.137,221.613,2470.892,404.712


## Creating the 1990 to 2023 Economics DataFrame

In [14]:
# Keep every column, but only the rows whose index label falls in the
# 1990-2023 range (label-based slicing with .loc includes both end points)
economics_1990_2023 = economics.loc[1990:2023, :]

economics_1990_2023

Unnamed: 0_level_0,Population,GDP,Energy Consumption,Energy Consumption Per Capita,Energy Intensity,Energy Productivity
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1990,17065128,925958,3945.9,231.226,4261.424,234.663
1991,17284036,922422,3949.9,228.529,4282.096,233.53
1992,17478635,926367,3982.7,227.861,4299.268,232.598
1993,17634808,963840,4081.8,231.463,4234.935,236.131
1994,17805468,1002198,4181.9,234.866,4172.728,239.651
1995,18004882,1041115,4365.4,242.457,4193.005,238.493
1996,18224767,1081343,4505.5,247.219,4166.578,240.005
1997,18423037,1123658,4611.1,250.29,4103.651,243.685
1998,18607584,1176057,4777.6,256.756,4062.388,246.161
1999,18812264,1235185,4884.7,259.655,3954.63,252.868


In [15]:
# Write data to a csv file
# The folder is spelled "Data/" to match the directory the source workbook is
# read from, so the export also works on case-sensitive file systems.

economics_1990_2023.to_csv("Data/economics_1990_2023.csv")