In [145]:
# importing the required modules
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
 
# Locations of the four Australian Bureau of Statistics income exports.
income_1csv, income_2csv, income_3csv, income_4csv = (
    Path("./Resources/Income/Australian Bureau of Statistics.csv"),
    Path("./Resources/Income/Australian Bureau of Statistics (1).csv"),
    Path("./Resources/Income/Australian Bureau of Statistics (2).csv"),
    Path("./Resources/Income/Australian Bureau of Statistics (3).csv"),
)

# Read every export into its own DataFrame, in the same order as the paths.
income_1, income_2, income_3, income_4 = (
    pd.read_csv(csv_path)
    for csv_path in (income_1csv, income_2csv, income_3csv, income_4csv)
)

# income_2 is the only table re-headed here: its first data row becomes the
# column labels, and any missing label is filled in as 'Unnamed: 0'.
new_header = income_2.iloc[0]
income_2 = income_2.iloc[1:]
income_2.columns = new_header
income_2.columns = income_2.columns.fillna('Unnamed: 0')

In [146]:
# Preview the row of income_1 labelled 2; the label slice (2:2) keeps the
# result a one-row DataFrame rather than collapsing it to a Series.
income_1.loc[2:2]

Unnamed: 0.1,Unnamed: 0,August 2021,August 2020,August 2015,2020 to 2021(% change),2015 to 2021(% change p.a.)
2,Total,$1200,$1150,$1000,4.3%,3.3%


In [147]:
# Take the single row labelled 4 from income_2, keeping the label column and
# the August 2023 / August 2018 figures in one .loc selection.
income_2_sub_df = income_2.loc[4:4, ['Unnamed: 0', 'August 2023', 'August 2018']]
# Shorten the period labels to bare years.
income_2_sub_df.columns = ['Unnamed: 0', '2023', '2018']
income_2_sub_df

Unnamed: 0.1,Unnamed: 0,2023,2018
4,Total,"$1,300","$1,075"


In [148]:
# Take the single row labelled 2 from income_3, keeping the label column and
# the August 2022 / August 2021 figures in one .loc selection.
income_3_sub_df = income_3.loc[2:2, ['Unnamed: 0', 'August 2022', 'August 2021']]
income_3_sub_df

Unnamed: 0.1,Unnamed: 0,August 2022,August 2021
2,Total,"$1,250","$1,200"


In [149]:
# Take the single row labelled 2 from income_4, keeping the label column and
# the August 2020 / August 2019 figures in one .loc selection.
income_4_sub_df = income_4.loc[2:2, ['Unnamed: 0', 'August 2020', 'August 2019']]
income_4_sub_df

Unnamed: 0.1,Unnamed: 0,August 2020,August 2019
2,Total,$1150,$1100


In [150]:
# Place the two one-row frames side by side; both were selected with row
# label 2, so they line up into a single row.
concat_3_4_df = pd.concat([income_3_sub_df, income_4_sub_df], axis=1)

# Relabel the periods as bare years. The label column repeated by
# income_4_sub_df gets a throwaway name so it can be dropped by name.
concat_3_4_df.columns = ['Unnamed: 0', '2022', '2021', 'to_be_removed', '2020', '2019']
concat_3_4_df = concat_3_4_df.drop(columns='to_be_removed')
concat_3_4_df

Unnamed: 0.1,Unnamed: 0,2022,2021,2020,2019
2,Total,"$1,250","$1,200",$1150,$1100


In [151]:
# Join the 2023/2018 figures with the 2019-2022 figures on the shared label column.
merged_df = pd.merge(income_2_sub_df, concat_3_4_df, on='Unnamed: 0')

# Flip to long form: every original column becomes a row.
merged_df = merged_df.T

# The first transposed row holds the label column's values; promote it to the
# header and keep only the rows beneath it.
new_header = merged_df.iloc[0]
merged_df = merged_df.iloc[1:]
merged_df.columns = new_header

# Move the year labels out of the index into an ordinary column, then name
# the two resulting columns.
merged_df = merged_df.reset_index()
merged_df.columns = ["Year", "Income"]
merged_df

Unnamed: 0,Year,Income
0,2023,"$1,300"
1,2018,"$1,075"
2,2022,"$1,250"
3,2021,"$1,200"
4,2020,$1150
5,2019,$1100


In [152]:
# Convert the currency text (e.g. "$1,300") to integers.
# The column is passed through astype(str) first so this cell can be re-run:
# the .str accessor only works on string data, so a second run against the
# already-converted integer column would otherwise raise AttributeError.
merged_df['Income'] = (
    merged_df['Income']
    .astype(str)
    .str.replace("$", "", regex=False)
    .str.replace(",", "", regex=False)
    .astype(int)
)
merged_df['Year'] = merged_df['Year'].astype(int)

# Put the rows in chronological order; the original row labels are kept.
merged_df = merged_df.sort_values('Year')
merged_df

Unnamed: 0,Year,Income
1,2018,1075
5,2019,1100
4,2020,1150
3,2021,1200
2,2022,1250
0,2023,1300


In [153]:
# Scale Income by 52 to obtain a yearly figure
# (presumably Income is a weekly amount - TODO confirm against the data source).
merged_df['Annual_Income'] = merged_df['Income'].mul(52)
merged_df

Unnamed: 0,Year,Income,Annual_Income
1,2018,1075,55900
5,2019,1100,57200
4,2020,1150,59800
3,2021,1200,62400
2,2022,1250,65000
0,2023,1300,67600


In [154]:
# Each row receives the Annual_Income of the row directly above it; the first
# row has no predecessor and therefore gets NaN.
merged_df['Income_Shift'] = merged_df['Annual_Income'].shift(periods=1)
merged_df

Unnamed: 0,Year,Income,Annual_Income,Income_Shift
1,2018,1075,55900,
5,2019,1100,57200,55900.0
4,2020,1150,59800,57200.0
3,2021,1200,62400,59800.0
2,2022,1250,65000,62400.0
0,2023,1300,67600,65000.0


In [155]:
# Percentage change of Annual_Income relative to the previous row's value
# (Income_Shift), rounded to two decimals: (current / previous) * 100 - 100.
merged_df['Income_Change'] = (
    merged_df['Annual_Income']
    .div(merged_df['Income_Shift'])
    .mul(100)
    .sub(100)
    .round(2)
)
merged_df

Unnamed: 0,Year,Income,Annual_Income,Income_Shift,Income_Change
1,2018,1075,55900,,
5,2019,1100,57200,55900.0,2.33
4,2020,1150,59800,57200.0,4.55
3,2021,1200,62400,59800.0,4.35
2,2022,1250,65000,62400.0,4.17
0,2023,1300,67600,65000.0,4.0


In [156]:
# Write the finished table to income_clean.csv in the working directory,
# leaving the row labels out of the file.
merged_df.to_csv(path_or_buf='income_clean.csv', index=False)

In [105]:
# Inspect the dtype of every column in merged_df.
# NOTE(review): the output saved with this cell lists a 'Quarter' column, which
# none of the cells that build merged_df create - it looks like a result from
# an earlier version of the frame; re-run to refresh.
merged_df.dtypes

Quarter    object
Income      int32
dtype: object

In [3]:
# Flip each income table so its original columns become rows.
# NOTE(review): "tranpose" is a misspelling of "transpose"; the names are left
# unchanged because other cells refer to them.
income_1_tranpose = income_1.T
income_2_tranpose = income_2.T
income_3_tranpose = income_3.T
income_4_tranpose = income_4.T
income_1_tranpose.head(n=5)

Unnamed: 0,0,1,2,3,4
Unnamed: 0,Men,Women,Total,,"Source: Australian Bureau of Statistics, Emplo..."
August 2021,$1380,$1018,$1200,,
August 2020,$1300,$1000,$1150,,
August 2015,$1187,$850,$1000,,
2020 to 2021(% change),6.2%,1.8%,4.3%,,


In [10]:
# Keep only the first three columns of the transposed tables.
income_1_clean = income_1_tranpose.iloc[:, :3]
income_3_clean = income_3_tranpose.iloc[:, :3]

# A notebook cell renders only its final expression, so a bare
# income_1_clean.head() placed before this line would compute a preview and
# silently discard it. Preview income_3_clean alone.
income_3_clean.head()

Unnamed: 0,0,1,2
Unnamed: 0,Men,Women,Total
August 2022,"$1,425","$1,094","$1,250"
August 2021,"$1,380","$1,030","$1,200"
August 2017,"$1,200",$899,"$1,019"
2021 to 2022(% change),3.3%,6.2%,4.2%


In [15]:
# Column labels applied to the trimmed, transposed income tables.
Columns = [
    "Men",
    "Women",
    "Total",
]

In [27]:

# Label the three kept columns on both trimmed tables.
income_1_clean.columns = Columns
income_3_clean.columns = Columns

# Remove the row labelled 'Unnamed: 0' from each table, leaving the period
# and percentage-change rows.
clean_income1 = income_1_clean.drop(index='Unnamed: 0')
clean_income3 = income_3_clean.drop(index='Unnamed: 0')

# Plain-text dump of both tables, one after the other.
print(clean_income1, clean_income3, sep="\n")

                               Men  Women  Total
August 2021                  $1380  $1018  $1200
August 2020                  $1300  $1000  $1150
August 2015                  $1187   $850  $1000
2020 to 2021(% change)        6.2%   1.8%   4.3%
2015 to 2021(% change p.a.)   2.7%   3.3%   3.3%
                                Men   Women   Total
August 2022                  $1,425  $1,094  $1,250
August 2021                  $1,380  $1,030  $1,200
August 2017                  $1,200    $899  $1,019
2021 to 2022(% change)         3.3%    6.2%    4.2%
2017 to 2022(% change p.a.)    3.5%    4.0%    4.2%


In [29]:
# Stack the two cleaned tables vertically, keeping the column order as given.
# NOTE(review): both inputs contain an 'August 2021' row, so that row label
# appears twice in the combined table.
Combined_income_csv = pd.concat(
    objs=[clean_income1, clean_income3],
    axis=0,
    sort=False,
)

Combined_income_csv

#Combined_income_csv.to_csv('income_output.csv', index=False)

Unnamed: 0,Men,Women,Total
August 2021,$1380,$1018,$1200
August 2020,$1300,$1000,$1150
August 2015,$1187,$850,$1000
2020 to 2021(% change),6.2%,1.8%,4.3%
2015 to 2021(% change p.a.),2.7%,3.3%,3.3%
August 2022,"$1,425","$1,094","$1,250"
August 2021,"$1,380","$1,030","$1,200"
August 2017,"$1,200",$899,"$1,019"
2021 to 2022(% change),3.3%,6.2%,4.2%
2017 to 2022(% change p.a.),3.5%,4.0%,4.2%
