In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path
import seaborn as sns


%matplotlib inline

In [2]:
# Load the RBA cash-rate history, using the parsed "Effective date" column as the index.
rba_csv = Path("RBA Cash Rate.csv")
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (it has no effect there and only emits a warning);
# `parse_dates=True` alone converts the index to datetimes.
rba_returns = pd.read_csv(
    rba_csv,
    index_col="Effective date",
    parse_dates=True,
).sort_index(ascending=True)  # oldest first; returns a new frame instead of sorting in place
rba_returns.head()

Unnamed: 0_level_0,Change,Cash Rate Target
Effective date,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-01-23,-0.50 to -1.00,17.00 to 17.50
1990-02-15,-0.5,16.50 to 17.00
1990-03-07,0,16.50 to 17.00
1990-04-04,-1.00 to -1.50,15.00 to 15.50
1990-05-02,0,15.00 to 15.50


In [3]:
# Missing-value report for the cash-rate frame:
# per-column null counts first, then every row that contains at least one null.
rba_null_mask = rba_returns.isnull()
print(rba_null_mask.sum())
print(rba_returns[rba_null_mask.any(axis=1)])

Change              0
Cash Rate Target    0
dtype: int64
Empty DataFrame
Columns: [Change, Cash Rate Target]
Index: []


In [4]:
# Discard every row holding a missing value, then show the per-column null
# counts that remain.
rba_returns = rba_returns.dropna()
rba_returns.isna().sum()

Change              0
Cash Rate Target    0
dtype: int64

In [5]:
# Preview the first five rows of the cash-rate frame after the null drop.
rba_returns.head(n=5)

Unnamed: 0_level_0,Change,Cash Rate Target
Effective date,Unnamed: 1_level_1,Unnamed: 2_level_1
1990-01-23,-0.50 to -1.00,17.00 to 17.50
1990-02-15,-0.5,16.50 to 17.00
1990-03-07,0,16.50 to 17.00
1990-04-04,-1.00 to -1.50,15.00 to 15.50
1990-05-02,0,15.00 to 15.50


In [6]:
# Restrict the cash-rate history to the study window; label slicing with
# .loc keeps both end dates.
rba_sorted_clean = rba_returns.loc['2006-01-01':'2022-05-04', :]
rba_sorted_clean.head(n=5)


Unnamed: 0_level_0,Change,Cash Rate Target
Effective date,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-02-08,0.0,5.5
2006-03-08,0.0,5.5
2006-04-05,0.0,5.5
2006-05-03,0.25,5.75
2006-06-07,0.0,5.75


In [7]:
# Persist the windowed cash-rate frame (index included) next to the notebook.
rba_sorted_clean.to_csv(path_or_buf='rba_sorted_clean.csv')

In [8]:
# Load GDP per capita keyed by the "Date" column and order it oldest-first.
# No date parsing is requested here, so the index keeps whatever labels the
# file holds (the previews in this notebook show "YYYY-MM" values).
gdp_csv = Path("gdp_per_capita.csv")
gdp_per_capita = pd.read_csv(gdp_csv, index_col="Date").sort_index(ascending=True)
gdp_per_capita.head(n=5)

Unnamed: 0_level_0,GDP per capita
Date,Unnamed: 1_level_1
1973-09,1041
1973-12,1089
1974-03,1128
1974-06,1167
1974-09,1213


In [9]:
# Missing-value report for the GDP frame:
# per-column null counts first, then every row that contains at least one null.
gdp_null_mask = gdp_per_capita.isnull()
print(gdp_null_mask.sum())
print(gdp_per_capita[gdp_null_mask.any(axis=1)])


GDP per capita    0
dtype: int64
Empty DataFrame
Columns: [GDP per capita]
Index: []


In [10]:
# Drop rows with missing values, producing a new frame rather than mutating in place.
gdp_per_capita = gdp_per_capita.dropna()
# The null count used to be a bare mid-cell expression, which a notebook
# evaluates and silently discards (only the last expression is displayed).
# Make the intended check explicit instead.
assert not gdp_per_capita.isnull().values.any(), "GDP frame still contains nulls"
# Keep only the study window; label slicing with .loc includes both bounds.
gdp_sorted_clean = gdp_per_capita.loc['2006-01':'2022-05']

gdp_sorted_clean.head()


Unnamed: 0_level_0,GDP per capita
Date,Unnamed: 1_level_1
2006-03,12353
2006-06,12520
2006-09,12764
2006-12,13050
2007-03,13293


In [11]:
# Persist the windowed GDP-per-capita frame (index included) next to the notebook.
gdp_sorted_clean.to_csv(path_or_buf='gdp_sorted_clean.csv')

In [12]:
# Load the unemployment-rate series keyed by the "Date" column and order it
# oldest-first. No date parsing is requested, so the index labels are kept
# as read from the file.
unemployment_rate_csv = Path("Unemployment rate.csv")
unemployment_rate = (
    pd.read_csv(unemployment_rate_csv, index_col="Date")
    .sort_index(ascending=True)
)
unemployment_rate.head(n=5)

Unnamed: 0_level_0,unemployment rate
Date,Unnamed: 1_level_1
2001-03,6.5%
2001-06,6.9%
2001-09,6.8%
2001-12,6.9%
2002-03,6.4%


In [13]:
# Drop rows with missing values, producing a new frame rather than mutating in place.
unemployment_rate = unemployment_rate.dropna()
# The null count used to be a bare mid-cell expression, which a notebook
# evaluates and silently discards (only the last expression is displayed).
# Make the intended check explicit instead.
assert not unemployment_rate.isnull().values.any(), "unemployment frame still contains nulls"
# Keep only the study window; label slicing with .loc includes both bounds.
unemployment_rate_sorted_clean = unemployment_rate.loc['2006-01':'2022-05']
unemployment_rate_sorted_clean.head()




Unnamed: 0_level_0,unemployment rate
Date,Unnamed: 1_level_1
2006-03,4.9%
2006-06,4.8%
2006-09,4.7%
2006-12,4.6%
2007-03,4.5%


In [14]:
# Persist the windowed unemployment-rate frame (index included) next to the notebook.
unemployment_rate_sorted_clean.to_csv(path_or_buf='unemployment_rate_sorted_clean.csv')

In [15]:
# Load the CPI series keyed by the "Date" column and order it oldest-first.
# NOTE(review): the file's column is called "inflation_rate", but the values
# previewed in this notebook (e.g. 84.5 for 2006-03) look like index levels
# rather than rates -- confirm against the data source.
cpi_rate_csv = Path("CPI.csv")
cpi_rate = pd.read_csv(cpi_rate_csv, index_col="Date").sort_index(ascending=True)
cpi_rate.head(n=5)

Unnamed: 0_level_0,inflation_rate
Date,Unnamed: 1_level_1
1922-06,2.8
1922-09,2.8
1922-12,2.7
1923-03,2.7
1923-06,2.8


In [16]:
# Discard every row holding a missing value, then show the per-column null
# counts that remain.
cpi_rate = cpi_rate.dropna()
cpi_rate.isna().sum()

inflation_rate    0
dtype: int64

In [17]:
# Restrict the CPI series to the study window; label slicing with .loc keeps
# both bounds.
cpi_rate_sorted_clean = cpi_rate.loc['2006-01':'2022-05', :]
cpi_rate_sorted_clean.head(n=5)



Unnamed: 0_level_0,inflation_rate
Date,Unnamed: 1_level_1
2006-03,84.5
2006-06,85.9
2006-09,86.7
2006-12,86.6
2007-03,86.6


In [18]:
# Persist the windowed CPI frame (index included) next to the notebook.
cpi_rate_sorted_clean.to_csv(path_or_buf='cpi_rate_sorted_clean.csv')

In [19]:
# Quarterly comparison of GDP per capita, unemployment rate and CPI:
# place the three windowed frames side by side, keeping only the index labels
# that appear in all of them (inner join).
quarterly_frames = [gdp_sorted_clean, unemployment_rate_sorted_clean, cpi_rate_sorted_clean]
quarterly_comparison = pd.concat(quarterly_frames, axis=1, join="inner")

In [20]:
# Display the combined quarterly table (GDP per capita, unemployment rate, CPI).
quarterly_comparison

Unnamed: 0_level_0,GDP per capita,unemployment rate,inflation_rate
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006-03,12353,4.9%,84.5
2006-06,12520,4.8%,85.9
2006-09,12764,4.7%,86.7
2006-12,13050,4.6%,86.6
2007-03,13293,4.5%,86.6
2007-06,13465,4.3%,87.7
2007-09,13623,4.2%,88.3
2007-12,13819,4.3%,89.1
2008-03,14106,4.1%,90.3
2008-06,14420,4.2%,91.6
