In [None]:
### NEXT:
### RBA Debit Card Usage [Ready to use - Combine with Cash data from APRA]
### APRA Monthly Stats

#############################################################

# Project: External Finance Data 

# Step: Extract and clean

# Purpose: Extract and clean various financial data sources that
# are released by regulators. This data can be used to provide
# insights into high level consumer finance trends. 
# This code avoids the need to manually download the data.

# Author: Michael Letheby

# Version: 
#    1.0 - 1/11/2021 
#        - Created initial version with ABS Mortgages and RBA Card data
#    1.1 - 21/11/2021
#        - Cleaned up code and added full commentary

#############################################################

In [5]:
#############################################################

# Section: Libraries

#############################################################

import pandas as pd # Data Analysis Library
import matplotlib.pyplot as plt # Data Visualisation Library
import matplotlib.ticker as ticker
%matplotlib inline

import seaborn as sns # Data Visualisation Library
import requests # For downloading 
import datetime
import re # Regex
import numbers 

import pickle # for saving/loading files

pd.options.mode.chained_assignment = None  # default='warn'

In [6]:
#############################################################

# Section: Functions

#############################################################

# pickle_save: save the files after importing and reading them
def pickle_save(name, to_save):
    """Pickle `to_save` to './Data/<name>.pickle'.

    Parameters:
        name: file name without directory or extension.
        to_save: any picklable object.

    The './Data/' directory is created when it does not exist yet, so the
    function also works on a fresh checkout.
    """
    import os  # local import: only needed to make sure the target directory exists

    os.makedirs('./Data/', exist_ok=True)
    with open('./Data/' + name + '.pickle', 'wb') as handle:
        pickle.dump(to_save, handle, protocol=pickle.HIGHEST_PROTOCOL)
# pickle_load: load previously saved files
def pickle_load(name):
    """Return the object stored in './Data/<name>.pickle'.

    `name` is the file name without directory or extension.
    Only load files produced by this notebook: unpickling runs whatever
    code the file contains.
    """
    pickle_path = './Data/' + name + '.pickle'
    with open(pickle_path, 'rb') as source_file:
        return pickle.load(source_file)

# match: search each string element within a list ('list_search') in a string ('in_string') and
# return the matches. Used to define the type of variable within the ABS Lending Indicator datasets.
def match(list_search, in_string):
    """Return the elements of `list_search` that are found in `in_string`.

    Parameters:
        list_search: iterable of search terms; each term is used as a regular
            expression pattern (re.search), so regex metacharacters are active.
        in_string: the string to search in.

    Returns:
        A list of the matching terms, in the order they appear in `list_search`
        (empty when nothing matches).
    """
    # Search the string passed by the caller, not a notebook-level global
    return [term for term in list_search if re.search(term, in_string)]

# human_format: format numbers to be more readable
def human_format(num):
    """Format a number with 3 significant figures and a K/M/B/T suffix.

    Examples: 1234 -> '1.23K', 1500000 -> '1.5M', 999 -> '999'.

    Values beyond the largest suffix keep the 'T' suffix (1e15 -> '1000T')
    instead of running past the end of the suffix list.
    """
    suffixes = ['', 'K', 'M', 'B', 'T']
    # Round to 3 significant figures first so that e.g. 999.9 becomes '1K'
    num = float('{:.3g}'.format(num))
    magnitude = 0
    # Stop at the last suffix: this bounds the index and guarantees termination for infinity
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    # Fixed-point text with trailing zeros (and a dangling decimal point) removed
    return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), suffixes[magnitude])

In [21]:
#############################################################

# Section: Import external files

#############################################################

# file_dict: contains the external files to be downloaded, keyed by source and then by table name.
# Each table's 'Metadata' holds the keys listed here. Only dl_name and series are set in the
# literal below; the remaining keys are filled in per source by the loop that follows the literal.
    # dl_name: filename (without the extension)
    # dl_source: website the file is sourced from (NOTE(review): never set in the visible code - confirm whether still needed)
    # series: IDs of the columns to be extracted
    # row_drop: 'Title' values of the rows to filter out of the file
    # dl_url: location of the file (URL prefix that dl_name + filetype is appended to)
    # sheet_name: name of the excel sheet to read
    # filetype: extension of the file to be downloaded
    # ID_row: row that contains the unique IDs (series)
    # unit_row: row that contains the unit of each series
    # publication_row: row that contains the latest date
    # skiprow: the number of rows to skip when reading the file
    
# NOTE(review): this empty initialisation is immediately replaced by the literal below
file_dict = {}  
file_dict = {
    
    # Tables downloaded from rba.gov.au (dl_url is set by the loop after this literal)
    'RBA' : {
        'Credit Card Data - Australia' : {
            'Metadata' : {
                'dl_name': 'c01-2-hist',
                'series': ['CCCCSNAP', 'CCCCSBAIP', 'CCCCSBTP', 'CCCCSCLP', 'CCCCSTTNP', 'CCCCSTTVP']
            }
        },    
        'Debit Card Data - Australia' : {
            'Metadata' : {
                'dl_name': 'c02-1-hist',
                'series': ['CDCNA', 'CDCPTN', 'CDCPTV']
            }

        }
    },
    # Lending indicator tables downloaded from abs.gov.au (dl_url is set by the loop after this literal).
    # Some series IDs appear in more than one table.
    'ABS' : {
        'Household Finance - Total' : {
            'Metadata' : {
                'dl_name': '560101',
                'series':  ['A108295286C','A108299115J','A108276981A','A108299885C','A108271213V','A108294614K']
            }
        },
        'Household Finance - Total by Lender' : {
            'Metadata' : {
                'dl_name': '560102',
                'series':  ['A108286039A','A108312356A','A108286046X','A108276379K','A108312352T','A108285311J',
                            'A108272326J','A108312354W','A108267776W']
            }
        },
        'Household Finance - Owner Occupied by Purpose' : {            
            'Metadata' : {
                'dl_name': '560103',
                'series':  ['A108299116K','A108284976J','A108280580R','A108299018F','A108298969V','A108294237R',
                            'A108294069R','A108280461X','A108299115J','A108284975F','A108280579F','A108299017C',
                            'A108298968T','A108294236L','A108294068L','A108280460W']
            }
        },
        'Household Finance - Owner Occupied by State' : {            
            'Metadata' : {
                'dl_name': '560104',
                'series':  ['A108299116K','A108271760F','A108289778K','A108267070V','A108299074X','A108299088L',
                            'A108280692J','A108299109L','A108285277R','A108299115J','A108271759W','A108289777J',
                            'A108267069K','A108299073W','A108299087K','A108280691F','A108299108K','A108285276L']
            }
        },
        'Household Finance - Owner Occupied - NSW' : {            
            'Metadata' : {
                'dl_name': '560105',
                'series':  ['A108271760F','A108271403J','A108294300V','A108271662A','A108271578K','A108276100R',
                            'A108289421V','A108284899R','A108271759W','A108271402F','A108294299W','A108271661X',
                            'A108271577J','A108276099T','A108289420T','A108284898L']
            }
        },
        'Household Finance - Owner Occupied - VIC' : {            
            'Metadata' : {
                'dl_name': '560106',
                'series':  ['A108289778K','A108284955W','A108271501L','A108276240T','A108271557X','A108276072T',
                            'A108266601A','A108266664K','A108289777J','A108284954V','A108271500K','A108276239J',
                            'A108271556W','A108276071R','A108266600X','A108266663J']
            }
        },
        'Household Finance - Owner Occupied - QLD' : {         
            'Metadata' : {
                'dl_name': '560107',
                'series':  ['A108267070V','A108284948X','A108266825L','A108271641R','A108266888W','A108294223A',
                            'A108275911A','A108298752F','A108267069K','A108284947W','A108266824K','A108271640L',
                            'A108266887V','A108294222X','A108275910X','A108298751C']
            }
        },
        'Household Finance - Owner Occupied - SA' : {            
            'Metadata' : {
                'dl_name': '560108',
                'series':  ['A108299074X','A108294139K','A108298871W','A108276212J','A108276149C','A108289533L',
                            'A108298682K','A108298710J','A108299073W','A108294138J','A108298870V','A108276211F',
                            'A108276148A','A108289532K','A108298681J','A108298709X']
            }
        },
        'Household Finance - Owner Occupied - WA' : {
            'Metadata' : {
                'dl_name': '560109',
                'series':  ['A108299088L','A108271368L','A108298885K','A108266923T','A108280601R','A108280510J',
                            'A108289386X','A108284871L','A108299087K','A108271367K','A108298884J','A108266922R',
                            'A108280600L','A108280509X','A108289385W','A108284870K']
            }
        },
        'Household Finance - Owner Occupied - TAS' : {
            'Metadata' : {
                'dl_name': '560110',
                'series':  ['A108280692J','A108298780R','A108266783A','A108271606J','A108298941T','A108280503K',
                            'A108294041L','A108289449W','A108280691F','A108298779F','A108266782X','A108271605F',
                            'A108298940R','A108280502J','A108294040K','A108289448V']
            }
        },
        'Household Finance - Owner Occupied - NT' : {
            'Metadata' : {
                'dl_name': '560111',
                'series':  ['A108299109L','A108276016X','A108280573T','A108280664X','A108271571V','A108276086F',
                            'A108275918T','A108289456V','A108299108K','A108276015W','A108280572R','A108280663W',
                            'A108271570T','A108276085C','A108275917R','A108289455T']
            }
        },
        'Household Finance - Owner Occupied - ACT' : {
            'Metadata' : {
                'dl_name': '560112',
                'series':  ['A108285277R','A108294167V','A108266804A','A108271620C','A108266867K','A108276058W',
                            'A108266587T','A108271312A','A108285276L','A108294166T','A108266803X','A108271619V',
                            'A108266866J','A108276057V','A108266586R','A108271311X']
            }
        },
        'Household Finance - Investor by Purpose' : {
            'Metadata' : {
                'dl_name': '560113',
                'series':  ['A108276982C','A108276723K','A108285662K','A108272187V','A108299536F','A108294839X',
                            'A108299284W','A108289981T','A108276981A','A108276722J','A108285661J','A108272186T',
                            'A108299535C','A108294838W','A108299283V','A108289980R']
            }
        },
        'Household Finance - Investor by State' : {
            'Metadata' : {
                'dl_name': '560114',
                'series':  ['A108276982C','A108290331L','A108299732R','A108272299L','A108272257R','A108267742X',
                            'A108290303C','A108267763K','A108295077J','A108276981A','A108290330K','A108299731L',
                            'A108272298K','A108272256L','A108267741W','A108290302A','A108267762J','A108295076F']
            }
        },
        'Household Finance - Investor - NSW' : {
            'Metadata' : {
                'dl_name': '560115',
                'series':  ['A108290331L','A108280986R','A108285648R','A108299592X','A108272152V','A108281042X',
                            'A108294685V','A108271949K','A108290330K','A108280985L','A108285647L','A108299591W',
                            'A108272151T','A108281041W','A108294684T','A108271948J']
            }
        },  
        'Household Finance - Investor - VIC' : {
            'Metadata' : {
                'dl_name': '560116',
                'series':  ['A108299732R','A108267392K','A108281070J','A108285725J','A108285683W','A108290072F',
                            'A108271893K','A108294748T','A108299731L','A108267391J','A108281069X','A108285724F',
                            'A108285682V','A108290071C','A108271892J','A108294747R']
            }
        },
        'Household Finance - Investor - QLD' : {
            'Metadata' : {
                'dl_name': '560117',
                'series':  ['A108272299L','A108272005V','A108267511R','A108295000K','A108290170K','A108281028C',
                            'A108276541W','A108271942V','A108272298K','A108272004T','A108267510L','A108294999K',
                            'A108290169A','A108281027A','A108276540V','A108271941T']
            }
        },
        'Household Finance - Investor - SA' : {
            'Metadata' : {
                'dl_name': '560118',
                'series':  ['A108272257R','A108267350L','A108285620L','A108294958R','A108276821R','A108294783X',
                            'A108267252J','A108280916J','A108272256L','A108267349C','A108285619C','A108294957L',
                            'A108276820L','A108294782W','A108267251F','A108280915F']
            }
        },
        'Household Finance - Investor - WA' : {
            'Metadata' : {
                'dl_name': '560119',
                'series':  ['A108267742X','A108267364A','A108276758K','A108276877A','A108299508W','A108290044W',
                            'A108289918A','A108280930C','A108267741W','A108267363X','A108276757J','A108276876X',
                            'A108299507V','A108290043V','A108289917X','A108280929V']
            }
        },
        'Household Finance - Investor - TAS' : {
            'Metadata' : {
                'dl_name': '560120',
                'series':  ['A108290303C','A108267357C','A108272068C','A108290198L','A108281119K','A108290037X',
                            'A108271879R','A108276597J','A108290302A','A108267356A','A108272067A','A108290197K',
                            'A108281118J','A108290036W','A108271878L','A108276596F']
            }
        },
        'Household Finance - Investor - NT' : {
            'Metadata' : {
                'dl_name': '560121',
                'series':  ['A108267763K','A108267406L','A108276793T','A108290226K','A108294930L','A108299424L',
                            'A108271900W','A108276639V','A108267762J','A108267405K','A108276792R','A108290225J',
                            'A108294929C','A108299423K','A108271899X','A108276638T']
            }
        },
        'Household Finance - Investor - ACT' : {
            'Metadata' : {
                'dl_name': '560122',
                'series':  ['A108295077J','A108267371X','A108276772F','A108281168F','A108294895T','A108299410X',
                            'A108280874W','A108294713T','A108295076F','A108267370W','A108276771C','A108281167C',
                            'A108294894R','A108299409R','A108280873V','A108294712R']
            }
        },
        'Household Finance - FHB by State' : {
            'Metadata' : {
                'dl_name': '560123',
                'series':  ['A108299886F','A108295203K','A108285956T','A108299851F','A108290415W','A108281343C',
                            'A108272390A','A108281385A','A108285942C','A108299885C','A108295202J','A108285955R',
                            'A108299850C','A108290414V','A108281342A','A108272389T','A108281384X','A108285941A']
            }
        },
        'Household Finance - Owner Occupier - FHB' : {
            'Metadata' : {
                'dl_name': '560124',
                'series':  ['A108271214W','A108271207X','A108298654A','A108298647C','A108298612C','A108280335K',
                            'A108271179A','A108266545V','A108275841F','A108271213V','A108271206W','A108298653X',
                            'A108298646A','A108298611A','A108280334J','A108271178X','A108266544T','A108275840C']
            }
        },
        'Household Finance - Investor - FHB' : {
            'Metadata' : {
                'dl_name': '560125',
                'series':  ['A108294615L','A108289876R','A108280832X','A108294608R','A108271816F','A108276471A',
                            'A108276457F','A108289869T','A108294594L','A108294614K','A108289875L','A108280831W',
                            'A108294607L','A108271815C','A108276470X','A108276456C','A108289868R','A108294593K']
            }
        },
        'Household Finance - Non-Resident' : {
            'Metadata' : {
                'dl_name': '560126',
                'series':  ['A108289120R','A108289119F']
            }
        }
    }
}

# Add the read/download settings shared by every table of a source to that table's Metadata.
# The settings are written as a dict literal inside the loop so each table gets its own
# 'row_drop' list object.
for source, tables in file_dict.items():
    if source == 'RBA':
        for table_name in tables:
            tables[table_name]['Metadata'].update({
                'row_drop': ['Description', 'Frequency', 'Type', 'Units', 'Source',
                             'Publication date', 'Series ID'],
                'dl_url': 'https://www.rba.gov.au/statistics/tables/xls/',
                'sheet_name': 'Data',
                'filetype': '.xlsx',
                'ID_row': 'Series ID',
                'unit_row': 'Units',
                'publication_row': 'Publication date',
                'skiprow': 1,
            })

    elif source == 'ABS':
        for table_name in tables:
            tables[table_name]['Metadata'].update({
                'row_drop': ['Unit', 'Series Type', 'Data Type', 'Frequency',
                             'Collection Month', 'No. Obs', 'Series Start',
                             'Series End', 'Series ID'],
                'dl_url': 'https://www.abs.gov.au/statistics/economy/finance/lending-indicators/latest-release/',
                'sheet_name': 'Data1',
                'filetype': '.xls',
                'ID_row': 'Series ID',
                'unit_row': 'Unit',
                'publication_row': 'Series End',
                'skiprow': 0,
            })

In [22]:
# Download all the external files from the file_dict dictionary into ./Input/
for source in file_dict:
    for key in file_dict[source]:
        metadata = file_dict[source][key]['Metadata']
        dl_name = metadata['dl_name'] + metadata['filetype']
        dl_url = metadata['dl_url'] + dl_name
        # A timeout stops a stalled connection from hanging the notebook indefinitely
        r = requests.get(dl_url, timeout=60)
        # Fail loudly on an HTTP error instead of saving the error page as if it were the workbook
        r.raise_for_status()
        with open("./Input/" + dl_name, 'wb') as f:
            f.write(r.content)

In [23]:
# External files are imported and cleaned. Each cleaned table is stored back into file_dict
# under 'Import_Data', together with a 'Lookup' table that gives the series ID, description
# and unit of every series that was kept.

# Multipliers that scale each source's reported units ('000, millions) up to single units
unit_multipliers = {
    'RBA': {"'000": 1000, '$ million': 1000000},
    'ABS': {'$ Millions': 1000000},
}

for source in file_dict:

    for key in file_dict[source]:

        meta = file_dict[source][key]['Metadata']
        data_name = meta['dl_name']
        row_filter = meta['row_drop']
        series_filter = meta['series']

        read_path = './Input/' + data_name + meta['filetype']
        df = pd.read_excel(read_path, sheet_name=meta['sheet_name'], skiprows=meta['skiprow'])
        df = df.rename(columns={df.columns[0]: 'Title'})

        # The column headers read from the sheet are used as the series descriptions;
        # capture them before the headers are replaced by the series IDs
        description = df.columns.values

        # Re-label the columns with their series IDs (row named by the 'ID_row' metadata)
        series_id_row = df[df['Title'] == meta['ID_row']].values.tolist()[0]
        series_id_row[0] = 'Title'

        df.columns = series_id_row

        # Extract Publication Date (second cell of the publication row)
        pub_date = df.loc[df['Title'] == meta['publication_row']].values[0, 1]
        # isinstance also covers datetime subclasses; missing dates are printed as-is
        if isinstance(pub_date, datetime.datetime) and not pd.isna(pub_date):
            pub_date = pub_date.strftime("%d-%b-%Y")
        print(data_name, 'latest publication date:', pub_date)

        # Build the lookup of series ID / description / unit for the requested series
        series_id = df[df['Title'] == meta['ID_row']].values[0]  # Series ID values
        units = df[df['Title'] == meta['unit_row']].values[0]  # Unit values
        series_lookup = pd.DataFrame(list(zip(series_id, description, units)),
                                     columns=['Series ID', 'Description', 'Unit'])
        # The first entry comes from the 'Title' column and only holds the row label
        series_lookup = series_lookup[series_lookup['Series ID'] != meta['ID_row']]
        series_lookup = series_lookup.loc[series_lookup['Series ID'].isin(series_filter), :]

        # Columns to keep: the requested series plus the 'Title' column
        column_filter = series_lookup.loc[:, 'Series ID'].values.tolist()
        column_filter.insert(0, 'Title')

        # Drop the metadata rows and any row without a value in the first column
        df_fmt = df[~df['Title'].isin(row_filter)]
        df_fmt = df_fmt.dropna(subset=['Title'])

        # Scale each column according to its unit
        source_multipliers = unit_multipliers.get(source, {})
        for i, unit in enumerate(units):
            # A column without a text unit (e.g. a blank cell) cannot be matched; leave it unscaled
            if not isinstance(unit, str):
                continue
            factor = source_multipliers.get(unit.strip())
            if factor is not None:
                df_fmt.iloc[:, i] = df_fmt.iloc[:, i] * factor

        # Drop columns
        df_fmt = df_fmt.loc[:, column_filter]  # Keep relevant series

        df_fmt = df_fmt.reset_index(drop=True)
        df_fmt = df_fmt.rename(columns={'Title': 'Date'})
        df_fmt = df_fmt.convert_dtypes()  # Convert variable types

        file_dict[source][key]['Import_Data'] = df_fmt
        file_dict[source][key]['Lookup'] = series_lookup

c01-2-hist latest publication date: 08-Nov-2021
c02-1-hist latest publication date: 08-Nov-2021
560101 latest publication date: 01-Sep-2021
560102 latest publication date: 01-Sep-2021
560103 latest publication date: 01-Sep-2021
560104 latest publication date: 01-Sep-2021
560105 latest publication date: 01-Sep-2021
560106 latest publication date: 01-Sep-2021
560107 latest publication date: 01-Sep-2021
560108 latest publication date: 01-Sep-2021
560109 latest publication date: 01-Sep-2021
560110 latest publication date: 01-Sep-2021
560111 latest publication date: 01-Sep-2021
560112 latest publication date: 01-Sep-2021
560113 latest publication date: 01-Sep-2021
560114 latest publication date: 01-Sep-2021
560115 latest publication date: 01-Sep-2021
560116 latest publication date: 01-Sep-2021
560117 latest publication date: 01-Sep-2021
560118 latest publication date: 01-Sep-2021
560119 latest publication date: 01-Sep-2021
560120 latest publication date: 01-Sep-2021
560121 latest publicatio

In [24]:
# Save the imported data (pickle_save writes it to ./Data/imported_data_initial.pickle)
pickle_save('imported_data_initial',file_dict)

# To reuse a previously saved copy instead of the file_dict built above, uncomment the next line
#file_dict = pickle_load('imported_data_initial')

In [25]:
#############################################################

# Section: Combine ABS Tables

#############################################################

# variable_dict: match terms used to define the variables, keyed by source and then by variable name.
#   'list'  : lowercase search terms looked up in each (lowercased) series description
#   'alias' : optional mapping from a matched term to the short label stored for the series;
#             variables without an 'alias' store the matched term itself

variable_dict = {
    
    'RBA' : {
        'measure_type' : {
            'list' : ['number of accounts', 'total number of transactions', 'total value of transactions', 
                      'balances accruing interest', 'total balances', 'credit limits'],
            'alias' : {
                'number of accounts' : 'accounts',
                'total number of transactions' : 'transactions',
                'total value of transactions' : 'transaction value',
                'balances accruing interest' : 'interest balance',
                'total balances' : 'balance',
                'credit limits' : 'limits'
            }
        }
    },
    'ABS' : {
        'location' : {
            'list' : ['new south wales', 'victoria', 'queensland', 'south australia', 'western australia', 
                      'tasmania', 'northern territory', 'australian capital territory'],
            'alias' : {
                'new south wales' : 'NSW',
                'victoria' : 'VIC', 
                'queensland' : 'QLD',
                'south australia' : 'SA',
                'western australia' : 'WA',
                'tasmania' : 'TAS', 
                'northern territory' : 'NT', 
                'australian capital territory' : 'ACT',
                'all' : 'AUS' # default: 'all' is the term used when no location matches
            }
        },
        'lender_type' : {
            'list' : ['major banks', 'other adis', 'non-adis'],
        },
        'buyer_type' : {
            'list' : ['owner occupier', 'investor', 'non-resident']
        },
        'buyer_subtype' : {
            'list' : ['first home buyer']
        },
        'measure_type' : {
            'list' : ['total housing excluding refinancing', 'internal refinancing', 'external refinancing',
                     'construction of dwellings', 'purchase of newly erected dwellings',
                     'purchase of existing dwellings', 'purchase of residential land',
                     'alterations, additions and repairs', 'total housing including refinancing'],
            'alias' : {
                'total housing excluding refinancing' : 'total_exc_refi', 
                'internal refinancing' : 'int_refi', 
                'external refinancing' : 'ext_refi',
                'construction of dwellings' : 'construction', 
                'purchase of newly erected dwellings' : 'purchase_new',
                'purchase of existing dwellings' : 'purchase_existing', 
                'purchase of residential land' : 'purchase_land',
                'alterations, additions and repairs' : 'renovations', 
                'total housing including refinancing' : 'total'
            }
        },
        'measure_subtype' : {
            'list' : ['value', 'number']
        }
    }
}

In [26]:
# table_structure_dict: contains the variable structure for each table.
# Layout: table_structure_dict[source][table][series ID] = {'file_ref': ..., <variable>: <label>, ...}
# where each variable of variable_dict[source] is classified from the series description.

table_structure_dict = {}

for source in file_dict:
    table_structure_dict[source] = {}

    for key in file_dict[source]:

        table_structure_dict[source][key] = {}

        lookup = file_dict[source][key]['Lookup']
        file_ref = file_dict[source][key]['Metadata']['dl_name']

        for seriesloop in lookup['Series ID'].tolist():

            series_attrs = {'file_ref': file_ref}

            # Descriptions are lowercased because the search terms are lowercase
            desclist = lookup.loc[lookup['Series ID'] == seriesloop, 'Description'].values[0].lower()

            for var in variable_dict[source]:

                searchlist = variable_dict[source][var]['list']

                # Search once and reuse the result; the first matching term wins
                matches = match(searchlist, desclist)
                if matches:
                    output = matches[0]
                elif var == 'measure_type':
                    # Descriptions that name no measure are treated as the headline total
                    # NOTE(review): this default is also applied to non-ABS sources - confirm it is intended
                    output = 'total housing excluding refinancing'
                else:
                    output = 'all'

                # Translate through the alias table when there is one; a term without an alias
                # entry is stored unchanged rather than raising a KeyError
                alias = variable_dict[source][var].get('alias', {})
                series_attrs[var] = alias.get(output, output)

            table_structure_dict[source][key][seriesloop] = series_attrs

RBA
Credit Card Data - Australia
    Series ID                                   Description       Unit
1    CCCCSNAP            Number of accounts: personal cards       '000
13  CCCCSTTNP  Total number of transactions: personal cards       '000
14  CCCCSTTVP   Total value of transactions: personal cards  $ million
16  CCCCSBAIP    Balances accruing interest: personal cards  $ million
17   CCCCSBTP                Total balances: personal cards  $ million
18   CCCCSCLP                 Credit limits: personal cards  $ million
c01-2-hist
CCCCSNAP
number of accounts: personal cards
['number of accounts', 'total number of transactions', 'total value of transactions', 'balances accruing interest', 'total balances', 'credit limits']
CCCCSTTNP
total number of transactions: personal cards
['number of accounts', 'total number of transactions', 'total value of transactions', 'balances accruing interest', 'total balances', 'credit limits']
CCCCSTTVP
total value of transactions: personal cards
['nu

['new south wales', 'victoria', 'queensland', 'south australia', 'western australia', 'tasmania', 'northern territory', 'australian capital territory']
['major banks', 'other adis', 'non-adis']
['owner occupier', 'investor', 'non-resident']
['first home buyer']
['total housing excluding refinancing', 'internal refinancing', 'external refinancing', 'construction of dwellings', 'purchase of newly erected dwellings', 'purchase of existing dwellings', 'purchase of residential land', 'alterations, additions and repairs', 'total housing including refinancing']
['value', 'number']
A108298709X
households ;  housing finance ;  owner occupier ;  internal refinancing ;  south australia ;  new loan commitments ;  value ;
['new south wales', 'victoria', 'queensland', 'south australia', 'western australia', 'tasmania', 'northern territory', 'australian capital territory']
['major banks', 'other adis', 'non-adis']
['owner occupier', 'investor', 'non-resident']
['first home buyer']
['total housing exc

['first home buyer']
['total housing excluding refinancing', 'internal refinancing', 'external refinancing', 'construction of dwellings', 'purchase of newly erected dwellings', 'purchase of existing dwellings', 'purchase of residential land', 'alterations, additions and repairs', 'total housing including refinancing']
['value', 'number']
A108272004T
households ;  housing finance ;  investor ;  construction of dwellings ;  queensland ;  new loan commitments ;  value ;
['new south wales', 'victoria', 'queensland', 'south australia', 'western australia', 'tasmania', 'northern territory', 'australian capital territory']
['major banks', 'other adis', 'non-adis']
['owner occupier', 'investor', 'non-resident']
['first home buyer']
['total housing excluding refinancing', 'internal refinancing', 'external refinancing', 'construction of dwellings', 'purchase of newly erected dwellings', 'purchase of existing dwellings', 'purchase of residential land', 'alterations, additions and repairs', 'total

households ;  housing finance ;  first home buyers ;  total australia ;  new loan commitments ;  value ;
['new south wales', 'victoria', 'queensland', 'south australia', 'western australia', 'tasmania', 'northern territory', 'australian capital territory']
['major banks', 'other adis', 'non-adis']
['owner occupier', 'investor', 'non-resident']
['first home buyer']
['total housing excluding refinancing', 'internal refinancing', 'external refinancing', 'construction of dwellings', 'purchase of newly erected dwellings', 'purchase of existing dwellings', 'purchase of residential land', 'alterations, additions and repairs', 'total housing including refinancing']
['value', 'number']
A108295202J
households ;  housing finance ;  first home buyers ;  new south wales ;  new loan commitments ;  value ;
['new south wales', 'victoria', 'queensland', 'south australia', 'western australia', 'tasmania', 'northern territory', 'australian capital territory']
['major banks', 'other adis', 'non-adis']
['o

In [41]:
#############################################################

# Section: Aggregate imported tables to final state

#############################################################

# final_table_dict: contains the final aggregated dataframes that are used for visualisation/ analysis
final_table_dict = {}

# Observations before this date are left out of the combined ABS table
ABS_START_DATE = '2019-06-01'

# Key under which each RBA table is stored in final_table_dict
rba_output_names = {
    'Credit Card Data - Australia': 'RBA-Credit',
    'Debit Card Data - Australia': 'RBA-Debit',
}

for source in table_structure_dict:

    final_table_dict[source] = {}

    # ABS data is combined into a master table with a single measure column
    if source == 'ABS':

        # One long-format dataframe per file
        df_master_list = []

        for key in table_structure_dict[source]:

            df = file_dict[source][key]['Import_Data']
            df = df[df['Date'] >= ABS_START_DATE]
            # Long format: one row per (Date, series ID) with columns 'variable' and 'value'
            df = df.melt(id_vars=["Date"])

            # One dataframe per series, tagged with that series' attributes
            # (file_ref, location, lender_type, ...) as constant columns
            df_list = []
            for series, series_attrs in table_structure_dict[source][key].items():
                df_loop = df.loc[df['variable'] == series].assign(**series_attrs)
                df_list.append(df_loop)

            df_master_list.append(pd.concat(df_list))

        final_table_dict[source]['Final_Data'] = pd.concat(df_master_list)

    # RBA tables stay wide: each series column is renamed to its measure label
    elif source == 'RBA':

        for key in table_structure_dict[source]:
            df = file_dict[source][key]['Import_Data']

            # Tables without a predefined name get '<source>-<table name>', so an unexpected
            # table can neither fail on an undefined name nor overwrite another table's result
            source_fmt = rba_output_names.get(key, source + '-' + key)

            column_labels = {series: series_attrs['measure_type']
                             for series, series_attrs in table_structure_dict[source][key].items()}
            df = df.rename(columns=column_labels)

            final_table_dict[source_fmt] = {'Final_Data': df}

In [43]:
# Save the final imported tables for use (pickle_save writes them to ./Data/imported_data_final.pickle)
pickle_save('imported_data_final',final_table_dict)

In [44]:
write_path = './Output/ABS_Lending_Indicators.csv'

# Columns that identify one output row; shared by the pivot and the aggregation below
group_cols = ['Date', 'location', 'lender_type', 'buyer_type', 'buyer_subtype', 'measure_type']

# Export the combined ABS table as a CSV
output_df = final_table_dict['ABS']['Final_Data']
# A number of the Series are present in multiple files so must be deduped
# (file_ref is deliberately not part of the subset)
output_df = output_df.drop_duplicates(subset=['Date', 'variable', 'value', 'location', 'lender_type', 'buyer_type',
                                              'buyer_subtype', 'measure_type', 'measure_subtype'])

# Pivot table to separate # and $: one column per measure_subtype ('number', 'value')
output_df = output_df.pivot_table(index=group_cols,
                                  columns='measure_subtype',
                                  values='value', aggfunc='sum').reset_index()
# Aggregate new table. The aggregation is named by string ('sum') because passing the
# builtin sum to .agg is deprecated in pandas; the string selects the same pandas sum.
output_df = output_df.groupby(group_cols).agg({'number': 'sum', 'value': 'sum'}).reset_index()

output_df.to_csv(write_path, index=False)