In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import re

### Income

In [2]:
# Raw ABS income-by-region extract (SA2 level, 2011-2019) and the curated
# postcode -> SA2 lookup table.
income_csv_path = "../data/raw/datasource-AU_Govt_ABS-UoM_AURIN_DB_3_abs_data_by_region_income_asgs_sa2_2011_2019.csv"
postcode_lookup_path = "../data/curated/postcode_to_sa2.csv"

df = pd.read_csv(income_csv_path)
SA2 = pd.read_csv(postcode_lookup_path)

In [3]:
# Keep only the identifying columns and the personal-income estimate, giving
# the latter a short name.  `.copy()` makes this an independent frame so the
# fill below is written to `df` itself rather than to a temporary slice
# (the original `fillna(..., inplace=True)` on a column of a sliced frame is
# chained assignment and is not guaranteed to stick).
INCOME_SOURCE_COL = 'estm_prsnl_incme_yr_end_30_jne_tl_erns_excl_gvrnmt_pns_alwncs_n'
df = (
    df[['FID', 'sa2_maincode_2016', 'geometry', 'sa2_name_2016', 'yr', INCOME_SOURCE_COL]]
    .rename(columns={INCOME_SOURCE_COL: 'income'})
    .copy()
)

# Missing incomes become 0; the growth-rate loop further down treats 0 as
# "no data" and skips those years.
# NOTE(review): these zeros are also averaged into `df_avg` in the next cell —
# confirm that is intended.
df['income'] = df['income'].fillna(0)

In [4]:
# Mean recorded income per SA2 across all years present in the data.
df_avg = df.groupby('sa2_maincode_2016')[['income']].mean()
df_avg.head()

Unnamed: 0_level_0,income
sa2_maincode_2016,Unnamed: 1_level_1
101021007,1235.142857
101021008,2797.142857
101021009,3592.571429
101021010,1747.285714
101021011,5841.714286


In [5]:
# Distinct years covered by the income data, in ascending order.
year = sorted(set(df['yr']))

In [6]:
# Display the sorted list of years present in the income data.
year

[2011, 2014, 2015, 2016, 2017, 2018, 2019]

In [7]:
# Distinct 2016 SA2 codes from the postcode lookup.  Missing codes are dropped
# first: NaN never compares equal to itself, so `set()` does not reliably
# collapse NaNs and they would otherwise be carried along as bogus "codes".
sa2_code = SA2['SA2_MAINCODE_2016'].dropna().unique().tolist()

In [8]:
# Average year-on-year income growth for every SA2 in the postcode lookup.
#
# For each SA2 the yearly incomes are put in chronological order and the
# relative change between each pair of consecutive available years is averaged.
# Years whose income is 0 (the missing-value fill) are treated as "no data":
# any pair touching one is left out.  An SA2 with no usable pair gets NaN.
# NOTE(review): consecutive *available* years are not always one calendar year
# apart (the data jumps from 2011 to 2014), so that step spans three years but
# is weighted like a single-year change — confirm this is acceptable.
income_rate = {}
for code in sa2_code:
    # First recorded income for each year, oldest year first.
    yearly_income = (
        df.loc[df['sa2_maincode_2016'] == code]
        .drop_duplicates(subset='yr')
        .sort_values('yr')['income']
        .to_numpy()
    )

    growth_steps = [
        (later - earlier) / earlier
        for earlier, later in zip(yearly_income[:-1], yearly_income[1:])
        if earlier != 0 and later != 0
    ]

    # Explicit NaN instead of silently reusing the previous SA2's average
    # (or raising NameError when the very first SA2 has no usable pair).
    if growth_steps:
        income_rate[code] = round(sum(growth_steps) / len(growth_steps), 5)
    else:
        income_rate[code] = np.nan

In [9]:
# Tabulate the per-SA2 growth rates as a two-column frame.
df_income = pd.DataFrame(
    list(income_rate.items()),
    columns=['SA2_2016', 'income_rate'],
)

In [10]:
# Display the per-SA2 income growth table.
df_income

Unnamed: 0,SA2_2016,income_rate
0,209041433.0,0.07911
1,209041436.0,0.02738
2,206041117.0,0.08111
3,206041118.0,0.14695
4,206041119.0,0.02111
...,...,...
325,208021426.0,0.02328
326,202031032.0,0.03507
327,202031033.0,-0.00394
328,213051361.0,0.01505


### Number of Bedrooms

2021

In [11]:
# 2021 Census G41 table for Victoria at SA2 level.
census_2021_path = "../data/raw/2021_GCP_SA2_for_VIC_short-header/2021 Census GCP Statistical Area 2 for VIC/2021Census_G41_VIC_SA2.csv"
df = pd.read_csv(census_2021_path)

In [12]:
# Keep the SA2 code and the seven `Total_NofB_*` bedroom-count columns,
# ordered from 0 bedrooms up to "6 or more".
bedroom_count_cols = [
    'Total_NofB_0_i_b',
    'Total_NofB_1',
    'Total_NofB_2',
    'Total_NofB_3',
    'Total_NofB_4',
    'Total_NofB_5',
    'Total_NofB_6_or_m',
]
df = df.loc[:, ['SA2_CODE_2021', *bedroom_count_cols]]

In [13]:
# Display the trimmed 2021 bedroom-count table.
df

Unnamed: 0,SA2_CODE_2021,Total_NofB_0_i_b,Total_NofB_1,Total_NofB_2,Total_NofB_3,Total_NofB_4,Total_NofB_5,Total_NofB_6_or_m
0,201011001,3,73,349,1809,3134,302,32
1,201011002,6,218,1070,2351,1117,203,20
2,201011005,5,36,186,1074,1020,157,29
3,201011006,0,44,200,1645,1781,121,19
4,201011007,3,14,59,539,597,140,26
...,...,...,...,...,...,...,...,...
519,217041478,10,72,497,1784,1125,172,45
520,217041479,13,158,1539,4108,2384,352,51
521,217041480,16,273,1265,2459,1064,202,38
522,297979799,0,0,0,0,0,0,0


In [14]:
# Average number of bedrooms per dwelling for each 2021 SA2.
#
# The k-th count column (after the code column) holds dwellings with k
# bedrooms, so the positional index doubles as the bedroom count; the
# "6 or more" column is counted as 6.  SA2s with no dwellings at all get NaN
# explicitly instead of through a 0/0 RuntimeWarning.
dwellings_by_beds = (
    df.drop_duplicates(subset='SA2_CODE_2021')  # first row wins, as before
    .set_index('SA2_CODE_2021')
)
bedroom_numbers = np.arange(dwellings_by_beds.shape[1])

n_dwellings = dwellings_by_beds.sum(axis=1, skipna=False)
n_bedrooms = dwellings_by_beds.mul(bedroom_numbers, axis=1).sum(axis=1, skipna=False)

avg_beds_by_sa2 = (n_bedrooms / n_dwellings.where(n_dwellings > 0)).round(5)
beds_2021 = avg_beds_by_sa2.to_dict()

  avg_beds = Nob/Nop


In [15]:
# Tabulate the 2021 averages and persist them as a curated CSV.
df_bed_2021 = pd.DataFrame(
    list(beds_2021.items()),
    columns=['SA2_2021', 'avg_beds_2021'],
)
df_bed_2021.to_csv('../data/curated/df_bed_2021.csv', index=False)

2016

In [16]:
# 2016 Census G38 table for Victoria at SA2 level.
census_2016_path = "../data/raw/2016_GCP_SA2_for_VIC_short-header/2016 Census GCP Statistical Area 2 for VIC/2016Census_G38_VIC_SA2.csv"
df = pd.read_csv(census_2016_path)

In [17]:
# Display the raw 2016 census table (all dwelling-structure columns).
df

Unnamed: 0,SA2_MAINCODE_2016,Separate_house_NofB_0_i_b,Separate_house_NofB_1,Separate_house_NofB_2,Separate_house_NofB_3,Separate_house_NofB_4,Separate_house_NofB_5,Separate_house_NofB_6_or_m,Separate_house_NofB_NS,Separate_house_Total,...,Dweling_structur_NS_Total,Total_NofB_0_i_b,Total_NofB_1,Total_NofB_2,Total_NofB_3,Total_NofB_4,Total_NofB_5,Total_NofB_6_or_m,Total_NofB_NS,Total_Total
0,201011001,0,10,111,1291,1778,191,26,48,3456,...,23,4,67,289,1411,1843,191,26,53,3886
1,201011002,3,53,634,1806,861,148,17,62,3587,...,28,13,183,1066,2155,948,170,25,95,4654
2,201011003,3,82,1242,3996,1900,276,52,159,7707,...,54,6,290,1795,4384,1974,276,54,210,8992
3,201011004,13,68,1076,4327,1220,147,28,187,7067,...,45,14,445,2242,5204,1477,159,36,281,9868
4,201011005,3,17,161,974,861,124,24,36,2198,...,7,3,31,185,1008,866,124,24,40,2281
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459,217041478,4,45,433,1620,910,145,39,77,3266,...,26,4,77,493,1657,922,148,39,90,3420
460,217041479,11,36,733,3656,1882,268,51,135,6768,...,26,12,123,1409,3812,1896,269,48,212,7786
461,217041480,6,30,435,2065,847,157,40,84,3655,...,39,19,262,1146,2317,881,156,43,143,4963
462,297979799,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
# Keep the SA2 code and the seven `Total_NofB_*` bedroom-count columns,
# ordered from 0 bedrooms up to "6 or more".
bedroom_count_cols = [
    'Total_NofB_0_i_b',
    'Total_NofB_1',
    'Total_NofB_2',
    'Total_NofB_3',
    'Total_NofB_4',
    'Total_NofB_5',
    'Total_NofB_6_or_m',
]
df = df.loc[:, ['SA2_MAINCODE_2016', *bedroom_count_cols]]

In [19]:
# Average number of bedrooms per dwelling for each 2016 SA2.
#
# The k-th count column (after the code column) holds dwellings with k
# bedrooms, so the positional index doubles as the bedroom count; the
# "6 or more" column is counted as 6.  SA2s with no dwellings at all get NaN
# explicitly instead of through a 0/0 RuntimeWarning.
dwellings_by_beds = (
    df.drop_duplicates(subset='SA2_MAINCODE_2016')  # first row wins, as before
    .set_index('SA2_MAINCODE_2016')
)
bedroom_numbers = np.arange(dwellings_by_beds.shape[1])

n_dwellings = dwellings_by_beds.sum(axis=1, skipna=False)
n_bedrooms = dwellings_by_beds.mul(bedroom_numbers, axis=1).sum(axis=1, skipna=False)

avg_beds_by_sa2 = (n_bedrooms / n_dwellings.where(n_dwellings > 0)).round(5)
beds_2016 = avg_beds_by_sa2.to_dict()

  avg_beds = Nob/Nop


In [20]:
# Tabulate the 2016 averages and persist them as a curated CSV.
df_bed_2016 = pd.DataFrame(
    list(beds_2016.items()),
    columns=['SA2_2016', 'avg_beds_2016'],
)
df_bed_2016.to_csv('../data/curated/df_bed_2016.csv', index=False)

2011

In [21]:
# 2011 Census B36 table for Victoria at SA2 level.
census_2011_path = "../data/raw/2011_BCP_SA2_for_VIC_short-header/2011 Census BCP Statistical Areas Level 2 for VIC/VIC/2011Census_B36_VIC_SA2_short.csv"
df = pd.read_csv(census_2011_path)

In [22]:
# Display the raw 2011 census table (all dwelling-structure columns).
df

Unnamed: 0,region_id,Separate_house_NofB_0_i_b,Separate_house_NofB_1,Separate_house_NofB_2,Separate_house_NofB_3,Separate_house_NofB_4,Separate_house_NofB_5,Separate_house_NofB_6_or_m,Separate_house_NofB_NS,Separate_house_Total,...,Dweling_structur_NS_Total,Total_NofB_0_i_b,Total_NofB_1,Total_NofB_2,Total_NofB_3,Total_NofB_4,Total_NofB_5,Total_NofB_6_or_m,Total_NofB_NS,Total_Total
0,201011001,0,0,79,1019,1227,126,11,24,2486,...,4,0,62,215,1075,1237,126,11,37,2763
1,201011002,8,43,746,2115,870,141,24,58,4005,...,9,14,176,1167,2263,888,141,24,87,4760
2,201011003,8,82,1112,4095,1741,210,50,111,7409,...,0,15,323,1740,4344,1772,213,58,154,8619
3,201011004,6,60,1014,4773,1231,148,18,116,7366,...,0,16,440,2062,5071,1248,151,24,201,9213
4,201011005,4,14,118,1038,731,106,20,24,2055,...,0,4,32,171,1055,738,106,20,24,2150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430,217021431,3,37,199,575,178,31,14,16,1053,...,0,3,63,253,649,203,35,14,29,1249
431,217021432,7,26,617,3602,1591,213,44,87,6187,...,0,13,137,1336,3760,1599,213,44,131,7233
432,217021433,4,41,499,2209,814,150,24,50,3791,...,0,13,246,1140,2372,840,153,27,105,4896
433,297979799,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Keep the region code and the seven `Total_NofB_*` bedroom-count columns,
# ordered from 0 bedrooms up to "6 or more".
bedroom_count_cols = [
    'Total_NofB_0_i_b',
    'Total_NofB_1',
    'Total_NofB_2',
    'Total_NofB_3',
    'Total_NofB_4',
    'Total_NofB_5',
    'Total_NofB_6_or_m',
]
df = df.loc[:, ['region_id', *bedroom_count_cols]]

In [24]:
# Average number of bedrooms per dwelling for each 2011 region.
#
# The k-th count column (after the code column) holds dwellings with k
# bedrooms, so the positional index doubles as the bedroom count; the
# "6 or more" column is counted as 6.  Regions with no dwellings at all get
# NaN explicitly instead of through a 0/0 RuntimeWarning.
dwellings_by_beds = (
    df.drop_duplicates(subset='region_id')  # first row wins, as before
    .set_index('region_id')
)
bedroom_numbers = np.arange(dwellings_by_beds.shape[1])

n_dwellings = dwellings_by_beds.sum(axis=1, skipna=False)
n_bedrooms = dwellings_by_beds.mul(bedroom_numbers, axis=1).sum(axis=1, skipna=False)

avg_beds_by_sa2 = (n_bedrooms / n_dwellings.where(n_dwellings > 0)).round(5)
beds_2011 = avg_beds_by_sa2.to_dict()

  avg_beds = Nob/Nop


In [25]:
# Tabulate the 2011 averages.
# NOTE(review): the 2011 census `region_id` values are labelled `SA2_2016` here
# so they can be joined with the 2016 table — presumably the codes are treated
# as unchanged between 2011 and 2016; confirm.
df_bed_2011 = pd.DataFrame(
    list(beds_2011.items()),
    columns=['SA2_2016', 'avg_beds_2011'],
)

In [26]:
# Attach the three census-year bedroom averages to every postcode.  The joins
# use the default inner merge, so a postcode row survives only if its SA2
# codes appear in all three bedroom tables.
sa2 = SA2[['postcode', 'SA2_MAINCODE_2016', 'SA2_CODE_2021']].rename(
    columns={'SA2_MAINCODE_2016': 'SA2_2016', 'SA2_CODE_2021': 'SA2_2021'}
)
df_b_final = (
    sa2.merge(df_bed_2021, on='SA2_2021')
    .merge(df_bed_2016, on='SA2_2016')
    .merge(df_bed_2011, on='SA2_2016')
)

In [27]:
# Mean change in average bedroom count per census interval (2011 -> 2016 and
# 2016 -> 2021).  Each postcode takes the values from its first row, so every
# row sharing a postcode ends up with the same `bed_rate`.
first_row_per_postcode = (
    df_b_final.drop_duplicates(subset='postcode').set_index('postcode')
)
beds_2011_pc = first_row_per_postcode['avg_beds_2011']
beds_2016_pc = first_row_per_postcode['avg_beds_2016']
beds_2021_pc = first_row_per_postcode['avg_beds_2021']
bed_rate_by_postcode = (
    (beds_2016_pc - beds_2011_pc) + (beds_2021_pc - beds_2016_pc)
) / 2

# Assigning the float result in one step avoids seeding the column with
# integer 0 and then writing floats into it row by row.
df_b_final['bed_rate'] = df_b_final['postcode'].map(bed_rate_by_postcode)

# The per-year averages are no longer needed once the rate is computed.
df_b_final = df_b_final.drop(
    columns=['avg_beds_2021', 'avg_beds_2016', 'avg_beds_2011']
)

In [28]:
# Display the postcode table with its bedroom growth figure.
df_b_final

Unnamed: 0,postcode,SA2_2016,SA2_2021,bed_rate
0,3000,206041122.0,206041503,0.055980
1,3001,206041122.0,206041503,0.055980
2,8001,206041122.0,206041503,0.055980
3,8045,206041122.0,206041503,0.055980
4,8051,206041122.0,206041503,0.055980
...,...,...,...,...
655,3984,205031093.0,205031093,0.068495
656,3990,205031093.0,205031093,0.068495
657,3991,205031093.0,205031093,0.068495
658,3992,205031093.0,205031093,0.068495


### Age and Population

Build a dictionary of the SA2s that contain more than one postcode

In [29]:
# Curated `domain_final` table; the cells below use its `postcode`,
# `SA2_CODE21` and `name` columns.
domain_csv_path = "../data/curated/domain_final.csv"
domain = pd.read_csv(domain_csv_path)

In [30]:
# Count the distinct postcodes seen under each 2021 SA2 code in `domain`, then
# list the SA2s that have more than one.
domain_sa2 = (
    domain.loc[:, ['postcode', 'SA2_CODE21']]
    .drop_duplicates()
    .groupby('SA2_CODE21')
    .count()
)
mul_sa2 = domain_sa2.index[domain_sa2['postcode'] > 1].tolist()

In [31]:
# For every multi-postcode SA2, count the `domain` rows in each of its
# postcodes:
#   mul_l[sa2][postcode] -> number of rows with a non-null `name`.
mul_l = {}
for sa2 in mul_sa2:
    rows_per_postcode = (
        domain.loc[domain['SA2_CODE21'] == sa2]
        .groupby('postcode')['name']
        .count()
    )
    mul_l[sa2] = {
        postcode: rows_per_postcode.loc[postcode]
        for postcode in rows_per_postcode.index.tolist()
    }
    

Deal with all age and population data

In [32]:
# Curated population table (`df_p.csv`): one set of age-band counts per SA2
# and year.
population_csv_path = "../data/curated/df_p.csv"
pp = pd.read_csv(population_csv_path)

In [33]:
# Population rows for the two comparison years, with the SA2 code column
# renamed to `SA2_2016` so it can be joined to the postcode table.
pp_2022 = pp.loc[pp['YEAR'] == 2022].rename(columns={'SA2_CODE': 'SA2_2016'})
pp_2025 = pp.loc[pp['YEAR'] == 2025].rename(columns={'SA2_CODE': 'SA2_2016'})

In [34]:
# Join each year's population rows onto the postcode table (inner merge on
# `SA2_2016`), then drop the bookkeeping columns so only the postcode/SA2 keys
# and the population counts remain.
unused_cols = ['SA2_2016', 'Unnamed: 0', 'YEAR', 'SA2_NAME', 'SEX', 'rate', 'age_max', 'bed_rate']
df_pp_2022 = df_b_final.merge(pp_2022, on='SA2_2016').drop(columns=unused_cols)
df_pp_2025 = df_b_final.merge(pp_2025, on='SA2_2016').drop(columns=unused_cols)

In [35]:
# Columns to rescale: everything after the first two columns of the 2022 table.
l_change = df_pp_2022.columns[2:].tolist()

In [36]:
# Split SA2-level population between the postcodes of multi-postcode SA2s.
#
# A row whose (SA2, postcode) pair appears in `mul_l` has every column in
# `l_change` multiplied by that postcode's share of the SA2's `domain` rows and
# rounded to 5 decimals; all other rows are left untouched.  Each row is scaled
# exactly once from its own values, so a repeated (postcode, SA2) pair is no
# longer scaled once per occurrence.
# NOTE: re-running this cell without rebuilding `df_pp_2022` applies the share
# a second time.
shares = pd.Series(
    [
        mul_l[sa2_key][pc] / sum(mul_l[sa2_key].values())
        if sa2_key in mul_l and pc in mul_l[sa2_key]
        else np.nan
        for sa2_key, pc in zip(df_pp_2022['SA2_2021'], df_pp_2022['postcode'])
    ],
    index=df_pp_2022.index,
    dtype=float,
)
split_rows = shares.notna()

if split_rows.any():
    # Cast up front: the scaled values are fractional, and writing floats into
    # integer columns relies on a silent upcast that newer pandas warns about.
    df_pp_2022[l_change] = df_pp_2022[l_change].astype(float)
    df_pp_2022.loc[split_rows, l_change] = (
        df_pp_2022.loc[split_rows, l_change]
        .mul(shares[split_rows], axis=0)
        .round(5)
    )

In [37]:
# Population-weighted average age for 2022.
#
# Columns 2..19 are the age-band counts.  Each band is represented by the first
# number in its label plus 2 (e.g. "Age0-4" -> 2, "Age85+" -> 87); the weighted
# sum is then divided by the `Total` column.
df_age_2022 = df_pp_2022.iloc[:, 2:20]

weighted_age_sum = 0
for band_label, band_population in df_age_2022.items():
    band_start = int(re.findall(r'[0-9]+', band_label)[0])
    weighted_age_sum = weighted_age_sum + (band_start + 2) * band_population

df_pp_2022['total_age'] = weighted_age_sum
df_pp_2022['avg_age'] = df_pp_2022['total_age'] / df_pp_2022['Total']

In [38]:
# Display the 2022 population table with the derived age columns.
df_pp_2022

Unnamed: 0,postcode,SA2_2021,Age0-4,Age5-9,Age10-14,Age15-19,Age20-24,Age25-29,Age30-34,Age35-39,...,Age55-59,Age60-64,Age65-69,Age70-74,Age75-79,Age80-84,Age85+,Total,total_age,avg_age
0,3000,206041503,1246.00000,535.00000,330.00000,3323.00000,16005.00000,15014.00000,7925.00000,4323.00000,...,1021.00000,1013.00000,819.00000,664.00000,456.00000,190.00000,127.00000,57849.0000,1.745888e+06,30.180090
1,3001,206041503,1246.00000,535.00000,330.00000,3323.00000,16005.00000,15014.00000,7925.00000,4323.00000,...,1021.00000,1013.00000,819.00000,664.00000,456.00000,190.00000,127.00000,57849.0000,1.745888e+06,30.180090
2,8001,206041503,1246.00000,535.00000,330.00000,3323.00000,16005.00000,15014.00000,7925.00000,4323.00000,...,1021.00000,1013.00000,819.00000,664.00000,456.00000,190.00000,127.00000,57849.0000,1.745888e+06,30.180090
3,8045,206041503,1246.00000,535.00000,330.00000,3323.00000,16005.00000,15014.00000,7925.00000,4323.00000,...,1021.00000,1013.00000,819.00000,664.00000,456.00000,190.00000,127.00000,57849.0000,1.745888e+06,30.180090
4,8051,206041503,1246.00000,535.00000,330.00000,3323.00000,16005.00000,15014.00000,7925.00000,4323.00000,...,1021.00000,1013.00000,819.00000,664.00000,456.00000,190.00000,127.00000,57849.0000,1.745888e+06,30.180090
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,3984,205031093,358.03704,368.40741,384.22222,330.81481,222.18519,245.77778,312.14815,339.11111,...,460.96296,554.81481,564.66667,519.03704,404.96296,242.66667,218.55556,6613.7037,3.050444e+05,46.123089
656,3990,205031093,1381.00000,1421.00000,1482.00000,1276.00000,857.00000,948.00000,1204.00000,1308.00000,...,1778.00000,2140.00000,2178.00000,2002.00000,1562.00000,936.00000,843.00000,25510.0000,1.176600e+06,46.123089
657,3991,205031093,1381.00000,1421.00000,1482.00000,1276.00000,857.00000,948.00000,1204.00000,1308.00000,...,1778.00000,2140.00000,2178.00000,2002.00000,1562.00000,936.00000,843.00000,25510.0000,1.176600e+06,46.123089
658,3992,205031093,1381.00000,1421.00000,1482.00000,1276.00000,857.00000,948.00000,1204.00000,1308.00000,...,1778.00000,2140.00000,2178.00000,2002.00000,1562.00000,936.00000,843.00000,25510.0000,1.176600e+06,46.123089


In [39]:
# Columns to rescale: everything after the first two columns of the 2025 table.
l_change = df_pp_2025.columns[2:].tolist()

In [40]:
# Split SA2-level population between the postcodes of multi-postcode SA2s.
#
# A row whose (SA2, postcode) pair appears in `mul_l` has every column in
# `l_change` multiplied by that postcode's share of the SA2's `domain` rows and
# rounded to 5 decimals; all other rows are left untouched.  Each row is scaled
# exactly once from its own values, so a repeated (postcode, SA2) pair is no
# longer scaled once per occurrence.
# NOTE: re-running this cell without rebuilding `df_pp_2025` applies the share
# a second time.
shares = pd.Series(
    [
        mul_l[sa2_key][pc] / sum(mul_l[sa2_key].values())
        if sa2_key in mul_l and pc in mul_l[sa2_key]
        else np.nan
        for sa2_key, pc in zip(df_pp_2025['SA2_2021'], df_pp_2025['postcode'])
    ],
    index=df_pp_2025.index,
    dtype=float,
)
split_rows = shares.notna()

if split_rows.any():
    # Cast up front: the scaled values are fractional, and writing floats into
    # integer columns relies on a silent upcast that newer pandas warns about.
    df_pp_2025[l_change] = df_pp_2025[l_change].astype(float)
    df_pp_2025.loc[split_rows, l_change] = (
        df_pp_2025.loc[split_rows, l_change]
        .mul(shares[split_rows], axis=0)
        .round(5)
    )

In [41]:
# Population-weighted average age for 2025.
#
# Columns 2..19 are the age-band counts.  Each band is represented by the first
# number in its label plus 2 (e.g. "Age0-4" -> 2, "Age85+" -> 87); the weighted
# sum is then divided by the `Total` column.
df_age_2025 = df_pp_2025.iloc[:, 2:20]

weighted_age_sum = 0
for band_label, band_population in df_age_2025.items():
    band_start = int(re.findall(r'[0-9]+', band_label)[0])
    weighted_age_sum = weighted_age_sum + (band_start + 2) * band_population

df_pp_2025['total_age'] = weighted_age_sum
df_pp_2025['avg_age'] = df_pp_2025['total_age'] / df_pp_2025['Total']

In [42]:
# Display the 2025 population table with the derived age columns.
df_pp_2025

Unnamed: 0,postcode,SA2_2021,Age0-4,Age5-9,Age10-14,Age15-19,Age20-24,Age25-29,Age30-34,Age35-39,...,Age55-59,Age60-64,Age65-69,Age70-74,Age75-79,Age80-84,Age85+,Total,total_age,avg_age
0,3000,206041503,1413.00000,602.00000,380.0000,3293.00000,15302.00000,15646.00000,9167.00000,5022.00000,...,1129.0000,1133.00000,959.00000,772.00000,595.00000,291.00000,169.00000,61646.00000,1.907902e+06,30.949324
1,3001,206041503,1413.00000,602.00000,380.0000,3293.00000,15302.00000,15646.00000,9167.00000,5022.00000,...,1129.0000,1133.00000,959.00000,772.00000,595.00000,291.00000,169.00000,61646.00000,1.907902e+06,30.949324
2,8001,206041503,1413.00000,602.00000,380.0000,3293.00000,15302.00000,15646.00000,9167.00000,5022.00000,...,1129.0000,1133.00000,959.00000,772.00000,595.00000,291.00000,169.00000,61646.00000,1.907902e+06,30.949324
3,8045,206041503,1413.00000,602.00000,380.0000,3293.00000,15302.00000,15646.00000,9167.00000,5022.00000,...,1129.0000,1133.00000,959.00000,772.00000,595.00000,291.00000,169.00000,61646.00000,1.907902e+06,30.949324
4,8051,206041503,1413.00000,602.00000,380.0000,3293.00000,15302.00000,15646.00000,9167.00000,5022.00000,...,1129.0000,1133.00000,959.00000,772.00000,595.00000,291.00000,169.00000,61646.00000,1.907902e+06,30.949324
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,3984,205031093,362.44444,386.81481,397.7037,361.92593,218.55556,231.77778,310.07407,356.74074,...,467.7037,577.37037,604.85185,547.81481,468.48148,284.66667,242.14815,6933.11111,3.239701e+05,46.727956
656,3990,205031093,1398.00000,1492.00000,1534.0000,1396.00000,843.00000,894.00000,1196.00000,1376.00000,...,1804.0000,2227.00000,2333.00000,2113.00000,1807.00000,1098.00000,934.00000,26742.00000,1.249599e+06,46.727956
657,3991,205031093,1398.00000,1492.00000,1534.0000,1396.00000,843.00000,894.00000,1196.00000,1376.00000,...,1804.0000,2227.00000,2333.00000,2113.00000,1807.00000,1098.00000,934.00000,26742.00000,1.249599e+06,46.727956
658,3992,205031093,1398.00000,1492.00000,1534.0000,1396.00000,843.00000,894.00000,1196.00000,1376.00000,...,1804.0000,2227.00000,2333.00000,2113.00000,1807.00000,1098.00000,934.00000,26742.00000,1.249599e+06,46.727956


In [43]:
# Relative change from 2022 to 2025 in total population (`p_rate`) and in
# average age (`a_rate`), stored on the 2025 table.  Each postcode uses the
# values from its first row in each year's table, so rows sharing a postcode
# get the same rates.
first_2022 = df_pp_2022.drop_duplicates(subset='postcode').set_index('postcode')
first_2025 = df_pp_2025.drop_duplicates(subset='postcode').set_index('postcode')
postcodes = df_pp_2025['postcode']

for source_col, rate_col in (('Total', 'p_rate'), ('avg_age', 'a_rate')):
    base = postcodes.map(first_2022[source_col])
    later = postcodes.map(first_2025[source_col])
    # A zero (or missing) 2022 value has no defined growth rate: mask it so the
    # result is NaN rather than inf accompanied by a divide-by-zero warning.
    df_pp_2025[rate_col] = ((later - base) / base.where(base != 0)).round(5)


  df_pp_2025.loc[df_pp_2025['postcode'] == postcode, 'p_rate'] = round((p2025-p2022)/p2022,5)


In [44]:
# Keep only the keys and the two growth rates.
df_pp_final = df_pp_2025.loc[:, ['postcode', 'SA2_2021', 'p_rate', 'a_rate']]

In [45]:
# Display the population and age growth rates per postcode row.
df_pp_final

Unnamed: 0,postcode,SA2_2021,p_rate,a_rate
0,3000,206041503,0.06564,0.02549
1,3001,206041503,0.06564,0.02549
2,8001,206041503,0.06564,0.02549
3,8045,206041503,0.06564,0.02549
4,8051,206041503,0.06564,0.02549
...,...,...,...,...
655,3984,205031093,0.04829,0.01311
656,3990,205031093,0.04829,0.01311
657,3991,205031093,0.04829,0.01311
658,3992,205031093,0.04829,0.01311


<h1>Final integration</h1>

In [65]:
# Combine the bedroom, income and population/age growth figures.
# Both `df_b_final` and `df_pp_final` carry `SA2_2021`, so the postcode merge
# suffixes them `_x` / `_y`; only the copy from the bedroom table (`_x`) is
# kept and given its plain name back.  Both merges are inner joins.
df_final = (
    df_b_final.merge(df_income, on='SA2_2016')
    .merge(df_pp_final, on='postcode')
    .drop(columns='SA2_2021_y')
    .rename(columns={'SA2_2021_x': 'SA2_2021'})
)

In [67]:
# Display the combined growth-rate table.
df_final

Unnamed: 0,postcode,SA2_2016,SA2_2021,bed_rate,income_rate,p_rate,a_rate
0,3000,206041122.0,206041503,0.055980,0.14455,0.06564,0.02549
1,3001,206041122.0,206041503,0.055980,0.14455,0.06564,0.02549
2,8001,206041122.0,206041503,0.055980,0.14455,0.06564,0.02549
3,8045,206041122.0,206041503,0.055980,0.14455,0.06564,0.02549
4,8051,206041122.0,206041503,0.055980,0.14455,0.06564,0.02549
...,...,...,...,...,...,...,...
655,3984,205031093.0,205031093,0.068495,0.03666,0.04829,0.01311
656,3990,205031093.0,205031093,0.068495,0.03666,0.04829,0.01311
657,3991,205031093.0,205031093,0.068495,0.03666,0.04829,0.01311
658,3992,205031093.0,205031093,0.068495,0.03666,0.04829,0.01311


<h1>Ranking</h1>

In [68]:
# Persist the combined growth-rate table (without the row index).
df_final.to_csv(
    '../data/curated/growth_rate_final.csv',
    index=False,
)