# Add SEIFA advantage/disadvantage state percentiles for post codes to the school attendance data
* Reads the cleaned attendance data ```../data/cleaned_data_schoolattendancerate.csv```
* Reads the cleaned SEIFA data ```../data/SEIFA-2016-by-post-code-cleaned.csv```
* Selects the State percentile data from the SEIFA for each school postcode
* Adds the State Percentile SEIFA data to the attendance data 
* Writes out the file as ```../data/cleaned_data_schoolattendancerate_seifa.csv```
-------

In [1]:
import pandas as pd
import numpy as np
import math 

In [2]:
# Inputs: the cleaned SEIFA post code table and the cleaned attendance data.
aus_seifa_file_name = '../data/SEIFA-2016-by-post-code-cleaned.csv'
attendance_file_name = '../data/cleaned_data_schoolattendancerate.csv'

# Output: the attendance data with the SEIFA column added.
attendance_seifa_file_name = '../data/cleaned_data_schoolattendancerate_seifa.csv'

In [3]:
# Load the attendance data into a dataframe and preview its first five rows.
attendance_df = pd.read_csv(filepath_or_buffer=attendance_file_name)
attendance_df.head(n=5)

Unnamed: 0,School Code,School,Attendance Year,School Type,School Category,Remoteness Area,Region,Local Government Area,State Electorate,FTE Days Attended,Possible Attendance Days,Attendance Rate,Official Low Year Level,Official High Year Level,Suburb,Post Code,All Student Count,Longitude,Latitude
0,591,Abercorn State School,2015,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1987.5,2103.0,94.5,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
1,591,Abercorn State School,2016,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1632.5,1705.0,95.7,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
2,591,Abercorn State School,2017,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1405.0,1440.0,97.6,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
3,591,Abercorn State School,2018,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1640.0,1700.0,96.5,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
4,591,Abercorn State School,2019,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1657.5,1728.0,95.9,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955


In [4]:
# Load the SEIFA post code table into a dataframe and preview its first five rows.
aus_seifa_df = pd.read_csv(filepath_or_buffer=aus_seifa_file_name)
aus_seifa_df.head(n=5)

Unnamed: 0,Postal Area Code,Score,Rank within Australia,Rank within Australia - Decile,Rank within Australia - Percentile,Rank within State or Territory,Rank within State or Territory - Decile,Rank within State or Territory - Percentile,Minimum score for SA1s in area,Maximum score for SA1s in area,Usual resident population
0,800,1096,2398,10,92,33,10,92,914,1167,6464
1,810,1052,2121,9,81,26,8,73,868,1168,33302
2,812,1020,1800,7,69,22,7,62,782,1130,18873
3,815,1096,2406,10,92,34,10,95,1096,1096,292
4,820,1094,2389,10,91,32,9,89,523,1211,19447


In [5]:
# Keep only the SEIFA rows whose postal area code appears among the school
# post codes, then reserve an empty (NaN) column at position 12 of the
# attendance data to receive the SEIFA value.
post_codes = attendance_df['Post Code'].unique()
in_school_post_codes = aus_seifa_df['Postal Area Code'].isin(post_codes)
qld_seifa_df = aus_seifa_df[in_school_post_codes]
attendance_df.insert(
    loc=12,
    column='SEIFA Territory Percentile',
    value=math.nan,
    allow_duplicates=True,
)
attendance_df

Unnamed: 0,School Code,School,Attendance Year,School Type,School Category,Remoteness Area,Region,Local Government Area,State Electorate,FTE Days Attended,Possible Attendance Days,Attendance Rate,SEIFA Territory Percentile,Official Low Year Level,Official High Year Level,Suburb,Post Code,All Student Count,Longitude,Latitude
0,591,Abercorn State School,2015,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1987.5,2103.0,94.5,,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
1,591,Abercorn State School,2016,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1632.5,1705.0,95.7,,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
2,591,Abercorn State School,2017,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1405.0,1440.0,97.6,,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
3,591,Abercorn State School,2018,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1640.0,1700.0,96.5,,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
4,591,Abercorn State School,2019,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1657.5,1728.0,95.9,,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6104,275,Zillmere State School,2015,State School,Primary,Major Cities of Australia,Metropolitan,Brisbane (C),Aspley,11786.0,12919.0,91.2,,Prep Year,Year 6,Zillmere,4034,130.0,153.037355,-27.364673
6105,275,Zillmere State School,2016,State School,Primary,Major Cities of Australia,Metropolitan,Brisbane (C),Aspley,12816.5,13956.0,91.8,,Prep Year,Year 6,Zillmere,4034,130.0,153.037355,-27.364673
6106,275,Zillmere State School,2017,State School,Primary,Major Cities of Australia,Metropolitan,Brisbane (C),Aspley,11195.0,12314.0,90.9,,Prep Year,Year 6,Zillmere,4034,130.0,153.037355,-27.364673
6107,275,Zillmere State School,2018,State School,Primary,Major Cities of Australia,Metropolitan,Brisbane (C),Aspley,12004.0,13503.0,88.9,,Prep Year,Year 6,Zillmere,4034,130.0,153.037355,-27.364673


In [6]:
# Attach the SEIFA value to every attendance row by matching on post code.
#
# This is done as one vectorised lookup instead of a Python loop that scanned
# the whole SEIFA table once per attendance row. The attendance columns are
# addressed by name rather than by position (previously 16 and 12), so the
# cell still targets the right columns if the column order changes.

# Column 7 of the SEIFA table is selected by position, as before.
# NOTE(review): per the table preview this should be
# 'Rank within State or Territory - Percentile' - confirm against the CSV.
percentile = qld_seifa_df.columns[7]

# Post code -> percentile lookup. keep=False discards every post code that
# occurs more than once in the SEIFA table, so only unambiguous single-row
# matches are used (the same rule the row-by-row version applied).
seifa_by_post_code = (
    qld_seifa_df
    .drop_duplicates(subset='Postal Area Code', keep=False)
    .set_index('Postal Area Code')[percentile]
)

# There are a couple of post codes in the attendance data that are missing
# from the SEIFA data; map() leaves those rows as NaN. The column is kept as
# float so its dtype does not depend on whether any row is unmatched.
attendance_df['SEIFA Territory Percentile'] = (
    attendance_df['Post Code'].map(seifa_by_post_code).astype(float)
)
attendance_df.head(10)

Unnamed: 0,School Code,School,Attendance Year,School Type,School Category,Remoteness Area,Region,Local Government Area,State Electorate,FTE Days Attended,Possible Attendance Days,Attendance Rate,SEIFA Territory Percentile,Official Low Year Level,Official High Year Level,Suburb,Post Code,All Student Count,Longitude,Latitude
0,591,Abercorn State School,2015,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1987.5,2103.0,94.5,11.0,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
1,591,Abercorn State School,2016,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1632.5,1705.0,95.7,11.0,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
2,591,Abercorn State School,2017,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1405.0,1440.0,97.6,11.0,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
3,591,Abercorn State School,2018,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1640.0,1700.0,96.5,11.0,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
4,591,Abercorn State School,2019,State School,Primary,Outer Regional Australia,Central Queensland,North Burnett (R),Callide,1657.5,1728.0,95.9,11.0,Prep Year,Year 6,Abercorn,4627,18.0,151.127031,-25.135955
5,1275,Abergowrie State School,2015,State School,Primary,Remote Australia,North Queensland,Hinchinbrook (S),Hinchinbrook,1007.0,1067.0,94.4,27.0,Prep Year,Year 6,Abergowrie,4850,4.0,145.88351,-18.474697
6,1275,Abergowrie State School,2016,State School,Primary,Remote Australia,North Queensland,Hinchinbrook (S),Hinchinbrook,420.5,484.0,86.9,27.0,Prep Year,Year 6,Abergowrie,4850,4.0,145.88351,-18.474697
7,1275,Abergowrie State School,2017,State School,Primary,Remote Australia,North Queensland,Hinchinbrook (S),Hinchinbrook,371.5,384.0,96.7,27.0,Prep Year,Year 6,Abergowrie,4850,4.0,145.88351,-18.474697
8,1275,Abergowrie State School,2018,State School,Primary,Remote Australia,North Queensland,Hinchinbrook (S),Hinchinbrook,614.0,679.0,90.4,27.0,Prep Year,Year 6,Abergowrie,4850,4.0,145.88351,-18.474697
9,1275,Abergowrie State School,2019,State School,Primary,Remote Australia,North Queensland,Hinchinbrook (S),Hinchinbrook,596.0,652.0,91.4,27.0,Prep Year,Year 6,Abergowrie,4850,4.0,145.88351,-18.474697


In [8]:
# Save the attendance data, including the added SEIFA column, to CSV.
# The header row is written; the dataframe index is not.
attendance_df.to_csv(
    attendance_seifa_file_name,
    header=True,
    index=False,
)