In [1]:
import pandas as pd
import geopandas as gpd

# Read the SA2 -> rental-suburb-group lookup; its 'geometry' column holds WKT text.
regions_df = pd.read_csv('../../data/raw/location/sa2_to_rental_suburb_groups.csv')

# Parse the WKT strings into geometries and wrap the table as a GeoDataFrame
# whose active geometry column is 'geometry'.
regions_df = gpd.GeoDataFrame(
    regions_df.assign(geometry=gpd.GeoSeries.from_wkt(regions_df['geometry'])),
    geometry='geometry',
)

def fix_col_names(df):
    """Normalise the column labels of ``df`` in place.

    Every label is lower-cased and each space is replaced by an underscore.
    The frame is modified directly; nothing is returned.
    """
    lowercased = df.columns.str.lower()
    df.columns = lowercased.str.replace(' ', '_')

In [9]:
# Load the raw crime table from disk.
crime_df = pd.read_csv(filepath_or_buffer='../../data/raw/crime/crime.csv')

def generate_quarters(year, quarter, num_quarters=4):
    """Return the run of consecutive quarters that ends at ``(year, quarter)``.

    Parameters
    ----------
    year : int
        Calendar year of the final quarter in the run.
    quarter : int
        Final quarter in the run, 1 through 4.
    num_quarters : int, default 4
        How many quarters to return, counting the final one. The default
        reproduces the original trailing-twelve-month behaviour.

    Returns
    -------
    list of (year, quarter) tuples
        Ordered oldest first; the last element is ``(year, quarter)``.
        Empty when ``num_quarters`` is zero or negative.

    Raises
    ------
    ValueError
        If ``quarter`` is not between 1 and 4.

    Examples
    --------
    >>> generate_quarters(2015, 2)
    [(2014, 3), (2014, 4), (2015, 1), (2015, 2)]
    """
    if not 1 <= quarter <= 4:
        raise ValueError(f"quarter must be between 1 and 4, got {quarter!r}")

    result = []
    # Walk from the oldest quarter up to the current one (quarters_back == 0).
    for quarters_back in range(num_quarters - 1, -1, -1):
        # Work with a zero-based quarter so floor division yields how many
        # whole years to step back (a non-positive number) and the remainder
        # is the zero-based quarter within that year. This stays correct for
        # windows longer than one year.
        year_shift, zero_based_quarter = divmod(quarter - 1 - quarters_back, 4)
        result.append((year + year_shift, zero_based_quarter + 1))
    return result

# Expand every crime row into one row per quarter of the window that ends in
# quarter `end_quarter` of that row's 'year'. The statistics are copied
# unchanged into each quarter; only 'year' and 'quarter' differ.
# NOTE(review): the end quarter is hard-coded to 2, presumably because the
# source figures are "year ending June" -- confirm against 'year_ending'.
end_quarter = 2

# The quarter pattern is identical for every row, so compute it once as
# (year offset, quarter) pairs relative to year 0 instead of once per row.
quarter_offsets = generate_quarters(0, end_quarter)
quarters_per_row = len(quarter_offsets)

# Repeat each source row `quarters_per_row` times. Positional selection keeps
# the source index labels on the copies (as the row-by-row version did) and
# avoids building a separate Series per output row via iterrows().
repeated_positions = [
    position
    for position in range(len(crime_df))
    for _ in range(quarters_per_row)
]

# Tile the per-row pattern so it lines up with the repeated rows.
tiled_year_offsets = [offset for offset, _ in quarter_offsets] * len(crime_df)
tiled_quarters = [qtr for _, qtr in quarter_offsets] * len(crime_df)

# Create a new dataframe from the expanded rows
expanded_crime_df = (
    crime_df.iloc[repeated_positions]
    .drop(columns=['Unnamed: 0', 'year_ending'])
    .assign(
        year=lambda frame: frame['year'] + tiled_year_offsets,
        quarter=tiled_quarters,
    )
)

expanded_crime_df

Unnamed: 0,suburbs,year,a20_assault_and_related_offences,a50_robbery,"a70_stalking,_harassment_and_threatening_behaviour",a80_dangerous_and_negligent_acts_endangering_people,other_crimes_against_the_person,b10_arson,b20_property_damage,b30_burglary/break_and_enter,...,e20_breaches_of_orders,f20_transport_regulation_offences,f90_miscellaneous_offences,c90_other_drug_offences,f30_other_government_regulatory_offences,d40_public_security_offences,f10_regulatory_driving_offences,b60_bribery,total_crimes,quarter
0,Albert Park-Middle Park-West St Kilda,2014,178.045119,15.312481,89.447606,14.557886,71.608504,5.309056,161.823843,165.256518,...,210.483706,1.109359,0.321457,9.142897e-01,10.106054,3.366460e+00,1.075971,0.323363,2586.271942,3
0,Albert Park-Middle Park-West St Kilda,2014,178.045119,15.312481,89.447606,14.557886,71.608504,5.309056,161.823843,165.256518,...,210.483706,1.109359,0.321457,9.142897e-01,10.106054,3.366460e+00,1.075971,0.323363,2586.271942,4
0,Albert Park-Middle Park-West St Kilda,2015,178.045119,15.312481,89.447606,14.557886,71.608504,5.309056,161.823843,165.256518,...,210.483706,1.109359,0.321457,9.142897e-01,10.106054,3.366460e+00,1.075971,0.323363,2586.271942,1
0,Albert Park-Middle Park-West St Kilda,2015,178.045119,15.312481,89.447606,14.557886,71.608504,5.309056,161.823843,165.256518,...,210.483706,1.109359,0.321457,9.142897e-01,10.106054,3.366460e+00,1.075971,0.323363,2586.271942,2
1,Albert Park-Middle Park-West St Kilda,2015,163.245015,18.871981,60.409599,12.829730,75.826735,8.872914,144.314600,174.305985,...,282.063651,5.377950,2.360719,1.157002e-09,5.624394,1.606563e+00,0.466445,0.161682,2712.910871,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1438,Yarraville-Seddon,2023,97.019121,9.352908,21.195015,6.622976,32.999610,4.028832,131.906303,121.507981,...,117.863711,0.627582,0.125516,0.000000e+00,0.502065,1.553342e-09,0.000000,0.000000,1401.943417,2
1439,Yarraville-Seddon,2023,99.848417,12.814894,18.733114,9.803561,27.770208,8.478425,124.552530,134.066846,...,125.408341,1.926957,0.376549,0.000000e+00,0.324076,0.000000e+00,0.000000,0.000000,1565.947609,3
1439,Yarraville-Seddon,2023,99.848417,12.814894,18.733114,9.803561,27.770208,8.478425,124.552530,134.066846,...,125.408341,1.926957,0.376549,0.000000e+00,0.324076,0.000000e+00,0.000000,0.000000,1565.947609,4
1439,Yarraville-Seddon,2024,99.848417,12.814894,18.733114,9.803561,27.770208,8.478425,124.552530,134.066846,...,125.408341,1.926957,0.376549,0.000000e+00,0.324076,0.000000e+00,0.000000,0.000000,1565.947609,1


In [10]:
# Load the per-suburb school statistics table and display it.
schools_by_region = pd.read_csv(
    filepath_or_buffer='../../data/raw/schools/schools_by_region.csv',
)

schools_by_region

Unnamed: 0,best_school_number_vce_subjects,best_school_satisfactory_complete_vce_percent,best_school_median_study_score,best_school_study_score_over_40_percent,best_school_percentage_applying_to_victorian_uni,suburbs,best_school_school_name,avg_school_number_vce_subjects,avg_school_satisfactory_complete_vce_percent,avg_school_median_study_score,avg_school_study_score_over_40_percent,avg_school_percentage_applying_to_victorian_uni,zoned_school_number_vce_subjects,zoned_school_satisfactory_complete_vce_percent,zoned_school_median_study_score,zoned_school_study_score_over_40_percent,zoned_school_percentage_applying_to_victorian_uni
0,57.0,99.0,31.0,6.3,83.0,Albert Park-Middle Park-West St Kilda,albert park college,57.0,99.0,31.00,6.300,83.0,50.954001,98.480180,30.937726,6.764547,78.993583
1,50.0,99.0,30.0,7.8,84.0,Altona,mount st joseph girls college,46.0,99.5,28.75,4.675,69.5,11.841528,45.544337,12.752414,0.500988,27.782046
2,30.0,100.0,34.0,19.8,97.0,Armadale,lauriston girls school,30.0,100.0,34.00,19.800,97.0,32.206598,98.164926,31.139504,7.744409,92.199390
3,55.0,96.0,30.0,6.4,65.0,Aspendale-Chelsea-Carrum,mordialloc college,55.0,96.0,30.00,6.400,65.0,18.976452,33.122534,10.350792,2.208169,22.426716
4,45.0,95.0,28.0,3.0,38.0,Bairnsdale,nagle college,46.5,97.0,27.00,1.900,38.0,48.000000,99.000000,26.000000,0.800000,38.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,,,,0.0,,West Footscray,,,,,0.000,,40.613723,80.422973,25.163501,6.720191,65.278030
140,36.0,96.0,22.0,1.4,78.0,Whittlesea,whittlesea secondary college,36.0,96.0,22.00,1.400,78.0,34.526725,88.448137,20.632393,1.339259,65.849948
141,74.0,97.0,32.0,12.0,72.0,Williamstown,williamstown high school,61.5,96.5,30.00,7.000,63.0,28.441854,37.281890,12.299180,4.612193,27.673155
142,36.0,100.0,28.0,4.0,59.0,Wodonga,victory lutheran college,40.5,99.5,28.00,3.700,60.0,0.000000,0.000000,0.000000,0.000000,0.000000


In [32]:
# Load the distance table, then discard the leftover index columns and the
# geometry/lookup columns that are not wanted in the merged output.
distances = pd.read_csv('../downloading/distances.csv')
distances = distances.drop(
    labels=['Unnamed: 0.1', 'Unnamed: 0', 'geometry', 'regions', 'code', 'centroid'],
    axis='columns',
)

distances

Unnamed: 0,suburbs,crow_distance_to_cbd,distance_to_cbd,duration_to_cbd,route_to_cbd
0,Albert Park-Middle Park-West St Kilda,4.535857,6702.5,811.4,"[[144.967419, -37.851489], [144.967423, -37.85..."
1,Altona,12.833585,17936.7,1656.4,"[[144.825016, -37.848943], [144.825517, -37.84..."
2,Armadale,7.261986,10138.0,1002.8,"[[145.020491, -37.856724], [145.020572, -37.85..."
3,Aspendale-Chelsea-Carrum,30.465132,50846.3,2914.2,"[[145.124414, -38.052875], [145.126879, -38.05..."
4,Bairnsdale,233.444538,279492.3,12766.0,"[[147.613996, -37.829069], [147.614083, -37.82..."
...,...,...,...,...,...
139,West Footscray,8.690401,10579.2,1251.1,"[[144.864518, -37.801985], [144.864918, -37.80..."
140,Whittlesea,36.783726,49950.5,3598.9,"[[145.101932, -37.49807], [145.104072, -37.498..."
141,Williamstown,8.738097,15184.9,1504.5,"[[144.884612, -37.860237], [144.886498, -37.86..."
142,Wodonga,249.542456,306607.1,12400.8,"[[146.809716, -36.113402], [146.810063, -36.11..."


In [18]:
# Load the land-cover table and discard the leftover index column.
land_cover = pd.read_csv('../../data/raw/land_cover/land_cover.csv')
land_cover = land_cover.drop(labels=['Unnamed: 0'], axis='columns')

land_cover

Unnamed: 0,suburbs,total_area,developed_area,built_percentage,urban_percentage,disturbed_percentage,water_percentage,developed_percentage,nature_percentage,year,quarter
0,Albert Park-Middle Park-West St Kilda,7043125.0,4691875.0,0.032212,0.595794,0.038158,0.047387,0.666164,0.286450,2000,1
1,Albert Park-Middle Park-West St Kilda,7043125.0,4691875.0,0.032212,0.595794,0.038158,0.047387,0.666164,0.286450,2000,2
2,Albert Park-Middle Park-West St Kilda,7043125.0,4691875.0,0.032212,0.595794,0.038158,0.047387,0.666164,0.286450,2000,3
3,Albert Park-Middle Park-West St Kilda,7043125.0,4691875.0,0.032212,0.595794,0.038158,0.047387,0.666164,0.286450,2000,4
4,Albert Park-Middle Park-West St Kilda,7043125.0,4691875.0,0.032212,0.595794,0.038158,0.047387,0.666164,0.286450,2001,1
...,...,...,...,...,...,...,...,...,...,...,...
11515,Yarraville-Seddon,7304375.0,6715625.0,0.068452,0.830581,0.020365,0.009241,0.919398,0.071361,2018,4
11516,Yarraville-Seddon,7304375.0,6715625.0,0.068452,0.830581,0.020365,0.009241,0.919398,0.071361,2019,1
11517,Yarraville-Seddon,7304375.0,6715625.0,0.068452,0.830581,0.020365,0.009241,0.919398,0.071361,2019,2
11518,Yarraville-Seddon,7304375.0,6715625.0,0.068452,0.830581,0.020365,0.009241,0.919398,0.071361,2019,3


In [22]:
# Distinct suburb names, in order of first appearance in the land-cover table.
suburbs = land_cover['suburbs'].unique()

# Scaffold for the left joins: one row for every suburb x year x quarter
# combination, covering years 2000 through 2027 and quarters 1 through 4.
df_data = [
    (suburb_name, scaffold_year, scaffold_quarter)
    for suburb_name in suburbs
    for scaffold_year in range(2000, 2028)
    for scaffold_quarter in range(1, 5)
]

df_left_join = pd.DataFrame(df_data, columns=['suburbs', 'year', 'quarter'])

df_left_join

Unnamed: 0,suburbs,year,quarter
0,Albert Park-Middle Park-West St Kilda,2000,1
1,Albert Park-Middle Park-West St Kilda,2000,2
2,Albert Park-Middle Park-West St Kilda,2000,3
3,Albert Park-Middle Park-West St Kilda,2000,4
4,Albert Park-Middle Park-West St Kilda,2001,1
...,...,...,...
16123,Yarraville-Seddon,2026,4
16124,Yarraville-Seddon,2027,1
16125,Yarraville-Seddon,2027,2
16126,Yarraville-Seddon,2027,3


In [33]:
# Left-join every feature table onto the suburb x year x quarter scaffold so
# that each scaffold row is kept even when a table has no match for it.
# Crime and land cover vary by quarter and join on all three keys; schools
# and distances are per-suburb and join on 'suburbs' only.
merged = (
    df_left_join
    .merge(expanded_crime_df, on=['suburbs', 'year', 'quarter'], how='left')
    .merge(schools_by_region, on=['suburbs'], how='left')
    .merge(distances, on=['suburbs'], how='left')
    .merge(land_cover, on=['suburbs', 'year', 'quarter'], how='left')
)

# The row index is written as well (pandas default), so the file gains an
# unnamed leading column that appears as 'Unnamed: 0' when read back.
# NOTE(review): left unchanged because downstream readers may already drop
# that column -- confirm before switching to index=False.
merged.to_csv('../../data/raw/merged/merged_crime_schools_distances_land.csv')

# Display goes last: only a cell's final expression is rendered, so a bare
# `merged` placed before the to_csv call showed nothing.
merged

In [None]:
# Show every column name of the merged table as a plain list.
merged.columns.tolist()