# Making Changes to Integrated Dataset Based On Case Study Findings

#### Make new columns that combine the "a lot of difficulty" and "cannot do at all" counts for each activity in the disability section.

In [52]:
import pandas as pd
import numpy as np
import geopandas as gpd

In [125]:
# Load the reduced integrated dataset; every later cell adds columns to this frame.
df = pd.read_csv('../NZ_Data/integrated/reduced_integrated.csv')

In [127]:
def combine(diffi, cannot):
    """Add two sequences element by element, treating NaN as "missing".

    Args:
        diffi: numeric values, paired by position with ``cannot``.
        cannot: numeric values, paired by position with ``diffi``.

    Returns:
        A list with one entry per pair: NaN when both values are NaN, the
        other value when exactly one is NaN, and the sum otherwise. Pairing
        uses ``zip``, so the result is as long as the shorter input.
    """
    def _merge_pair(d, c):
        c_missing = np.isnan(c)
        d_missing = np.isnan(d)
        if c_missing and d_missing:
            return np.nan
        if d_missing:
            return c
        if c_missing:
            return d
        return c + d

    return [_merge_pair(d, c) for d, c in zip(diffi, cannot)]

In [129]:
# For each activity, merge the "a lot of difficulty" column with the matching
# "cannot do at all" column into a single per-row list.
seeing = combine(
    df['C18_Seeing_A_lot_of_difficulty'].tolist(),
    df['C18_Seeing_Cannot_do_at_all'].tolist(),
)
hearing = combine(
    df['C18_Hearing_A_lot_of_difficult'].tolist(),
    df['C18_Hearing_Cannot_do_at_all'].tolist(),
)
walking = combine(
    df['C18_Walking_A_lot_of_difficult'].tolist(),
    df['C18_Walking_Cannot_do_at_all'].tolist(),
)
remembering = combine(
    df['C18_Remembering_A_lot_of_diffi'].tolist(),
    df['C18_Remembering_Cannot_do'].tolist(),
)
washing = combine(
    df['C18_Washing_A_lot_of_difficult'].tolist(),
    df['C18_Washing_Cannot_do_at_all'].tolist(),
)
communicating = combine(
    df['C18_Communicating_A_lot_of_dif'].tolist(),
    df['C18_Communicating_Cannot_do'].tolist(),
)

In [131]:
# Store each combined list on df under its new column name.
difficulty_columns = {
    'C18_difficulty_seeing': seeing,
    'C18_difficulty_hearing': hearing,
    'C18_difficulty_walking': walking,
    'C18_difficulty_remembering': remembering,
    'C18_difficulty_washing': washing,
    'C18_difficulty_communicating': communicating,
}
for new_column, new_values in difficulty_columns.items():
    df[new_column] = new_values

In [133]:
# Checkpoint: write df, now including the combined difficulty columns, to the
# modified dataset file. Later cells overwrite this same path.
df.to_csv('../NZ_Data/integrated/modified_integrated.csv')

#### Make a new column that sums all income brackets up to 30k a year

In [136]:
# Per-row total of the four personal-income brackets below 30k. A missing
# (NaN) bracket counts as 0, so a row with every bracket missing totals 0.
low_income_columns = [
    'C18_GroupedPersIncome_5K_Less',
    'C18_GroupedPersIncome_5K_10K',
    'C18_GroupedPersIncome_10K_20K',
    'C18_GroupedPersIncome_20K_30K',
]
# Vectorised column arithmetic replaces the row-by-row iterrows() loop; the
# result is still a plain list with one total per row of df, in row order.
low_income = df[low_income_columns].fillna(0).sum(axis=1).tolist()

In [138]:
# Attach the per-row low-income totals to df as a new column.
df['low_income_count'] = low_income

In [115]:
# Checkpoint: overwrite the modified dataset file with the current df.
df.to_csv('../NZ_Data/integrated/modified_integrated.csv')

#### Add columns for estimated population distribution per house during the day and night

In [159]:
# Load the case-study rating-units shapefile; it supplies the 'LandUse' codes
# used below once it has been spatially joined with the SA1 data.
rating_units = gpd.read_file('../NZ_Data/case_study/Rating_Units/Rating_Units.shp')

In [77]:
# Spatially join gdf2 with the rating units, keeping only matching pairs
# (inner join), so each joined row carries both an SA1 code and a LandUse code.
# NOTE(review): gdf2 is only assigned in a later cell of this notebook (the
# shape/df merge), so that cell must have been run before this one.
joined = gpd.sjoin(gdf2, rating_units, how = 'inner')

In [83]:
# Split the population between houses equally.
# First collect the distinct SA1 districts, then count the buildings in each.
sa1 = list(set(joined['SA12018_V1']))
num_house = dict.fromkeys(sa1, 0)
for land_use, sa1_ in zip(joined['LandUse'], joined['SA12018_V1']):
    if pd.isnull(land_use):
        continue
    # A joined row counts as a house when its land-use code starts with '9'
    # but is not exactly '99'.
    if land_use[0] == '9' and land_use != '99':
        num_house[sa1_] += 1

In [85]:
# Spread each SA1's day and night population evenly over its counted houses.
# Outputs:
#   per_house_day / per_house_night: SA1 code -> people per house
#       (0 when the SA1 has no counted houses)
#   total_day / total_night: population summed over all SA1s, skipping NaN
per_house_day = {}
per_house_night = {}
total_day = 0
total_night = 0

# Build one SA1-indexed lookup of the two population columns up front instead
# of re-filtering gdf2 for every SA1. Keeping the first row per SA1 guarantees
# a scalar lookup, and avoids calling float() on a one-row Series, which
# recent pandas versions deprecate.
population = (
    gdf2[['SA12018_V1', 'C18_CURPop', 'C18_CNPop']]
    .drop_duplicates(subset='SA12018_V1')
    .set_index('SA12018_V1')
)

for a in sa1:
    # get the total day and night resident population
    num_day = float(population.at[a, 'C18_CURPop'])
    num_night = float(population.at[a, 'C18_CNPop'])
    if not np.isnan(num_day):
        total_day += num_day
    if not np.isnan(num_night):
        total_night += num_night

    houses = num_house[a]
    if houses > 0:
        # A NaN population stays NaN here; only the running totals skip it.
        per_house_day[a] = num_day / houses
        per_house_night[a] = num_night / houses
    else:
        per_house_day[a] = 0
        per_house_night[a] = 0

In [87]:
# Turn the SA1 -> per-house day value mapping into a two-column frame; after
# reset_index() the SA1 codes are in a column named 'index'.
per_house_day_df = pd.DataFrame.from_dict(
    per_house_day,
    orient = 'index',
    columns = ['day_distribution'],
).reset_index()

In [89]:
# Turn the SA1 -> per-house night value mapping into a two-column frame; after
# reset_index() the SA1 codes are in a column named 'index'.
per_house_night_df = pd.DataFrame.from_dict(
    per_house_night,
    orient = 'index',
    columns = ['night_distribution'],
).reset_index()

In [91]:
# Put the day and night distributions side by side, matched on the SA1 code
# held in the 'index' column of both frames.
house_dist_df = per_house_day_df.merge(per_house_night_df, on = 'index')

In [142]:
# Add the per-house day/night distributions to df, matching df's SA1 key
# against the SA1 codes in house_dist_df's 'index' column (outer join).
df = pd.merge(
    df,
    house_dist_df,
    how = 'outer',
    left_on = 'SA12018_V1_00',
    right_on = 'index',
)

In [172]:
# Remove the merge key copy ('index') and the 'Unnamed: 0' column from df.
df = df.drop(columns = ['index', 'Unnamed: 0'])

In [177]:
# Write df, now including the day/night distribution columns, to the modified
# dataset file.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra unnamed column — confirm downstream readers expect that.
df.to_csv('../NZ_Data/integrated/modified_integrated.csv')

In [61]:
# Load the generalised Statistical Area 1 (2018) shapefile; it provides the
# geometry that is merged with df below.
shape = gpd.read_file('../QGIS/statsnz-statistical-area-1-2018-generalised-SHP/statistical-area-1-2018-generalised.shp')

In [65]:
# Pull the SA1 codes out of the shapefile frame as a plain list.
lst = shape['SA12018_V1'].tolist()

In [67]:
# Convert every SA1 code in lst to int, updating the list in place.
lst[:] = [int(code) for code in lst]

In [69]:
# Write the integer SA1 codes back onto the shapefile frame.
# NOTE(review): presumably done so the key matches the dtype of
# df['SA12018_V1_00'] in the merge below — confirm.
shape['SA12018_V1'] = lst

In [152]:
# Attach the SA1 geometry to the integrated table. The right join keeps every
# row of df, matching shape's SA1 code against df's 'SA12018_V1_00' key.
gdf2 = shape.merge(df, how='right', left_on = 'SA12018_V1', right_on = 'SA12018_V1_00')

In [167]:
# Remove the helper columns 'index' and 'Unnamed: 0' from the merged frame if
# they are present. An earlier cell already drops both from df, so when the
# notebook is run top to bottom they may be absent here; errors='ignore'
# keeps this cell from raising KeyError in that case.
gdf2 = gdf2.drop(columns=['index', 'Unnamed: 0'], errors='ignore')

In [169]:
# Keep only the first occurrence of every column label in gdf2.
repeated_label = gdf2.columns.duplicated(keep='first')
gdf2 = gdf2.loc[:, ~repeated_label]

In [171]:
# Export the merged geometry + attribute frame as GeoJSON.
gdf2.to_file('../NZ_Data/integrated/modified_integrated.geojson', driver='GeoJSON')