# Residential segregation
Calculate grid-based segregation indices for ethnic (birth-region) and income groups.

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell execution so edits to project code are picked up.
%load_ext autoreload
%autoreload 2
# Change the working directory for the rest of the notebook.
# NOTE(review): absolute, machine-specific Windows path — presumably the project
# root that makes `lib` importable; adjust when running on another machine.
%cd D:\mobi-social-segregation-se

D:\mobi-social-segregation-se


In [2]:
# Load libs
from urllib.parse import quote

import geopandas as gpd
import pandas as pd
import sqlalchemy
from tqdm import tqdm

from lib import preprocess as preprocess

  shapely_geos_version, geos_capi_version_string


In [3]:
# Data location
# Connection settings are read from the project's key store (the 'database'
# entry of preprocess.keys_manager) rather than being written in the notebook.
db_keys = preprocess.keys_manager['database']
user = db_keys['user']
password = db_keys['password']
port = db_keys['port']
db_name = db_keys['name']

# Percent-encode the credentials before placing them in the URL: characters
# such as '@', ':', '/' or '%' in a user name or password would otherwise be
# read as URL delimiters and yield a wrong or unparsable connection string.
# `user` and `password` themselves stay unencoded for any later use.
user_in_url = quote(str(user), safe='')
password_in_url = quote(str(password), safe='')
engine = sqlalchemy.create_engine(
    f'postgresql://{user_in_url}:{password_in_url}@localhost:{port}/{db_name}'
)

## 1. Load and process data

In [4]:
# Read the complete `grids` table from the database.
df = pd.read_sql("""SELECT * FROM grids;""", con=engine)

# Keep only the rows whose `pop` and `pop_income` are both non-zero; these two
# columns are used as denominators when the counts are turned into shares.
has_both_populations = df['pop'].ne(0) & df['pop_income'].ne(0)
df = df[has_both_populations]

# Number of rows that remain after filtering.
len(df)

138360

In [5]:
# Convert the per-group counts into shares of the matching population total.
# NOTE: the count columns of `df` are overwritten, so running this cell a
# second time without reloading `df` divides the shares again.

# Income-quartile columns are divided by `pop_income`.
inc_var_list = ['income_q1', 'income_q2', 'income_q3', 'income_q4']
df[inc_var_list] = df[inc_var_list].div(df['pop_income'], axis=0)

# Birth-region columns are divided by `pop`.
birth_var_list = ['birth_se', 'birth_nord', 'birth_eu', 'birth_other']
df[birth_var_list] = df[birth_var_list].div(df['pop'], axis=0)

# Derived columns: complement of the `birth_se` share, plus copies of the
# `birth_other` and `birth_se` shares under new column names.
df['Not Sweden'] = 1 - df['birth_se']
df['Other'] = df['birth_other']
df['Sweden'] = df['birth_se']

## 2. Save zonal statistics

In [6]:
# Columns of the zonal statistics table: the zone identifier followed by the
# selected group shares and the `pop` column.
stats_columns = ['zone', 'income_q1', 'Other', 'Not Sweden', 'Sweden', 'pop']

# Every column comes from the same frame, so a direct column selection is
# enough; no per-column re-indexing on `zone` and concatenation is needed.
# reset_index(drop=True) gives the result a fresh 0..n-1 row index.
df_stats = df.loc[:, stats_columns].reset_index(drop=True)
df_stats.head()

Unnamed: 0,zone,income_q1,Other,Not Sweden,Sweden,pop
0,3230006403750,0.279279,0.22335,0.324873,0.675127,197.0
1,3952506385500,0.090909,0.047059,0.141176,0.858824,85.0
2,5070006231750,0.263158,0.123711,0.123711,0.876289,97.0
3,3900006500000,0.0,0.0,0.0,1.0,4.0
4,7070006642000,0.0,0.130435,0.130435,0.869565,23.0


In [7]:
# Write the zonal statistics to the `public.grid_stats` table, replacing the
# table if it already exists. `income_q1` is stored under the column name
# 'Lowest income group'; rows are sent as multi-row INSERTs in chunks of 10000.
(
    df_stats
    .rename(columns={'income_q1': 'Lowest income group'})
    .to_sql(
        name='grid_stats',
        con=engine,
        schema='public',
        if_exists='replace',
        index=False,
        chunksize=10000,
        method='multi',
    )
)