# Residential segregation
Calculate grid-based segregation indices (evenness and isolation) for ethnic (birth-region) and income groups.

In [1]:
# Load the autoreload extension and enable mode 2 (reload modules before running code).
%load_ext autoreload
%autoreload 2
# NOTE(review): machine-specific absolute path; presumably set so the project-local
# `lib` package imported below resolves — adjust to the local checkout.
%cd D:\mobi-social-segregation-se

D:\mobi-social-segregation-se


In [2]:
# Load libs
from urllib.parse import quote_plus

import geopandas as gpd
import pandas as pd
import sqlalchemy
from tqdm import tqdm

from lib import preprocess as preprocess

In [3]:
# Data location: connection settings come from the project's key store.
db_keys = preprocess.keys_manager['database']
user = db_keys['user']
password = db_keys['password']
port = db_keys['port']
db_name = db_keys['name']
# URL-encode the credentials: characters such as '@', ':', '/' or '%' in a raw
# user name or password would otherwise be parsed as URL delimiters and yield a
# wrong host/user or a failed login.
# NOTE(review): assumes the stored credentials are raw (not already URL-encoded).
engine = sqlalchemy.create_engine(
    f'postgresql://{quote_plus(str(user))}:{quote_plus(str(password))}'
    f'@localhost:{port}/{db_name}'
)

## 1. Load data and process

In [46]:
# Read the full `grids` table from the database.
df = pd.read_sql("""SELECT * FROM grids;""", con=engine)
# Keep only cells whose `pop` and `pop_income` are both non-zero, since both
# columns are used as denominators further down. `.copy()` makes the filtered
# frame independent, so the column assignments in later cells act on a real
# DataFrame rather than a possible slice (avoids SettingWithCopyWarning).
has_population = (df['pop'] != 0) & (df['pop_income'] != 0)
df = df.loc[has_population, :].copy()
len(df)

138360

In [47]:
# Turn the raw group columns into within-cell shares: income quartiles are
# divided by `pop_income`, birth-region groups by `pop`.
# NOTE: this mutates `df` in place; re-running the cell without reloading `df`
# divides the columns a second time.
inc_var_list = ['income_q1', 'income_q2', 'income_q3', 'income_q4']
birth_var_list = ['birth_se', 'birth_nord', 'birth_eu', 'birth_other']
for var_group, denominator in ((inc_var_list, 'pop_income'), (birth_var_list, 'pop')):
    for var in var_group:
        df[var] = df[var] / df[denominator]
# Two-way birth-region split: the `birth_se` share versus its complement.
df['Not Sweden'] = 1 - df['birth_se']
df['Sweden'] = df['birth_se']

## 2. Save zonal statistics

In [48]:
# Per-zone statistics table: the `income_q1` share, the two birth-region shares
# and the `pop` column, with a fresh 0..n-1 index.
stats_columns = ['zone', 'income_q1', 'Not Sweden', 'Sweden', 'pop']
df_stats = df.loc[:, stats_columns].reset_index(drop=True)
df_stats.head()

Unnamed: 0,zone,income_q1,Not Sweden,Sweden,pop
0,3230006403750,0.279279,0.324873,0.675127,197.0
1,3952506385500,0.090909,0.141176,0.858824,85.0
2,5070006231750,0.263158,0.123711,0.876289,97.0
3,3900006500000,0.0,0.0,1.0,4.0
4,7070006642000,0.0,0.130435,0.869565,23.0


In [49]:
# Write the zonal statistics to table `public.grid_stats`, replacing any
# existing table; `income_q1` is stored under a descriptive column name.
(
    df_stats
    .rename(columns={'income_q1': 'Lowest income group'})
    .to_sql(
        'grid_stats',
        engine,
        schema='public',
        index=False,
        method='multi',
        if_exists='replace',
        chunksize=10000,
    )
)

## 3. Calculate segregation measures
### 3.1 Evenness

Income

In [50]:
def evenness(row, n, var_list):
    """Scaled absolute deviation of a row's group shares from the uniform share 1/n.

    :param row: mapping-like object (e.g. a DataFrame row) indexed by the names in ``var_list``
    :param n: number of groups; the uniform share is ``1/n``
    :param var_list: names of the share columns to read from ``row``
    :return: ``n / (2n - 2) * sum(|row[var] - 1/n|)``. With ``len(var_list) == n``
        this is 0 when every share equals ``1/n`` and 1 when one share is 1 and
        the others are 0.
    :raises ZeroDivisionError: when ``n == 1`` (the scale factor divides by ``2n - 2``)
    """
    total_deviation = sum(abs(row[name] - 1 / n) for name in var_list)
    return n / (2 * n - 2) * total_deviation

In [51]:
# Evenness of the four income-quartile shares, computed row by row.
inc_var_list = ['income_q1', 'income_q2', 'income_q3', 'income_q4']
n = 4
df['income_evenness'] = df.apply(evenness, axis=1, n=n, var_list=inc_var_list)

Region of birth

In [52]:
def dis_d(row=None, A=None, B=None, name_a=None, name_b=None):
    """Absolute difference between two groups' normalised counts for one row.

    Each group's count is rebuilt as ``row[name] * row['pop']`` and divided by
    the corresponding total (``A`` for ``name_a``, ``B`` for ``name_b``).

    :param row: mapping-like object holding ``name_a``, ``name_b`` and ``'pop'``
    :param A: divisor applied to group ``name_a``
    :param B: divisor applied to group ``name_b``
    :param name_a: key of the first group's share in ``row``
    :param name_b: key of the second group's share in ``row``
    :return: ``|row[name_a] * pop / A - row[name_b] * pop / B|``
    """
    part_a = row[name_a] * row['pop'] / A
    part_b = row[name_b] * row['pop'] / B
    return abs(part_a - part_b)

In [53]:
# Overall totals of each birth-region group, rebuilt from share * pop per cell.
sweden_counts = df['Sweden'] * df['pop']
not_sweden_counts = df['Not Sweden'] * df['pop']
A, B = sum(sweden_counts), sum(not_sweden_counts)
df['birth_evenness'] = df.apply(dis_d, axis=1, A=A, B=B, name_a='Sweden', name_b='Not Sweden')

### 3.2 Isolation

In [55]:
def isolation(row, var, reverse=False, total=None, total_var=None):
    """Per-row isolation term: squared group share weighted by the row total.

    :param row: mapping-like object holding the share ``var`` and the count ``total``
    :param var: key of the group share in ``row``
    :param reverse: if truthy, use the complementary share ``1 - row[var]``
    :param total: key of the row's population count in ``row``
    :param total_var: divisor for the weighted term (the callers pass the
        group's overall size)
    :return: ``share**2 * row[total] / total_var``; no further adjustment is applied
    """
    share = (1 - row[var]) if reverse else row[var]
    return share**2 * row[total] / total_var

Income

In [56]:
# Overall size of the `income_q1` group (share * pop_income summed over cells),
# used as the divisor of the per-cell isolation term.
P_var = sum(df['income_q1'] * df['pop_income'])
df['income_iso'] = df.apply(
    isolation, axis=1,
    var='income_q1', reverse=False, total='pop_income', total_var=P_var,
)

Region of birth

In [57]:
# Overall size of the 'Not Sweden' group (share * pop summed over cells),
# used as the divisor of the per-cell isolation term.
P_var = sum(df['Not Sweden'] * df['pop'])
df['birth_iso'] = df.apply(
    isolation, axis=1,
    var='Not Sweden', reverse=False, total='pop', total_var=P_var,
)

## 4. Save the data

In [58]:
# Keep the zone identifier together with the four segregation measures.
seg_columns = ['zone', 'income_evenness', 'birth_evenness', 'income_iso', 'birth_iso']
df_seg = df[seg_columns]
df_seg.head()

Unnamed: 0,zone,income_evenness,birth_evenness,income_iso,birth_iso
0,3230006403750,0.066066,1.676269e-05,7.572377e-06,1.077446e-05
1,3952506385500,0.30303,2.784388e-06,2.385395e-07,8.779008e-07
2,5070006231750,0.175439,4.264308e-06,2.301697e-06,7.692945e-07
3,3900006500000,1.0,4.933069e-07,0.0,0.0
4,7070006642000,0.666667,9.119187e-07,0.0,2.02776e-07


In [59]:
# Write the segregation measures to table `public.resi_seg_grid`,
# replacing any existing table.
df_seg.to_sql(
    'resi_seg_grid',
    engine,
    schema='public',
    index=False,
    method='multi',
    if_exists='replace',
    chunksize=10000,
)