# Prepare essential data for interactive visualization

In [1]:
# Enable IPython's autoreload extension (mode 2) so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Switch the working directory to the project root; the relative paths used in
# later cells ('results/...', 'apps/...') and `from lib import ...` rely on it.
# NOTE(review): absolute, machine-specific Windows path - adjust when running elsewhere.
%cd D:\mobi-social-segregation-se

D:\mobi-social-segregation-se


In [2]:
# Load libs
from urllib.parse import quote

import geopandas as gpd
import pandas as pd
import sqlalchemy
from statsmodels.stats.weightstats import DescrStatsW

from lib import preprocess

In [3]:
# Data location
# Connection settings are read from the project's key store
# (`preprocess.keys_manager['database']`).
db_keys = preprocess.keys_manager['database']
user = db_keys['user']
password = db_keys['password']
port = db_keys['port']
db_name = db_keys['name']

# Percent-encode the credentials before embedding them in the URL: characters
# such as '@', ':', '/', '#' or spaces would otherwise be parsed as URL
# delimiters and produce a wrong host/user/password split.
# `safe=''` also encodes '/', and spaces become '%20' (never '+').
user_enc = quote(str(user), safe='')
password_enc = quote(str(password), safe='')

engine = sqlalchemy.create_engine(
    f'postgresql://{user_enc}:{password_enc}@localhost:{port}/{db_name}?gssencmode=disable'
)

## 1. Load data and preprocess
### 1.1 Individual data

In [4]:
# Individual-level records; keep only rows flagged weekday == 1 and holiday == 0.
df_exp = pd.read_parquet('results/data4model_individual.parquet')
weekday_flag = df_exp['weekday'] == 1
no_holiday_flag = df_exp['holiday'] == 0
df_exp = df_exp.loc[weekday_flag & no_holiday_flag, :]
# Show the first remaining record as a quick schema check.
df_exp.iloc[0]

uid                          00008608-f79e-414d-bf1c-25632d6bc059
zone                                                3490006228000
region                                                  1284C1040
wt_p                                                    84.428571
weekday                                                         1
holiday                                                         0
number_of_locations                                            43
number_of_visits                                               96
average_displacement                                    34.301877
radius_of_gyration                                     114.406844
median_distance_from_home                                 0.21554
Other                                                    0.042657
Lowest income group                                      0.088063
car_ownership                                            0.541455
cum_jobs                                              4813.309325
cum_stops 

In [5]:
def ice_exp_agg(data):
    """Return the `wt_p`-weighted mean of `ice_birth` for one group of rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single group; must provide the columns 'ice_birth'
        (values) and 'wt_p' (weights).

    Returns
    -------
    pandas.Series
        One-element Series labelled 'ice_birth_exp' holding the weighted mean.
    """
    weighted_stats = DescrStatsW(data['ice_birth'], weights=data['wt_p'], ddof=1)
    return pd.Series({'ice_birth_exp': weighted_stats.mean})
# One weighted-mean value per region; reset_index turns the group key back
# into an ordinary 'region' column.
records_by_region = df_exp.groupby('region')
df_exp_ice = records_by_region.apply(ice_exp_agg).reset_index()
df_exp_ice.head()

Unnamed: 0,region,ice_birth_exp
0,0114A0010,-0.162079
1,0114C1010,-0.148412
2,0114C1020,-0.147931
3,0114C1030,-0.123504
4,0114C1040,-0.268877


### 1.2 Visiting data by DeSO zones

In [6]:
# Zone-level table keyed by 'deso'; show the first row to inspect its columns.
df_vis = pd.read_parquet(path='results/data4model_agg.parquet')
df_vis.iloc[0]

deso                            0114A0010
number_of_locations                  46.0
number_of_visits               330.778708
average_displacement             4.452007
radius_of_gyration              14.638383
median_distance_from_home        2.073022
Not Sweden                       0.192247
Lowest income group              0.141309
car_ownership                    0.696203
cum_jobs                     12135.951891
cum_stops                             9.0
num_jobs                        13.033933
num_stops                        1.273545
gsi                              0.003682
length_density                   3.345828
evenness_income                  0.130333
ice_birth                       -0.167352
evenness_income_resi                 0.16
ice_birth_resi                   0.278675
Name: 0, dtype: object

### 1.3 Merge data

In [7]:
# Zone-level columns carried into the visualization dataset.
cols_vis = [
    'deso',
    'number_of_visits',
    'radius_of_gyration',
    'Not Sweden',
    'Lowest income group',
    'car_ownership',
    'num_stops',
    'length_density',
    'evenness_income_resi',
    'ice_birth',
    'ice_birth_resi',
]
# The per-region table uses 'region' as its key; rename it so both tables
# join on 'deso'. The left join keeps every zone from df_vis.
exp_ice_by_deso = df_exp_ice.rename(columns={'region': 'deso'})
df = df_vis[cols_vis].merge(exp_ice_by_deso, how='left', on='deso')

Add population size

In [8]:
# Fetch the population column from the zone_stats table (its 'region' column
# is aliased to 'deso') and left-join it onto the merged zone table.
zone_pop = pd.read_sql(sql="""SELECT region AS deso, pop FROM zone_stats;""",
                       con=engine)
df = df.merge(zone_pop, how='left', on='deso')
df.iloc[0]

deso                     0114A0010
number_of_visits        330.778708
radius_of_gyration       14.638383
Not Sweden                0.192247
Lowest income group       0.141309
car_ownership             0.696203
num_stops                 1.273545
length_density            3.345828
evenness_income_resi          0.16
ice_birth                -0.167352
ice_birth_resi            0.278675
ice_birth_exp            -0.162079
pop                            790
Name: 0, dtype: object

## 2. Add geo

In [9]:
# Read zone geometries from the database, attach the attribute table with a
# left join (every zone polygon is kept), then reproject to EPSG:4326.
zone_shapes = gpd.GeoDataFrame.from_postgis(sql="""SELECT deso, geom FROM zones;""", con=engine)
gdf = pd.merge(zone_shapes, df, how='left', on='deso').to_crs(epsg=4326)
gdf.iloc[0]

deso                                                            0114A0010
geom                    POLYGON ((17.851913662393173 59.56670030514441...
number_of_visits                                               330.778708
radius_of_gyration                                              14.638383
Not Sweden                                                       0.192247
Lowest income group                                              0.141309
car_ownership                                                    0.696203
num_stops                                                        1.273545
length_density                                                   3.345828
evenness_income_resi                                                 0.16
ice_birth                                                       -0.167352
ice_birth_resi                                                   0.278675
ice_birth_exp                                                   -0.162079
pop                                   

## 3. Select three major regions

In [10]:
# Derive region descriptors from fixed character positions of the 'deso' code:
# the first two characters, and the single character at index 4.
deso_codes = gdf['deso']
gdf['deso_2'] = deso_codes.map(lambda code: code[:2])
gdf['region_cat'] = deso_codes.map(lambda code: code[4])
# Categories 'A' and 'B' are labelled 'Rural/Suburban'; everything else 'Urban'.
gdf['region_cat2'] = gdf['region_cat'].map(
    lambda cat: 'Urban' if cat not in ('A', 'B') else 'Rural/Suburban'
)

In [11]:
# Keep only zones whose 'deso' code starts with one of three two-character
# prefixes, and replace the prefix in 'deso_2' with a readable region name.
region_names = {'01': 'Stockholm', '12': 'Malmo', '14': 'Gothenburg'}

# Filter on the prefix taken from the untouched 'deso' code rather than from
# 'deso_2': the cell then gives the same result when re-run, instead of
# emptying the frame once 'deso_2' already holds names.
deso_prefix = gdf['deso'].str[:2]

# Explicit copy: assigning into a filtered slice may raise
# SettingWithCopyWarning and leaves it ambiguous which object is modified.
gdf = gdf.loc[deso_prefix.isin(list(region_names)), :].copy()
gdf['deso_2'] = gdf['deso'].str[:2].map(region_names)

In [12]:
# Number of zones per region name and urban/rural class, as a check on the selection.
gdf.groupby(by=['deso_2', 'region_cat2'])['deso'].count()

deso_2      region_cat2   
Gothenburg  Rural/Suburban     256
            Urban              736
Malmo       Rural/Suburban     230
            Urban              559
Stockholm   Rural/Suburban      96
            Urban             1191
Name: deso, dtype: int64

### 3.1 Save data as .geojson

In [13]:
# Display the first row of the filtered GeoDataFrame before it is written out.
gdf.iloc[0]

deso                                                            0114A0010
geom                    POLYGON ((17.851913662393173 59.56670030514441...
number_of_visits                                               330.778708
radius_of_gyration                                              14.638383
Not Sweden                                                       0.192247
Lowest income group                                              0.141309
car_ownership                                                    0.696203
num_stops                                                        1.273545
length_density                                                   3.345828
evenness_income_resi                                                 0.16
ice_birth                                                       -0.167352
ice_birth_resi                                                   0.278675
ice_birth_exp                                                   -0.162079
pop                                   

In [24]:
# Write the selected zones and their attributes as GeoJSON into the app's data
# folder (path is relative to the working directory set in the first cell).
gdf.to_file(
    'apps/interactive-residential-segregation-se/InteractiveVisiSegSweden/data/data.geojson',
    driver='GeoJSON',
)