In [90]:
import geopandas as gpd
import pandas as pd
from skmob.models import Gravity
from tqdm import tqdm
import json
import numpy as np

## City regions shp

In [70]:
city_regions = gpd.read_file('../../data/raw/EtapII-REJONY_wroclaw_centroidy.shp')
city_regions = city_regions[['NUMBER', 'NAME', 'geometry']]

In [71]:
city_regions.head()

Unnamed: 0,NUMBER,NAME,geometry
0,22,Komandorska/Swobodna,POINT (362044.353 360502.534)
1,23,Centrum Południowe,POINT (361753.043 360767.194)
2,24,Stysia,POINT (361487.550 361003.421)
3,25,Ostrów Tumski,POINT (363312.469 362711.878)
4,26,Szczytnicka,POINT (363877.247 362685.057)


## City regions info

In [72]:
regions_info = pd.read_excel('../../data/raw/Etap_II_BIG_DATA_szacunki_liczby_mieszkancow_i_miejsc_pracy.xlsx', 'Wrocław', engine='openpyxl', header=[0,1, 2])

# flat columns names
new_columns = []
i = 1
for col in regions_info.columns:
    next_col = col[2]
    if next_col in new_columns:
        next_col = next_col + '_' + str(i)
        i += 1
    new_columns.append(next_col)

regions_info.columns = new_columns

# select and rename columns
columns = {
    'REJON_poprawny': 'Regions',
    'REJON_AGR': 'Regions_agr',
    'NAME,C,254': 'Region_name',
    'BIURA,N,10,0': 'Objects_offices',
    'HANDEL,N,10,0': 'Objects_shops',
    'MIESZKANIA,N,10,0': 'Objects_apartments',
    'PRZEMYSL,N,10,0': 'Objects_industry',
    'SZKOLY,N,10,0': 'Objects_schools',
    'UCZELNIE,N,10,0': 'Objects_universitites',
    'Mi': 'Poluation_UM',
    'Mi_6+': 'Population_6plus_UM',
    'liczba mieszkańców z danych BIG DATA': 'Population_BD',
    'Metoda Autorska - Ostateczna liczba mieszkańców 6+ w poszczególnych rejonach': 'Population_AUT',
    'Metoda 1 \nLICZBA KART SIM „NIERUSZAJĄCYCH SIĘ” W GODZINACH NOCNYCH DLA KAŻDEGO REJONU': 'Population_MET1',
    'Metoda 2 \nLICZBA KART SIM ROZPOCZYNAJĄCYCH PRZEMIESZCZENIE W GODZINACH SZCZYTU PORANNEGO DLA KAŻDEGO REJONU': 'Population_MET2',
    'MIEJSCA PRACY*': 'Jobs_GUS',
    'Metoda Autorska - Liczba miejsc pracy z danych BIG DATA\n(+ uczniowie, studenci)': 'Jobs_AUT',
    'Metoda 3 \nLICZBA KART SIM „NIERUSZAJĄCYCH SIĘ” W GODZINACH 11-12 i 12-13 DLA KAŻDEGO REJONU': 'Jobs_MET3',
    'Metoda 4 \nLICZBA KART SIM KOŃCZĄCYCH PRZEMIESZCZENIE W GODZINACH SZCZYTU PORANNEGO DLA KAŻDEGO REJONU': 'Jobs_MET4'
}

regions_info = regions_info[list(columns.keys())]
regions_info = regions_info.rename(columns=columns)

In [73]:
regions_info.head()

Unnamed: 0,Regions,Regions_agr,Region_name,Objects_offices,Objects_shops,Objects_apartments,Objects_industry,Objects_schools,Objects_universitites,Poluation_UM,Population_6plus_UM,Population_BD,Population_AUT,Population_MET1,Population_MET2,Jobs_GUS,Jobs_AUT,Jobs_MET3,Jobs_MET4
0,1.0,A10,Rynek,106538.0,58760.0,131606.0,288.0,0.0,0.0,1695.0,1645.0,5785.0,1698.36055,2714.0,2105.0,10354.0,6986.0,9875.0,2262.0
1,2.0,A9,UWr,12954.0,4082.0,101166.0,3824.0,244.0,5911.0,1719.0,1657.0,565.0,1710.749807,226.0,339.0,4798.0,609.0,1026.0,182.0
2,3.0,A9,Hala Targowa,9500.0,12060.0,62071.0,271.0,434.0,1412.0,1301.0,1253.0,1348.0,1391.726457,548.0,713.0,4751.0,1139.0,2349.0,730.0
3,4.0,A11,Pl. Dominikański,70156.0,104991.0,64536.0,188.0,0.0,0.0,650.0,620.0,704.0,726.836369,269.0,191.0,4250.0,722.0,1566.0,217.0
4,5.0,A11,Skargi,13656.0,10927.0,60003.0,286.0,1785.0,0.0,1072.0,1034.0,696.0,1067.540917,400.0,348.0,5194.0,965.0,1305.0,321.0


## Merge dfs

In [74]:
gdf = city_regions.merge(regions_info.rename(columns={'Regions': 'NUMBER'}).dropna(), on='NUMBER')
gdf = gdf.rename(columns={'NUMBER': 'Region', 'NAME':'Name'})
gdf.head()

Unnamed: 0,Region,Name,geometry,Regions_agr,Region_name,Objects_offices,Objects_shops,Objects_apartments,Objects_industry,Objects_schools,...,Poluation_UM,Population_6plus_UM,Population_BD,Population_AUT,Population_MET1,Population_MET2,Jobs_GUS,Jobs_AUT,Jobs_MET3,Jobs_MET4
0,22,Komandorska/Swobodna,POINT (362044.353 360502.534),22,Komandorska/Swobodna,20.0,3886.0,132354.0,63.0,492.0,...,3213.0,3102.0,1278.0,3202.622752,574.0,374.0,1255.0,1200.0,1609.0,478.0
1,23,Centrum Południowe,POINT (361753.043 360767.194),23,Centrum Południowe,37467.0,75850.0,130197.0,186.0,0.0,...,2404.0,2303.0,6508.0,2377.704771,2758.0,2418.0,10258.0,4750.0,5463.0,1748.0
2,24,Stysia,POINT (361487.550 361003.421),24,Stysia,5155.0,11262.0,165949.0,87.0,638.0,...,3769.0,3615.0,4959.0,5119.860164,1861.0,1070.0,1373.0,2296.0,2862.0,626.0
3,25,Ostrów Tumski,POINT (363312.469 362711.878),A3,Ostrów Tumski,293.0,8508.0,74826.0,216.0,0.0,...,794.0,774.0,2497.0,799.107031,1017.0,739.0,413.0,1479.0,2088.0,722.0
4,26,Szczytnicka,POINT (363877.247 362685.057),26,Szczytnicka,4515.0,1869.0,239115.0,162.0,903.0,...,4906.0,4685.0,2845.0,4836.972145,1026.0,1061.0,1468.0,1522.0,1748.0,817.0


## Distances between regions

In [76]:
type(gdf.iloc[0]['geometry'])

shapely.geometry.point.Point

In [77]:
gdf[gdf['Region']==1].iloc[0]['geometry'].distance(gdf[gdf['Region']==4].iloc[0]['geometry'])

440.86286944122725

In [83]:
regions = gdf['Region'].to_list()

distance_between_regions = {}

for i in tqdm(regions):
    distance_between_regions[i] = {}
    
    for j in regions:
        distance = gdf[gdf['Region']==i].iloc[0]['geometry'].distance(gdf[gdf['Region']==j].iloc[0]['geometry'])
        distance_between_regions[i][j] = distance

100%|██████████| 375/375 [07:18<00:00,  1.17s/it]


In [85]:
with open('../../data/processed/kr/distance_between_regions.json', 'w') as f:
    json.dump(distance_between_regions, f) 

## Gravity models

In [29]:
dest = ['dom', 'praca', 'szkola', 'uczelnia', 'inne']

In [86]:
columns_for_dest = {
    'dom': 'Population_AUT',
    'praca': 'Jobs_AUT',
    'szkola': 'Objects_schools',
    'uczelnia': 'Objects_universitites',
    'inne': 1
}

In [105]:
gravity = {}
fill_0_distance_with = 150

for destination in dest:
    gravity[destination] = {}

    for i in tqdm(regions):
        gravity[destination][i] = {}

        for j in regions:

            if type(columns_for_dest[destination]) == str:
                value_j = gdf[gdf['Region']==j].iloc[0][columns_for_dest[destination]]
            else:
                value_j = columns_for_dest[destination]

            distance = distance_between_regions[i][j]
            if distance == 0:
                distance = fill_0_distance_with

            gravity[destination][i][j] = value_j / distance

        # normalization
        s = np.sum(np.array(list(gravity[destination][i].values())))
        for j, g in gravity[destination][i].items():
            gravity[destination][i][j] = g / s

100%|██████████| 375/375 [03:09<00:00,  1.98it/s]


In [None]:
with open('../../data/processed/kr/gravity.json', 'w') as f:
    json.dump(gravity, f) 