# infrastructure-attributes

This notebook builds infrastructure attributes for supernodes: for each supernode it counts the nearby infrastructure objects of every type.

Warning: computing the distances between infrastructure objects and supernodes may use up to 16 GB of RAM.

Requires:
1. `data/preprocessed/infrastructure.json`
2. `data/supernodes/supernodes.json`

Produces:
1. `data/supernodes/supernode_attributes.json`

In [1]:
import sys

# Put the parent directory on the module search path so that packages located
# one level above this notebook can be imported by the cells below.
sys.path += ['..']

In [2]:
from itertools import chain, product

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm

from myutils.spatial import get_earth_distances
from myutils.json import save_json

from config import infrastructure_fpath, supernodes_fpath, supernode_attributes_fpath

In [3]:
# Load the preprocessed infrastructure objects: one row per object with its
# coordinates (`lat`, `lon`) and a list of type tags (`types`).
infrastructure = pd.read_json(infrastructure_fpath)

infrastructure.head(n=2)

Unnamed: 0,id,lat,lon,types,name
0,2,59.771793,30.326111,[sight_place_tourism],Центр Круглого Зала (ЦКЗ)
1,238809,60.189495,29.700196,[sight_place_tourism],Башмаки неизвестного дачника (2009)


In [4]:
# Flatten the per-object type lists into one long sequence and count how often
# each type occurs; `value_counts` orders the result from most to least frequent.
infrastructure_type_counts = (
    pd.Series(list(chain.from_iterable(infrastructure['types'])))
    .value_counts()
)

# Type names in descending-frequency order; reused later as attribute columns.
infrastructure_types = infrastructure_type_counts.index.to_list()

infrastructure_type_counts.head()

residential_building    22435
shop                    13260
catering_place           7746
service                  4288
medicine                 4061
dtype: int64

In [5]:
# Load the supernodes: one row per supernode with its `id`, member stops,
# transport types and representative coordinates (`lat`, `lon`).
supernodes = pd.read_json(supernodes_fpath)

supernodes.head(n=2)

Unnamed: 0,id,stops,diameter,types,lat,lon
0,0,"[4609, 29290, 3082, 17005, 16142, 15984, 16466...",0.331043,"[bus, trolley, tram]",60.017943,30.367853
1,1,"[16964, 4677, 19280, 2545, 49, 26803, 26804, 1...",0.237753,"[bus, tram, trolley, subway]",59.990263,30.254647


In [6]:
# Coordinate arrays, one (lat, lon) row per infrastructure object / supernode.
infrastructure_coords = infrastructure.loc[:, ['lat', 'lon']].to_numpy()
supernode_coords = supernodes.loc[:, ['lat', 'lon']].to_numpy()

# Pairwise distance matrix: rows follow `infrastructure`, columns follow `supernodes`.
# NOTE(review): units are whatever `get_earth_distances` returns (presumably km) — confirm.
distances = get_earth_distances(infrastructure_coords, supernode_coords)

distances.shape

(75552, 3226)

In [7]:
# Tolerance band, in the units of `distances` (presumably km — confirm): an
# object is attributed to its nearest supernode and to every other supernode
# that is at most `window` farther away than the nearest one.
window = 0.2

# Column position of every infrastructure type in the counts matrix.
type_positions = {type_name: j for j, type_name in enumerate(infrastructure_types)}

# Accumulate counts in a plain integer matrix addressed by *position*
# (row = position of the supernode in `supernodes`, column = type position).
# `distances` columns are positional, so looking them up by label in a frame
# indexed by supernode id would only be correct when ids equal row positions.
counts = np.zeros((len(supernodes), len(infrastructure_types)), dtype=np.int64)

for row, obj_types in zip(tqdm(distances), infrastructure['types']):
    # Farthest distance still considered "close" for this object.
    dmax = row.min() + window

    # Positions of all supernodes inside the tolerance band (each appears once).
    close_supernodes = np.flatnonzero(row <= dmax)

    # One increment per (close supernode, object type) pair; a type listed
    # twice on the same object is counted twice.
    for type_name in obj_types:
        counts[close_supernodes, type_positions[type_name]] += 1

# Attach the supernode ids as labels only once the counting is done, then
# expose them as a regular `id` column.
supernode_attributes = pd.DataFrame(
    counts,
    index=pd.Index(supernodes['id'], name='id'),
    columns=infrastructure_types,
).reset_index()

supernode_attributes.head(2)

100%|██████████| 75552/75552 [00:35<00:00, 2136.79it/s]


Unnamed: 0,id,residential_building,shop,catering_place,service,medicine,sight_place_tourism,education,sport,car_supply,...,organisation,supermarket,hotel_business,electronics_and_telecommunication,industrial,bank_and_money,printing_and_books,religion,post_office,business_center_or_mall_or_marketplace
0,0,39,9,3,2,4,4,6,3,0,...,2,3,0,0,0,1,0,0,1,0
1,1,5,172,48,35,10,3,3,2,6,...,2,2,0,9,3,3,13,3,7,7


In [8]:
# One plain dict per supernode ({'id': ..., '<type>': count, ...}).
# `to_dict(orient='records')` builds the records directly, without creating an
# intermediate Series (and its dtype coercion) for every row as `iterrows` does.
supernode_attributes_json = supernode_attributes.to_dict(orient='records')

len(supernode_attributes_json)

3226

In [9]:
# Write the per-supernode attribute records to `supernode_attributes_fpath`
# (per the notebook header: `data/supernodes/supernode_attributes.json`).
save_json(supernode_attributes_json, supernode_attributes_fpath)