In [2]:
import requests
from tqdm import tqdm_notebook, tqdm
from joblib import delayed, Parallel
from scipy.spatial import distance
import collections
import simplejson as json
import numpy as np
import pandas as pd
import utm
from mapboxgl.utils import *
from mapboxgl.viz import *
import matplotlib.pyplot as plt

# Relax pandas' display limits so wide frames and long cell values are
# rendered in full instead of being truncated in the notebook output.
pd.set_option('display.max_colwidth', 1000)  # characters shown per cell
pd.set_option('display.max_columns', 500)    # columns shown before eliding
pd.set_option('display.width', 1000)         # total line width in characters

# IPython magic: draw matplotlib figures directly in the cell output.
%matplotlib inline

In [3]:
# Load the two pre-computed grid documents (cities and regions). Each one is
# later indexed as data[name] to obtain a single grid.
with open('cities_grid_all_new.json', encoding='utf8') as f:
    cities_grid_all = json.load(f)

with open('subjects_grid_all_new.json', encoding='utf8') as f:
    regions_grid_all = json.load(f)

# Indices into a cell's statistics list (used as `stat_type`).
# Presumably these address the five-element 'SELL' / 'RENT' lists - confirm.
# NOTE(review): in the stat_by_cell sample output the last 'rent' value is
# exactly 1500.0, which looks more like a median than a standard deviation;
# verify that positions 3 and 4 really are MEDIAN and STD in that order.
MIN, MAX, MEAN, MEDIAN, STD = range(5)

# Indices into a cell's two-element 'house' list; FLOORS is used together
# with the 'house' cell type when mapping.
YEAR, FLOORS = range(2)

In [199]:
def generate_coord_array(coord, step, num_cells):
    """Return the cell-edge coordinates of a 1-D grid centred on ``coord``.

    The grid spans ``step`` in total, from ``coord - step / 2`` to
    ``coord + step / 2``, and is divided into ``num_cells`` equal cells.

    Args:
        coord: Centre coordinate of the grid axis.
        step: Total extent of the axis.
        num_cells: Number of cells along the axis.

    Returns:
        A list of ``num_cells + 1`` edge coordinates in ascending order
        (for a positive ``step``).
    """
    cell_size = step / (num_cells)
    lower_edge = coord - step / 2
    edge_offsets = np.arange(num_cells + 1) * cell_size
    return list(lower_edge + edge_offsets)

def cell_index(coord_array, coord):
    """Count how many entries of ``coord_array`` are strictly below ``coord``.

    For an ascending array of cell edges this is the 1-based index of the
    cell whose upper edge is ``coord`` (or the number of edges already
    passed for an arbitrary coordinate).

    Args:
        coord_array: Sequence of edge coordinates.
        coord: Coordinate to locate.

    Returns:
        The count as a plain Python ``int`` (0 for an empty array).
    """
    # Vectorised count: the builtin sum() would walk the boolean array
    # element by element in Python and hand back a NumPy scalar.
    return int(np.count_nonzero(np.asarray(coord_array) < coord))

def construct_geojson(grid, cell_type, stat_type):
    """Build a GeoJSON FeatureCollection with one square polygon per grid cell.

    Args:
        grid: Mapping with
            ``'lat_arr'`` / ``'lon_arr'`` - argument triples that are unpacked
            into ``generate_coord_array(coord, step, num_cells)``;
            ``'cells'`` - mapping from ``'<lat_index>_<lon_index>'`` names to
            cell records. Each record has a ``'<cell_type>_link'`` entry
            naming the cell whose ``cell_type`` statistics should be used.
        cell_type: Statistics family to read from the linked cell
            (the callers in this notebook pass ``'house'``).
        stat_type: Index into the linked cell's ``cell_type`` list.

    Returns:
        A tuple ``(geojson, stats, stops)`` where
        ``geojson`` is the FeatureCollection (each feature carries
        ``properties = {'name': ..., 'stat': int(...)}``),
        ``stats`` is the list of strictly positive statistics, and
        ``stops`` holds the 19.5th, 39.5th, 59.5th, 79.5th and 99.5th
        percentiles of ``stats`` truncated to ``int`` (an empty list when
        there is no positive statistic).

    Raises:
        KeyError: If a cell name or link is missing from ``grid['cells']``.
    """
    lat_arr = generate_coord_array(*grid['lat_arr'])
    lon_arr = generate_coord_array(*grid['lon_arr'])
    cells = grid['cells']
    link_key = cell_type + '_link'

    # The index of a row/column depends only on its upper edge, so compute
    # each one once instead of once per cell inside the nested loop.
    lat_ids = [str(cell_index(lat_arr, upper)) for upper in lat_arr[1:]]
    lon_ids = [str(cell_index(lon_arr, upper)) for upper in lon_arr[1:]]

    features = []
    stat_arr = []
    lat_bands = zip(lat_ids, lat_arr[:-1], lat_arr[1:])
    # zip() has no length, so pass the row count explicitly to get a real
    # progress bar rather than an indeterminate one.
    for lat_id, lat_min, lat_max in tqdm_notebook(lat_bands, total=len(lat_ids)):
        for lon_id, lon_min, lon_max in zip(lon_ids, lon_arr[:-1], lon_arr[1:]):
            name = lat_id + '_' + lon_id
            link = cells[name][link_key]
            stat = cells[link][cell_type][stat_type]
            stat_arr.append(stat)

            # Closed ring: the first vertex is repeated at the end.
            ring = [
                [lon_min, lat_min],
                [lon_max, lat_min],
                [lon_max, lat_max],
                [lon_min, lat_max],
                [lon_min, lat_min],
            ]
            features.append({
                'type': 'Feature',
                'properties': {'name': name, 'stat': int(stat)},
                'geometry': {'type': 'Polygon', 'coordinates': [ring]},
            })

    geojson_all = {'type': 'FeatureCollection', 'features': features}

    # Only strictly positive statistics take part in the colour stops.
    stat_arr = [x for x in stat_arr if x > 0]
    if not stat_arr:
        # Nothing to take percentiles of; let the caller decide on stops.
        return geojson_all, stat_arr, []

    # Same five percentiles the original picked out of 199 separate
    # np.percentile calls, now obtained with a single call.
    stop_percentiles = [19.5, 39.5, 59.5, 79.5, 99.5]
    stops = [int(value) for value in np.percentile(stat_arr, stop_percentiles)]
    return geojson_all, stat_arr, stops

def map_by_grid(data, name, data_type, stat_type, access_token=None):
    """Render a choropleth map of one statistic over one grid and display it.

    Args:
        data: Mapping of grid name to grid (e.g. ``regions_grid_all``).
        name: Key of the grid inside ``data``.
        data_type: Statistics family passed to ``construct_geojson``
            (e.g. ``'house'``).
        stat_type: Index of the statistic inside that family's list.
        access_token: Optional Mapbox access token. When omitted, the
            ``MAPBOX_ACCESS_TOKEN`` environment variable is used, and only
            if that is unset does the legacy built-in token apply.

    Returns:
        A tuple ``(stat_arr, geodata)``: the positive statistics and the
        GeoJSON FeatureCollection produced by ``construct_geojson``.
    """
    import os  # local import keeps this cell runnable on its own

    grid = data[name]
    geodata, stat_arr, stops_arr = construct_geojson(grid, data_type, stat_type)

    # The first element of each axis triple is the grid's centre coordinate.
    center = (grid['lon_arr'][0], grid['lat_arr'][0])

    # Floor counts get fixed, hand-picked stops instead of percentiles.
    if (data_type == 'house') and (stat_type == FLOORS):
        stops_arr = [2, 5, 9, 15, 25]

    if access_token is None:
        # FIXME(security): the literal below is a credential committed to the
        # notebook. It is kept only so existing calls keep working; rotate it
        # and rely on MAPBOX_ACCESS_TOKEN (or the argument) instead.
        access_token = os.environ.get('MAPBOX_ACCESS_TOKEN') or (
            'pk.eyJ1Ijoia29raHRldiIsImEiOiJjamxtMXNwYnYwNWZmM3VtZHZ2bjF5dXRrIn0.3hIpPnM2KUhlQiFHH4pu1g'
        )

    viz = ChoroplethViz(geodata,
                        access_token=access_token,
                        color_property='stat',
                        color_stops=create_color_stops(stops_arr, colors='YlOrRd'),
                        line_stroke='*',
                        line_color='rgb(128,0,38)',
                        line_width=2,
                        opacity=0.5,
                        center=center,
                        zoom=9,
                        below_layer='waterway-label',
                        legend_layout='horizontal',
                        legend_key_shape='bar',
                        legend_key_borders_on=False)
    viz.show()
    return stat_arr, geodata

In [204]:
# Map the floor-count statistic of the 'house' data for region '1'.
stat, geodata = map_by_grid(
    data=regions_grid_all,
    name='1',
    data_type='house',
    stat_type=FLOORS,
)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))

In [202]:
def stat_by_cell(data, name, cell_id):
    """Collect the sell, rent and house statistics that apply to one cell.

    A cell does not necessarily hold its own statistics: for each family it
    stores a ``'<FAMILY>_link'`` entry naming the cell to read them from.

    Args:
        data: Mapping of grid name to grid.
        name: Key of the grid inside ``data``.
        cell_id: Name of the cell inside the grid's ``'cells'`` mapping.

    Returns:
        ``{'sell': ..., 'rent': ..., 'house': ...}`` with the statistics
        lists of the linked cells.
    """
    cells = data[name]['cells']

    # Resolve all three links first, then follow them.
    sell_source = cells[cell_id]['SELL_link']
    rent_source = cells[cell_id]['RENT_link']
    house_source = cells[cell_id]['house_link']

    result = {}
    result['sell'] = cells[sell_source]['SELL']
    result['rent'] = cells[rent_source]['RENT']
    result['house'] = cells[house_source]['house']
    return result

In [203]:
# Inspect the statistics that apply to cell '57_51' of region '1'.
stat_by_cell(data=regions_grid_all, name='1', cell_id='57_51')

{'sell': [77173.91304347826,
  3352464.96,
  683118.2588886034,
  453169.12093282904,
  536625.138121547],
 'rent': [363.6363636363636,
  11578.947368421053,
  1718.6269369989952,
  998.3644541115108,
  1500.0],
 'house': [1961.0, 6.0]}