In [1]:
% load_ext autoreload
% autoreload 2


In [91]:
import glob
import os
import pandas as pd
from collections import defaultdict
import datetime
import src.mex_helper as mex
import folium
from src.utils.map_vis import time_slider_choropleth, geojson_per_row

# load daily stats

In [3]:
# One "*-located.csv" file per day. File names start with YYYY-MM-DD (parsed in the
# loading loop below), so the lexicographic sort is also chronological.
fns = sorted(glob.glob('stats/MexTwHrUniqCntVOZ/*-located.csv'))

In [4]:
%%time
# Per-tower lists of daily rows, bucketed by day type:
# store[True] holds weekdays, store[False] holds weekends.
store_wd = defaultdict(list)
store_wk = defaultdict(list)
store = {True: store_wd, False: store_wk}

for file_no, csv_path in enumerate(fns):
    if file_no % 50 == 0:
        print('working on %dth file %s' % (file_no, csv_path))

    # The first 10 characters of the file name are the observation date.
    day = datetime.datetime.strptime(os.path.basename(csv_path)[:10], '%Y-%m-%d')
    # weekday() is 0 for Monday ... 6 for Sunday, so < 5 means Monday-Friday.
    bucket = store[day.weekday() < 5]

    # The first CSV column becomes the index and is used as the tower id (gtid).
    daily_df = pd.read_csv(csv_path, index_col=0)
    for gtid, row in daily_df.iterrows():
        bucket[gtid].append(row)

working on 0th file stats/MexTwHrUniqCntVOZ/2009-10-01-located.csv
working on 50th file stats/MexTwHrUniqCntVOZ/2009-11-20-located.csv
working on 100th file stats/MexTwHrUniqCntVOZ/2010-01-09-located.csv
working on 150th file stats/MexTwHrUniqCntVOZ/2010-03-27-located.csv
working on 200th file stats/MexTwHrUniqCntVOZ/2010-05-16-located.csv
CPU times: user 48.5 s, sys: 908 ms, total: 49.4 s
Wall time: 49.4 s


# average over weekday and weekend

In [5]:
%%time
# average[is_wd][gtid] -> Series holding the mean of that tower's daily rows.
# Values missing on a given day are treated as 0 before averaging.
average = {}
for is_wd in (True, False):
    print('is_wd=', is_wd)
    average[is_wd] = {
        gtid: pd.DataFrame(rows).fillna(0).mean(axis=0)
        for gtid, rows in store[is_wd].items()
    }

is_wd= True
is_wd= False
CPU times: user 2min 30s, sys: 112 ms, total: 2min 30s
Wall time: 2min 30s


In [6]:
# The dict keys (tower ids) become columns, so transpose to get one row per tower.
tw_avg_wd = pd.DataFrame(average[True]).transpose()
tw_avg_wk = pd.DataFrame(average[False]).transpose()

# distribute tower stats to grids

In [7]:
# Tower-to-grid mapping; later cells read its `gtid`, `grid` and `weight` columns.
# NOTE(review): 1000 looks like the grid size in metres (the cached file is named
# mex_t2g_cities_1000m.csv) -- confirm in mex.tower2grid.
mex_t2g = mex.tower2grid('cities', 1000)

reading existing t2g file: data/mex_t2g_cities_1000m.csv


In [8]:
# Towers present in the tower-to-grid mapping but absent from the weekday / weekend averages.
city_towers = set(mex_t2g.gtid)
print('number of towers in cities has no call at all during weekday and weekend')
len(city_towers - set(tw_avg_wd.index)), len(city_towers - set(tw_avg_wk.index))

number of towers in cities has no call at all during weekday and weekend


(233, 234)

In [9]:
def grid_avgerage(tw_avg, t2g):
    """Distribute per-tower hourly averages onto grids using the tower-to-grid weights.

    Parameters
    ----------
    tw_avg : pandas.DataFrame
        Indexed by tower id, with the hour columns '0' ... '23'.
    t2g : pandas.DataFrame
        Has the columns 'gtid' (tower id), 'grid' and 'weight'.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'grid'; each hour column is the sum of weight * tower average
        over the towers mapped to that grid.
    """
    hour_cols = [str(hour) for hour in range(24)]

    # Left join: every (tower, grid) pair of t2g is kept; towers that never appear
    # in tw_avg get NaN in the hour columns.
    weighted = t2g.merge(tw_avg, how='left', left_on='gtid', right_index=True)
    weighted[hour_cols] = weighted[hour_cols].mul(weighted['weight'], axis=0)

    # sum() skips NaN, so a grid fed only by towers without data comes out as 0.
    per_grid = weighted.drop(columns=['gtid', 'weight']).groupby('grid').sum()
    return per_grid

In [11]:
# Grid-level hourly averages, computed separately for weekdays and weekends.
g_avg_wd = grid_avgerage(tw_avg=tw_avg_wd, t2g=mex_t2g)
g_avg_wk = grid_avgerage(tw_avg=tw_avg_wk, t2g=mex_t2g)

# visualize each city by hour

In [12]:
# Grid cells requested with the same ('cities', 1000) arguments as mex_t2g;
# later cells read its `city`, `grid` and `geometry` columns.
mex_grids = mex.grids('cities', 1000)

reading existing grids


In [13]:
# 24 placeholder dates (1971-01-01 ... 1971-01-24), one per hour of the day, passed to
# time_slider_choropleth as its time axis -- presumably the slider expects date strings.
fake_dates = ["1971-01-%02d" % day for day in range(1, 25)]

In [14]:
# City table: its index is iterated as the city key, and each row's `.geometry`
# is used below for map centring and for the city outline.
mex_cities = mex.cities()

In [132]:
%%time
# One time-slider choropleth per city and day type, saved as a standalone HTML file.
for dow, g_avg in (('wd', g_avg_wd), ('wk', g_avg_wk)):
    print('day of week:', dow)
    for city in mex_cities.index:
        # The centroid coordinates are unpacked as (lon, lat); folium.Map takes [lat, lon].
        center_lon, center_lat = mex_cities.loc[city].geometry.centroid.coords[0]
        city_grids = mex_grids[mex_grids.city == city]
        # {grid id: list of that grid's column values (the hourly averages)}
        hourly_by_grid = g_avg.loc[city_grids.grid].T.to_dict('list')

        city_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)
        slider = time_slider_choropleth(
            city_grids, hourly_by_grid, fake_dates, color_per_day=True
        )
        slider.add_to(city_map)
        city_map.save(f'maps/MexGridChoropleth/{city}_{dow}.html')

day of week: wd
day of week: wk
CPU times: user 4min 29s, sys: 1.38 s, total: 4min 30s
Wall time: 4min 30s


## keep hotspots only

In [17]:
from src.utils import loubar_thres

In [42]:
def keep_hotspot(gc_avg):
    """Zero out, in place, every value that is not above its column's Loubar threshold.

    For each column ``h`` of ``gc_avg`` the threshold is the second value returned by
    ``loubar_thres(column, is_sorted=False)``. Entries ``<=`` that threshold are set
    to 0; the remaining entries (the hotspots) are left untouched.

    Parameters
    ----------
    gc_avg : pandas.DataFrame
        Modified in place -- pass a copy if the original values are still needed.

    Returns
    -------
    None
    """
    for h in gc_avg:
        # Only the threshold is needed; the first return value is ignored.
        _, arr_thres = loubar_thres(gc_avg[h], is_sorted=False)
        # A single .loc assignment replaces the chained form gc_avg[h][mask] = 0,
        # which raises SettingWithCopyWarning and does not write back to gc_avg
        # when pandas copy-on-write is enabled.
        gc_avg.loc[gc_avg[h] <= arr_thres, h] = 0

In [43]:
%%time
# Same per-city time-slider maps as the plain hourly ones, but with non-hotspot values zeroed.
for dow, g_avg in (('wd', g_avg_wd), ('wk', g_avg_wk)):
    print('day of week:', dow)
    for city in mex_cities.index:
        # The centroid coordinates are unpacked as (lon, lat); folium.Map takes [lat, lon].
        center_lon, center_lat = mex_cities.loc[city].geometry.centroid.coords[0]
        city_grids = mex_grids[mex_grids.city == city]

        # Work on a copy: keep_hotspot zeroes values in place and g_avg is reused afterwards.
        hotspot_avg = g_avg.loc[city_grids.grid].copy()
        keep_hotspot(hotspot_avg)
        hourly_by_grid = hotspot_avg.T.to_dict('list')

        city_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)
        time_slider_choropleth(
            city_grids, hourly_by_grid, fake_dates, color_per_day=True
        ).add_to(city_map)
        city_map.save(f'maps/MexGridChoroplethHotSpot/{city}_{dow}.html')
#     break

day of week: wd
day of week: wk
CPU times: user 4min 7s, sys: 1.2 s, total: 4min 8s
Wall time: 4min 8s


## hotspot persistency

In [71]:
from branca.colormap import linear
# Red colour scale over 0..24, matching the range of the persistency value
# (a count of hours) that is coloured and captioned in the map cell below.
cmap = linear.Reds_09.scale(0,24)

In [92]:
%%time
# One map per day type ('wd' / 'wk'); every city is added as its own named layer whose
# grids are coloured by hotspot persistency, i.e. the number of hours a grid stays a hotspot.
for dow, g_avg in [('wd', g_avg_wd), ('wk', g_avg_wk)]:
    print('day of week:', dow)

    some_map = folium.Map(location=[mex.CLAT, mex.CLON], zoom_start=8)

    for c in mex_cities.index:
        cgeom = mex_cities.loc[c].geometry
        gc = mex_grids[mex_grids.city == c].copy()
        gc_avg = g_avg.loc[gc.grid].copy()  # copy: keep_hotspot edits its argument in place
        keep_hotspot(gc_avg)

        # Persistency = number of columns (hours) still non-zero after thresholding.
        # gc_avg was selected with gc.grid, so its rows are in gc's row order; assign
        # positionally rather than relying on gc's index matching the grid ids.
        gc['persistency'] = (gc_avg != 0).sum(axis=1).values
        gc = gc[gc.persistency != 0]

        # Add the city outline as pseudo-grid -1 with persistency 0. After sort_values it
        # precedes any non-negative grid id -- presumably so the outline is drawn first,
        # underneath the hotspot grids (confirm).
        # NOTE(review): DataFrame.append was removed in pandas 2.0; migrate to a concat
        # that preserves the geometry column when upgrading.
        gc = gc.append(
            {'grid': -1, 'geometry': cgeom, 'persistency': 0}, ignore_index=True
        ).sort_values('grid')
        gjson = gc[['grid', 'persistency', 'geometry']].to_json()

        folium.GeoJson(
            gjson,
            name=c,
            # Use the notebook's `cmap`; the previous name `colormap` is not defined
            # anywhere in this notebook and only worked through leftover kernel state.
            style_function=lambda feature: {
                'fillColor': cmap(feature['properties']['persistency']),
                'color': 'black',
                'weight': 1,
                'dashArray': '5, 5',
                'fillOpacity': 0.8,
            }
        ).add_to(some_map)

    cmap.caption = 'hotspot persistency (hours)'
    cmap.add_to(some_map)
    folium.LayerControl(collapsed=False).add_to(some_map)
    some_map.save(f'maps/MexGridChoroplethHotSpotPersistency_{dow}.html')


day of week: wd
day of week: wk
CPU times: user 7.05 s, sys: 32 ms, total: 7.08 s
Wall time: 7.08 s
