In [116]:
% load_ext autoreload
% autoreload 2


In [117]:
import glob
import os
import pandas as pd
from collections import defaultdict
import datetime
import src.mex_helper as mex
import folium
from src.utils.map_vis import time_slider_choropleth

# load daily stats

In [2]:
# Collect the per-day "-located" stats files. The loading loop parses a
# YYYY-MM-DD date from the first 10 characters of each file name, so a plain
# lexicographic sort of the paths also orders them by date.
fns = glob.glob('stats/MexTwHrUniqCntVOZ/*-located.csv')
fns.sort()

In [8]:
%%time
store_wd = defaultdict(list)
store_wk = defaultdict(list)
store = {True: store_wd, False:store_wk}
for i,fn in enumerate(fns):
    if i%30==0:
        print('working on %dth file %s' % (i, fn))
    date = os.path.basename(fn)[:10]
#     if date=='2009-10-03': break
    date = datetime.datetime.strptime(date,'%Y-%m-%d')
    is_wd = date.weekday()<5
    tmp_df = pd.read_csv(fn,index_col=0)
    for gtid, row in tmp_df.iterrows():
        store[is_wd][gtid].append(row)
#     break

working on 0th file stats/MexTwHrUniqCntVOZ/2009-10-01-located.csv
working on 30th file stats/MexTwHrUniqCntVOZ/2009-10-31-located.csv
working on 60th file stats/MexTwHrUniqCntVOZ/2009-11-30-located.csv
working on 90th file stats/MexTwHrUniqCntVOZ/2009-12-30-located.csv
working on 120th file stats/MexTwHrUniqCntVOZ/2010-01-29-located.csv
working on 150th file stats/MexTwHrUniqCntVOZ/2010-03-27-located.csv
working on 180th file stats/MexTwHrUniqCntVOZ/2010-04-26-located.csv
working on 210th file stats/MexTwHrUniqCntVOZ/2010-05-26-located.csv
working on 240th file stats/MexTwHrUniqCntVOZ/2010-06-25-located.csv


# average over weekday and weekend

In [10]:
%%time
average = {True: dict(), False: dict()}
for is_wd in [True, False]:
    print('is_wd=',is_wd)
    for gtid, rows in store[is_wd].items():
        avg_row = pd.DataFrame(rows).fillna(0).mean(axis=0)
        average[is_wd][gtid]=avg_row

is_wd= True
is_wd= False
CPU times: user 2min 30s, sys: 88 ms, total: 2min 30s
Wall time: 2min 30s


In [11]:
# Turn each {gtid: mean row} dict into a frame with one row per tower
# (weekday averages first, then weekend averages).
tw_avg_wd, tw_avg_wk = (
    pd.DataFrame(average[is_weekday]).transpose() for is_weekday in (True, False)
)

# distribute tower stats to grids

In [18]:
# Tower-to-grid mapping from the project helper for the ('cities', 1000)
# configuration. The logged output shows it is read from a cached file,
# data/mex_t2g_cities_1000m.csv, so 1000 is presumably the grid size in metres.
mex_t2g = mex.tower2grid('cities', 1000)

reading existing t2g file: data/mex_t2g_cities_1000m.csv
reading existing grids


In [51]:
# Count the mapped towers that never show up in the weekday / weekend averages.
print('number of towers in cities has no call at all during weekday and weekend')
tuple(
    len(set(mex_t2g.gtid).difference(tw_avg.index))
    for tw_avg in (tw_avg_wd, tw_avg_wk)
)

number of towers in cities has no call at all during weekday and weekend


(233, 234)

In [82]:
def grid_avgerage(tw_avg, t2g):
    """Distribute per-tower hourly averages onto grid cells.

    Parameters
    ----------
    tw_avg : pandas.DataFrame
        Indexed by tower id, with hour columns labelled '0' .. '23'.
    t2g : pandas.DataFrame
        Tower-to-grid mapping with columns 'gtid', 'grid' and 'weight'.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'grid'; each hour column holds the sum over the mapped
        towers of (tower average * weight).

    Notes
    -----
    The merge is a left join on the mapping, so towers absent from ``tw_avg``
    carry NaN. The grouped sum skips NaN, so such towers contribute nothing and
    a grid made up only of them ends up with 0.
    """
    hour_cols = [str(hour) for hour in range(24)]
    merged = t2g.merge(tw_avg, left_on='gtid', right_index=True, how='left')

    # scale every hour column by the tower -> grid weight
    weighted = merged.assign(
        **{col: merged[col] * merged['weight'] for col in hour_cols}
    )

    return weighted.drop(columns=['gtid', 'weight']).groupby('grid').sum()

In [84]:
# Spread the weekday / weekend tower averages over the grid cells.
# The tower-to-grid mapping is bound to `mex_t2g` in this notebook; the bare
# name `t2g` is never defined by any cell and only worked through leftover
# kernel state.
g_avg_wd = grid_avgerage(tw_avg_wd, mex_t2g)
g_avg_wk = grid_avgerage(tw_avg_wk, mex_t2g)

# visualize each city by hour

In [87]:
# Grid cells from the project helper for the ('cities', 1000) configuration.
# The rendering loop filters this table on its `city` column and looks up
# averages by its `grid` column.
mex_grids = mex.grids('cities', 1000)

reading existing grids


In [93]:
# 24 placeholder date strings, 1971-01-01 .. 1971-01-24. They are handed to the
# time-slider choropleth as its time axis, presumably one "day" per hour of the
# day (TODO confirm against time_slider_choropleth).
fake_dates = list(map("1971-01-{:02d}".format, range(1, 25)))

In [122]:
# City table from the project helper. The rendering loop iterates over its
# index and reads each row's `geometry` centroid to centre the map.
mex_cities = mex.cities()

In [127]:
# Sanity check on one city: the centroid's first coordinate tuple, which the
# rendering loop unpacks as (lon, lat).
# Pick the city explicitly; the loop variable `c` does not exist yet when the
# notebook is run top to bottom on a fresh kernel.
example_city = mex_cities.index[0]
mex_cities.loc[example_city].geometry.centroid.coords[0]

(-99.13941118827485, 19.276889636237808)

In [132]:
%%time
for dow, g_avg in [('wd', g_avg_wd), ('wk',g_avg_wk)]:
    print('day of week:', dow)
    for c in mex_cities.index:
        lon,lat = mex_cities.loc[c].geometry.centroid.coords[0]
        gc = mex_grids[mex_grids.city==c]
        gc_avg = g_avg.loc[gc.grid]
        values = gc_avg.T.to_dict('list')
        some_map = folium.Map(location=[lat,lon], zoom_start=10)
        time_slider_choropleth(gc, values, fake_dates, color_per_day=True).add_to(some_map)
        some_map.save(f'maps/MexGridChoropleth/{c}_{dow}.html')

day of week: wd
day of week: wk
CPU times: user 4min 29s, sys: 1.38 s, total: 4min 30s
Wall time: 4min 30s
