# Co-location and Crowding Analysis for March to October 2020

In [None]:
import json
import matplotlib.pyplot as plt
import os
import pandas as pd
import datetime
from collections import Counter
import numpy as np

from toolbox import *
from h3_tools import *

In [None]:
year = 2020

# Study period expressed as (month, first day, last day), both ends inclusive.
# NOTE(review): October stops at the 29th rather than the 31st -- confirm
# that this matches the last day of available mobility data.
_study_period_by_month = [
    (3, 2, 31),
    (4, 1, 30),
    (5, 1, 31),
    (6, 1, 30),
    (7, 1, 31),
    (8, 1, 31),
    (9, 1, 30),
    (10, 1, 29),
]

# One {'month', 'day'} record per calendar day, in chronological order.
dates = [
    {'month': month_number, 'day': day_number}
    for month_number, first_day, last_day in _study_period_by_month
    for day_number in range(first_day, last_day + 1)
]

In [None]:
# H3 resolution passed to get_h3_cells_by_interval when stays are mapped to cells.
# NOTE(review): the building-cover file loaded in this notebook is named
# 'h3_res11_builtup.json' -- presumably it must be regenerated if this value changes.
resolution=11

In [None]:
# Load the per-cell building-cover mapping. The context manager makes sure the
# file handle is closed as soon as the JSON has been parsed.
with open('../outputs/h3_res11_builtup.json') as builtup_file:
    building_cover_all_cells = json.load(builtup_file)

# Peek at one (cell id, building cover) entry as a sanity check, without
# materialising the whole mapping as a list; shows None if the mapping is empty.
next(iter(building_cover_all_cells.items()), None)

In [None]:
# Split the day into 20-minute intervals. create_intervals is provided by one of
# the star imports (toolbox / h3_tools); both return values are forwarded
# unchanged to get_h3_cells_by_interval below.
# NOTE(review): the meaning of T is not visible from this notebook -- confirm in the helper.
intervals, T=create_intervals(interval_length_minutes=20)

## Find number of people in each spatio-temporal cell throughout the study period

In [1]:
def find_collocations(stay_ind_by_interval, cell_ids_by_stay_ind, building_cover=None):
    """
    For every interval, collect the occupancy of each shared h3 cell.

    A cell is reported for an interval only when more than one stay falls in
    it during that interval AND the cell has a non-negative entry in the
    building-cover mapping (cells without an entry are treated as lying
    outside the study area, i.e. outside Andorra, and are dropped).

    Parameters
    ----------
    stay_ind_by_interval : iterable of iterables
        For each interval, the indices of the stays active in that interval.
    cell_ids_by_stay_ind : sequence or mapping
        Lookup from stay index to h3 cell id.
    building_cover : mapping, optional
        Lookup from h3 cell id to building-cover proportion. Defaults to the
        module-level ``building_cover_all_cells``.

    Returns
    -------
    list of dict
        One dict per interval with parallel lists:
        'm' -- number of stays in each retained cell,
        'b' -- building-cover proportion of each retained cell.
    """
    if building_cover is None:
        # Backward-compatible default: fall back to the notebook-level mapping.
        building_cover = building_cover_all_cells

    colocations_by_int = []
    for s_ind_this_interval in stay_ind_by_interval:
        n_stays_by_cell_id = Counter(
            cell_ids_by_stay_ind[s_ind] for s_ind in s_ind_this_interval)
        memberships_all_cells = []
        bld_cover_all_cells = []
        for cell, members in n_stays_by_cell_id.items():
            # -1 marks "no building-cover entry"; an explicit default replaces
            # the previous bare `except`, which would have hidden any error.
            bld_cover = building_cover.get(cell, -1)
            if members > 1 and bld_cover >= 0:
                memberships_all_cells.append(members)
                bld_cover_all_cells.append(bld_cover)
        colocations_by_int.append({'m': memberships_all_cells, 'b': bld_cover_all_cells})
    return colocations_by_int

In [None]:
# Per-day accumulators, one entry appended per study day. Tuple assignment
# creates three independent lists (no aliasing between them).
n_interactions_all_days, distances_all_days, colocations_all_days = [], [], []

In [None]:
# For every study day: load that day's stays, map them to h3 cells per
# interval and record the resulting co-locations. A day whose file cannot be
# read or parsed contributes an empty list, so colocations_all_days stays
# aligned one-to-one with `dates`.
for date in dates:
    month = date['month']
    day = date['day']
    print('{}_{}'.format(month, day))
    stays_path = '../data/private/stays/{}_{}/stays_{}_{}_{}.json'.format(year, month, year, month, day)
    try:
        # Context manager closes the file even if parsing fails.
        with open(stays_path) as stays_file:
            persons = json.load(stays_file)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON
        # (json.JSONDecodeError is a ValueError subclass). Anything else is
        # a genuine bug and is allowed to propagate.
        print("Couldn't get data for {}_{}_{}".format(year, month, day))
        persons = []
    if len(persons) > 0:
        stay_ind_by_interval, cell_ids_by_stay_ind = get_h3_cells_by_interval(
            resolution=resolution, intervals=intervals, T=T, persons=persons)
        colocations = find_collocations(stay_ind_by_interval, cell_ids_by_stay_ind)
        colocations_all_days.append(colocations)
    else:
        colocations_all_days.append([])

## Compute the indoor and outdoor crowding indices for each interval: count the unique subscriber pairs in each cell and attribute them to indoor and outdoor space according to the cell's building coverage

In [None]:
def compute_crowding(colocations_all_days, indoor_exponent=1, outdoor_exponent=1):
    """
    Compute daily indoor and outdoor crowding indices.

    Within each interval, a cell holding ``m`` stays contributes
    ``m * (m - 1) / 2`` potential pairwise interactions. That count is raised
    to the relevant exponent and weighted by the cell's building cover ``b``
    for the indoor index and by ``1 - b`` for the outdoor index. The
    per-interval sums are added up over the day and scaled by the fraction of
    a day that one interval represents.

    Parameters
    ----------
    colocations_all_days : list
        One entry per day; each entry is a list of per-interval dicts with
        parallel lists 'm' (memberships) and 'b' (building cover). An empty
        list marks a day without data.
    indoor_exponent, outdoor_exponent : number
        Exponents applied to the potential-interaction counts.

    Returns
    -------
    (indoor_by_day, outdoor_by_day) : tuple of lists
        One value per input day; NaN for days without data.
    """
    # Fraction of one day represented by each interval. It is taken from the
    # first day that actually has data, so a missing first day (or an empty
    # input) no longer raises ZeroDivisionError / IndexError.
    n_intervals = next((len(co_d) for co_d in colocations_all_days if len(co_d) > 0), None)
    dt = 1 / n_intervals if n_intervals else float('NaN')

    indoor_by_day, outdoor_by_day = [], []
    for co_d in colocations_all_days:
        if len(co_d) == 0:
            # No data for this day: keep the position but mark it missing.
            indoor_by_day.append(float('NaN'))
            outdoor_by_day.append(float('NaN'))
            continue
        indoor_today, outdoor_today = 0, 0
        for interval in co_d:
            memberships_interval = np.array(interval['m'])
            # Number of unordered pairs among the members of each cell.
            pot_interactions_interval = np.multiply(memberships_interval, memberships_interval - 1) / 2
            buildings_interval = np.array(interval['b'])
            indoor_today += np.sum(
                np.power(pot_interactions_interval, indoor_exponent) * buildings_interval)
            outdoor_today += np.sum(
                np.power(pot_interactions_interval, outdoor_exponent) * (1 - buildings_interval))
        indoor_by_day.append(dt * indoor_today)
        outdoor_by_day.append(dt * outdoor_today)
    return indoor_by_day, outdoor_by_day

In [None]:
indoor_by_day, outdoor_by_day = compute_crowding(colocations_all_days, indoor_exponent=1, outdoor_exponent=1)

# First calendar day covered by the mobility data; must match the first entry of `dates`.
first_day_mobility_data = pd.to_datetime('2020/03/2', format='%Y/%m/%d')
# The index spans one date per *day* processed. The day count is
# len(colocations_all_days); `colocations` only holds the intervals of the
# last successfully loaded day, so its length is the interval count instead.
# pd.Timedelta is used because `timedelta` is not among the names this
# notebook imports explicitly.
last_day_mobility_data = first_day_mobility_data + pd.Timedelta(days=len(colocations_all_days) - 1)

crowding_df = pd.DataFrame(
    {'indoor_interactions': indoor_by_day,
     'outdoor_interactions': outdoor_by_day},
    index=pd.date_range(first_day_mobility_data, last_day_mobility_data))
crowding_df.index.name = 'Date'

# Total crowding is the sum of both components; NaN days stay NaN.
crowding_df['all_interactions'] = crowding_df['indoor_interactions'] + crowding_df['outdoor_interactions']

In [None]:
# Persist the daily crowding indices (indexed by date) for downstream use.
crowding_csv_path = '../outputs/metrics/2020/crowding_df.csv'
crowding_df.to_csv(crowding_csv_path)