In [1]:
import community
import statistics
import numpy as np
import pandas as pd
import networkx as nx
import geopandas as gpd
from community import community_louvain
import notebooks_loops_script as nls

# Calculating OD-matrix based mobility indicators
Using the OD-Matrices calculated in Notebook B_02, further metrics are calculated here.

In [2]:
# Rolling-window configuration for this run.
start, end = "20200406", "20200409"  # window bounds, passed to nls.middeling (presumably YYYYMMDD)
denom = "3days"  # window label; part of the statistics and movement-matrix paths
allow_even_subsets, del_one_tweeters = False, True
sam = False  # when set to an int, the value is appended to the statistics file name

In [3]:
# Relative path of the statistics CSV for the configured window label.
ref = f"statistics_notebookdemo/{denom}_overlap.csv"
# The exact type check means the default `sam = False` does not match; only a real int
# is appended to the file name.
if type(sam) is int:
    ref = f"{ref.partition('.csv')[0]}_{sam}.csv"

In [4]:
# Location of the statistics table; its rows are indexed by the `start_date` column.
path = f"data/{ref}"
file = pd.read_csv(path, index_col='start_date')

In [5]:
# Window identifier used in the matrix file name, computed by the project helper
# `nls.middeling` from the window bounds (presumably the middle date — TODO confirm).
middle = nls.middeling(start, end, allow_even_subsets=allow_even_subsets)
# Path of the movement matrix stored for this window label and window identifier.
mm_path = f'./data/movement_matrices_notebookdemo/{denom}/mm_{middle}.npy'

### Load in Shapefiles

In [6]:
barrios_path = 'data/shps/neighborhoods.shp'
barrios_20823 = gpd.read_file(barrios_path).to_crs(20823)

# Neighbourhood codes as integers, so they can be matched against integer codes later.
barrios_20823['CODBAIRRO'] = barrios_20823['CODBAIRRO'].astype(int)
# Replace every geometry by its representative point.
barrios_20823['geometry'] = barrios_20823.geometry.representative_point()
# NOTE(review): the columns are named lat/lon but hold the y/x coordinates in CRS 20823,
# presumably projected units rather than degrees — confirm.
barrios_20823['lat'] = barrios_20823.geometry.y
barrios_20823['lon'] = barrios_20823.geometry.x

In [7]:
# Collects the metrics computed in this notebook, keyed by metric name.
stats = {}

### Load in Movement Matrices
Here, a single movement matrix (OD matrix) is loaded. In `notebooks_loops_script.py`, all relevant matrices are loaded and processed in a loop.

In [8]:
# Load the OD matrix of the selected window from its .npy file.
mm = np.load(mm_path)

In [9]:
# Zero the diagonal in place. If the preceding workflow ran as intended there are no
# values on the diagonal anyway, so this is only a safeguard.
np.fill_diagonal(mm, 0)

# Share of each origin-destination pair in all movements.
# NOTE(review): for a matrix without any movements (sum == 0) this yields NaN entries.
mm_normal = mm / mm.sum()

In [10]:
# The amount of registered movements can be determined by the total sum of the OD matrix
# The sum over all OD-matrix entries is the number of registered movements.
stats['no_real_movements'] = mm.sum()

### Graph based modularity
Calculate the graph modularity using the community-louvain package. If the modularity is undefined (this happens for some rolling windows with very few tweets), the value is set to `np.nan`.

In [11]:
# Build a weighted graph from the OD matrix and detect communities with Louvain.
# NOTE(review): nx.from_numpy_array returns an undirected Graph here, so for an asymmetric
# matrix only one of mm[i, j] / mm[j, i] appears to end up as the edge weight — confirm
# this is intended or symmetrise the matrix first.
# NOTE(review): best_partition is randomised; results may differ between runs unless a
# random_state is passed (if the installed python-louvain version supports it).
G = nx.from_numpy_array(mm)
partition = community_louvain.best_partition(G, weight='weight')

try:
    stats['graph_modularity'] = community.modularity(partition, G)
except ValueError:
    # python-louvain raises ValueError when the graph has no links; any other error
    # (e.g. an invalid partition) should surface instead of being recorded as NaN.
    print("Undefined Modularity!")
    stats['graph_modularity'] = np.nan


In [12]:
# Display the metrics collected so far.
stats

{'no_real_movements': 45.0, 'graph_modularity': 0.799375}

### Inflow / Outflow per Barrio
For each neighborhood, the inflow and outflow values get counted and saved.

In [13]:
# Row sums are the outgoing movements of a barrio, column sums the incoming ones.
# Barrio numbers in the keys are 1-based.
outflow = {
    f'outflow_{row + 1}_barrio': mm[row, :].sum()
    for row in range(mm.shape[0])
}
inflow = {
    f'inflow_{col + 1}_barrio': mm[:, col].sum()
    for col in range(mm.shape[1])
}

# Inflow entries are added first, then outflow entries.
stats.update(inflow)
stats.update(outflow)

### Mean \# of Meaningful Connections (>= 5 %)
For each barrio, we count the connections that each make up 5 % or more of its outgoing (and, separately, incoming) movements. Barrios without any connections are skipped. The mean of these counts is saved.

In [14]:
# Barrios without any outgoing/incoming connections are ignored.
# Matrix layout is [row, column]: row sums are outflow, column sums are inflow.
def number_of_meaningful_connections(outflow=False, matrix=None, threshold=0.05):
    """Return the mean number of meaningful connections per barrio.

    A connection is meaningful when it accounts for at least ``threshold`` of the
    barrio's total movements in the chosen direction.

    Parameters
    ----------
    outflow : bool
        If True, evaluate outgoing movements (rows of the matrix); otherwise
        evaluate incoming movements (columns).
    matrix : numpy.ndarray, optional
        Square OD matrix. Defaults to the notebook-level ``mm``.
    threshold : float
        Minimum share of the barrio's total for a connection to be counted.

    Returns
    -------
    float or int
        Mean count over all barrios that have at least one connection, or
        ``np.nan`` when no barrio has any connection.
    """
    if matrix is None:
        matrix = mm

    counts = []
    for i in range(matrix.shape[0]):
        # rows hold what leaves barrio i, columns what arrives in it
        rel = matrix[i, :] if outflow else matrix[:, i]
        total = rel.sum()

        # no connections at all -> this barrio is ignored
        if total == 0:
            continue

        counts.append(int(np.count_nonzero(rel / total >= threshold)))

    # statistics.mean raises on an empty list; report an undefined value instead
    if not counts:
        return np.nan
    return statistics.mean(counts)
    
    
# Store the mean count for both directions; the `outflow` flag selects which axis of
# the OD matrix is evaluated.
stats['number_meaningful_incoming'] = number_of_meaningful_connections()
stats['number_meaningful_outgoing'] = number_of_meaningful_connections(outflow=True)

### Mean Distance to highest connection
For each barrio, we determine its strongest partner(s) and then the distance between their representative points. Then we calculate the mean over all barrios.

In [15]:
def get_mean_distance_to_highest(outflow=False, matrix=None, barrios=None):
    """Return the mean distance between each barrio and its strongest partner.

    For every barrio with at least one connection, the partner(s) with the highest
    value are looked up; ties are averaged. The result is the mean of these
    per-barrio distances.

    Parameters
    ----------
    outflow : bool
        If True, the strongest partner is taken from the barrio's outgoing
        movements (matrix row); otherwise from its incoming movements (column).
    matrix : numpy.ndarray, optional
        Square OD matrix whose index ``i`` corresponds to ``CODBAIRRO == i + 1``.
        Defaults to the notebook-level ``mm``.
    barrios : DataFrame-like, optional
        Table with a ``CODBAIRRO`` column and a ``geometry`` column whose values
        provide ``.distance()``. Defaults to the notebook-level ``barrios_20823``.

    Returns
    -------
    float
        Mean distance in the units of the geometries, or ``np.nan`` when no
        barrio has any connection.
    """
    if matrix is None:
        matrix = mm
    if barrios is None:
        barrios = barrios_20823

    def _point(code):
        # geometry of the first row carrying this barrio code
        return barrios.loc[barrios.CODBAIRRO == code].geometry.values[0]

    per_barrio = []
    for i in range(matrix.shape[0]):
        # rows hold what leaves barrio i, columns what arrives in it
        rel = matrix[i, :] if outflow else matrix[:, i]

        # no connections at all -> this barrio is ignored
        if rel.sum() == 0:
            continue

        # several partners can share the maximum
        partners = np.flatnonzero(rel == rel.max())
        origin = _point(i + 1)
        distances = [origin.distance(_point(partner + 1)) for partner in partners]
        per_barrio.append(statistics.mean(distances))

    # statistics.mean raises on an empty list; report an undefined value instead
    if not per_barrio:
        return np.nan
    return statistics.mean(per_barrio)

# Store the mean distance to the strongest partner for both directions; the `outflow`
# flag selects which axis of the OD matrix is evaluated.
stats['mean_distance_strongest_inflow'] = get_mean_distance_to_highest(outflow=False)
stats['mean_distance_strongest_outflow'] = get_mean_distance_to_highest(outflow=True)

In [16]:
# Use the integer form of the window start date as the row label in the statistics table.
# NOTE: this rebinds `start` from str to int for every cell executed afterwards.
start = int(start)
# Write each metric into its own column of that row, one column at a time.
for name, val in stats.items():
    file.loc[start, name] = val
    
# Save the updated table under the same data/{ref} path it was read from.
file.to_csv(f'data/{ref}')

In [17]:
# Print every collected metric as "name : value", one per line.
for entry in stats.items():
    print(*entry, sep=" : ")

no_real_movements : 45.0
graph_modularity : 0.799375
inflow_1_barrio : 0.0
inflow_2_barrio : 0.0
inflow_3_barrio : 0.0
inflow_4_barrio : 0.0
inflow_5_barrio : 3.0
inflow_6_barrio : 0.0
inflow_7_barrio : 0.0
inflow_8_barrio : 0.0
inflow_9_barrio : 0.0
inflow_10_barrio : 0.0
inflow_11_barrio : 0.0
inflow_12_barrio : 1.0
inflow_13_barrio : 0.0
inflow_14_barrio : 0.0
inflow_15_barrio : 0.0
inflow_16_barrio : 0.0
inflow_17_barrio : 0.0
inflow_18_barrio : 0.0
inflow_19_barrio : 0.0
inflow_20_barrio : 0.0
inflow_21_barrio : 0.0
inflow_22_barrio : 1.0
inflow_23_barrio : 0.0
inflow_24_barrio : 0.0
inflow_25_barrio : 1.0
inflow_26_barrio : 0.0
inflow_27_barrio : 0.0
inflow_28_barrio : 0.0
inflow_29_barrio : 0.0
inflow_30_barrio : 0.0
inflow_31_barrio : 0.0
inflow_32_barrio : 0.0
inflow_33_barrio : 0.0
inflow_34_barrio : 0.0
inflow_35_barrio : 1.0
inflow_36_barrio : 0.0
inflow_37_barrio : 0.0
inflow_38_barrio : 2.0
inflow_39_barrio : 1.0
inflow_40_barrio : 0.0
inflow_41_barrio : 0.0
inflow_42_bar