In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyproj import Geod
from tqdm.notebook import tqdm
from matplotlib.animation import FuncAnimation
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader
import graph_tool.all as gt
import networkx as nx
import pickle

In [2]:
# read the raw trip/activity table from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer='./VF_data/pandemos_tra_tapas_modell.csv')

In [3]:
# order rows per person and, within a person, chronologically by start time;
# the index is renumbered 0..len(df)-1 so positional and label indices agree
df = df.sort_values(
    by=['p_id', 'start_time_min'],
    ignore_index=True,
)

In [5]:
def get_nnodes(df, n):
    """Return the positional row indices of the first ``n`` persons in ``df``.

    ``df`` is expected to be sorted by ``p_id`` (and ``start_time_min``) so that
    all rows belonging to one person form a single contiguous run.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``p_id`` column.
    n : int or None
        Number of persons to select, in order of first appearance in ``df``.
        A falsy value (``None`` or ``0``) selects every person; a value larger
        than the number of distinct persons is clamped to that number.

    Returns
    -------
    nnodes : list of list of int
        One list per selected person holding the positional (``iloc``) indices
        of that person's rows.
    unique_nodes : array
        All distinct ``p_id`` values of ``df`` as returned by
        ``Series.unique()`` (order of appearance, NOT truncated to ``n``).
    """
    p_ids = df.p_id.to_numpy()
    unique_nodes = df.p_id.unique()

    # select all nodes if n is not provided, and at most all available nodes
    n = min(n, len(unique_nodes)) if n else len(unique_nodes)

    print(n)

    # Positions where the p_id differs from the previous row, i.e. where a new
    # person's run begins. Computed in one vectorised pass instead of probing
    # df.iloc row by row, which was slow and ran past the end of the frame
    # when the last person was selected.
    run_starts = np.flatnonzero(p_ids[1:] != p_ids[:-1]) + 1
    starts = np.concatenate(([0], run_starts)).tolist()
    ends = np.concatenate((run_starts, [len(p_ids)])).tolist()

    # For a sorted frame the k-th run belongs to the k-th unique p_id.
    nnodes = [list(range(start, end)) for start, end in zip(starts[:n], ends[:n])]

    return nnodes, unique_nodes

In [6]:
def get_position_vector_nnodes2(df, n):
    """Build one longitude and one latitude track per person on a common time grid.

    For each of the first ``n`` persons (see ``get_nnodes``) the person's rows
    are converted into a list of (lon, lat) points: NaN padding up to the
    person's first start time, then for every row the geodesic points between
    start and end location followed by the end location repeated for the
    activity duration, and finally NaN padding up to the common end time.

    Parameters
    ----------
    df : pandas.DataFrame
        Must be sorted by ``p_id`` and ``start_time_min`` and provide the
        columns ``start_time_min``, ``activity_start_min``,
        ``activity_duration_min``, ``travel_time_sec``, ``lon_start``,
        ``lat_start``, ``lon_end`` and ``lat_end``. The ``*_min`` columns are
        used as list repetition counts, so they are assumed to hold integers
        -- TODO confirm against the data.
    n : int or None
        Number of persons to process; forwarded unchanged to ``get_nnodes``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Longitudes and latitudes, each transposed so that the first axis is the
        time step and the second axis is the person:
        R_x(t) = [[r1(t=0), ..., rn(t=0)], ..., [r1(t=tmax), ..., rn(t=tmax)]]
    """
    # create points in space and time for every node
    index, unique_nnodes = get_nnodes(df, n)
    geod = Geod("+ellps=WGS84")

    # get edges of time interval (materialise the selected rows only once)
    index_flat = [item for sublist in index for item in sublist]
    selected = df.iloc[index_flat]
    tmin = selected.start_time_min.min()

    # a negative earliest start shifts the whole time axis so it begins at 0
    if tmin < 0:
        tmin = abs(tmin)
    else:
        tmin = 0
    tmax = (selected.activity_start_min + selected.activity_duration_min).max() + tmin

    R_lons, R_lats = [], []

    # len(index) is the number of persons actually selected, which may differ
    # from n (n can be None or exceed the number of persons in df)
    for ind in tqdm(index, total=len(index)):
        # get all df entries of current node
        dfj = df.iloc[ind]

        # convert trajectories and activities into position vector
        # R(t) = ((lon_0, lat_0), (lon_1, lat_1), ..., (lon_max, lat_max))^T
        S_loni, S_lati = dfj.lon_start.to_numpy(), dfj.lat_start.to_numpy()
        F_loni, F_lati = dfj.lon_end.to_numpy(), dfj.lat_end.to_numpy()
        T = (dfj.travel_time_sec / 60).round(0).to_numpy(int)
        At = dfj.activity_duration_min.to_numpy()
        wait = dfj.start_time_min.iloc[0] + tmin

        R = [(np.nan, np.nan),]*wait
        for lon1, lat1, lon2, lat2, npts, Ati in zip(S_loni, S_lati, F_loni, F_lati, T, At):
            Gi = geod.npts(lon1, lat1, lon2, lat2, npts, initial_idx=0, terminus_idx=0) # get points on geodesic
            Ai = [(lon2, lat2),]*Ati
            R += Gi + Ai

        # pad with NaN up to the common end time (no-op if the track is already longer)
        R += [(np.nan, np.nan),]*(tmax - len(R))

        # split the (lon, lat) pairs into two parallel lists; also works for an empty track
        R_lons.append([lon for lon, _ in R])
        R_lats.append([lat for _, lat in R])

    # Due to rounding of the travel times a track can come out slightly longer
    # than the common time axis. Detect this explicitly (np.array only raises
    # for ragged input on recent NumPy versions) and drop the surplus leading
    # time steps of EVERY over-long track so all tracks share the shortest length.
    lengths = {len(row) for row in R_lons}
    if len(lengths) > 1:
        print('Over shoot due to precission error')
        length = min(lengths)
        R_lons = [row[len(row) - length:] for row in R_lons]
        R_lats = [row[len(row) - length:] for row in R_lats]

    R_lons = np.array(R_lons)
    R_lats = np.array(R_lats)

    # Return in format R_x(t)=[[r1(t=0), r2(t=0), ..., rn(t=0)], [r1(t=1), r2(t=1), ..., rn(t=1)], ..., [r1(t=tmax), r2(t=tmax), ..., rn(t=tmax)]]
    return R_lons.T, R_lats.T

### Network generation

In [7]:
# end of each activity = its start plus its duration (both in minutes)
df['activity_end_min'] = df['activity_start_min'].add(df['activity_duration_min'])

In [None]:
# placeholder list; get_contacts below does not append to it
contacts = []


def get_contacts(grp):
    """Print a group and, for every activity end, which activity starts precede it.

    Exploratory helper for ``groupby(...).apply``: it only prints and returns
    ``None``.

    Parameters
    ----------
    grp : pandas.DataFrame
        Rows of one group; must contain ``activity_start_min`` and
        ``activity_end_min``.
    """
    print(grp)

    starts = grp['activity_start_min']
    # row i / column j is True when activity j starts before activity i ends
    starts_before_end = grp['activity_end_min'].apply(lambda end: starts.lt(end))
    print(starts_before_end)

    print('\n\n\n')
    

# run the inspection helper once per end location, on the columns relevant for contacts
(
    df
    .groupby('loc_id_end')[['p_id', 'activity_start_min', 'activity_end_min']]
    .apply(get_contacts)
)