In [89]:
import sys
sys.path.append('../lib/')

import pandas as pd
import numpy as np
import geopandas as gpd

import workers
import v_ij
import math

from shapely.geometry import Point
from copy import deepcopy

### load grids, set area and average radius

In [90]:
grids = gpd.read_file('../results/grids_vgr_5km_density_deso.shp')

results_output = "../results/model_output_5km.txt"
results_benchmark = '../results/benchmark_5km.txt'

area = 25 # km

# A=pi*r_average^2
r_average = math.sqrt(area/math.pi) # km

### Get distance between grids

In [91]:
# This gives a stacked version
distances = workers.zone_distances(grids)
# This gives a matrix-style dataframe
df_d = distances.unstack(level=1)

Calculating distances between zones...


### Get population density and geometry

In [92]:
grid_name = list(grids.zone)
population_density = dict(zip(grids.zone, grids.density))
geometry = dict(zip(grids.zone, grids.geometry))

### Get $v^{tot}_{ij}$ between grids

In [93]:
# parameter = ln(f_max/f_min), f_min = 1/T, f_max = 1 T = 1000
T = 1000
f_max = 1
f_min = 1/T
parameter = math.log(f_max / f_min)

#ODM = {orig: {desti: v_{ori, desti}}}
# orig is the grid_name
# desti is the grid_name 


ODM_tot = dict()
for i in range(0, len(grid_name)):
    element = dict()
    for j in range(i + 1, len(grid_name)):
        number_of_trips = v_ij.average_daily_trips(population_density[grid_name[j]], area, r_average, df_d[grid_name[i]][grid_name[j]], parameter) + v_ij.average_daily_trips(population_density[grid_name[i]], area, r_average, df_d[grid_name[i]][grid_name[j]], parameter)
        element[grid_name[j]] = number_of_trips
    ODM_tot[grid_name[i]] = element

### Load Sweden VG zone data

In [94]:
zones = gpd.read_file('../dbs/sweden/zones/DeSO/DeSO_2018_v2.shp')
zones.loc[:, 'deso_3'] = zones.loc[:, 'deso'].apply(lambda x: x[:2])
zones_subset = zones.loc[zones['deso_3'] == '14', :]
zones_subset_info = dict(zip(zones_subset['deso'], zones_subset['geometry']))
zone_name = list(zones_subset['deso'])

### Aggregated Deso zone level

In [95]:
zones_subset.loc[:, 'deso_5'] = zones_subset.loc[:, 'deso'].apply(lambda x: x[:5])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


### Get  $v^{tot}_{ij}$ between aggregated Deso zones in VG

In [96]:
cover = []
checked = set()
for i in range(0, len(zone_name)):
    sub_cover = []
    for j in range(0, len(grid_name)):
        if j in checked:
            continue
        point_j = Point(geometry[grid_name[j]].centroid.x, geometry[grid_name[j]].centroid.y)
        if zones_subset_info[zone_name[i]].contains(point_j) == True:
            # grid_j in zone_i
            sub_cover.append(grid_name[j])
            checked.add(j)   # This grid has been occupied, we do not need to check it again.
    cover.append(sub_cover)
within = dict(zip(zone_name, cover))

In [None]:
bigzone_name = []
bigCover = []
subCover = []
old_name = zone_name[0][0:5]

bigzone_name.append(old_name)
subCover.extend(within[zone_name[0]])

for i in range(1, len(zone_name)):
    new_name = zone_name[i][0:5]

    if new_name == old_name:
        # this two zones belong the same big zone
        subCover.extend(within[zone_name[i]])

    if new_name != old_name:
        # find a new big zone
        #store old results
        bigCover.append(deepcopy(subCover))
        subCover.clear()

        #store new resutls
        bigzone_name.append(new_name)
        subCover.extend(within[zone_name[i]])

    old_name = new_name

# handle the lastest case
bigCover.append(subCover)


big_within = dict(zip(bigzone_name, bigCover))

In [None]:
ODM_big = dict()
for i in range(0, len(bigzone_name)):
    element = dict()
    for j in range(i, len(bigzone_name)):
        if i == j:
            average_daily_trips = 0
            for begin in range(0, len(big_within[bigzone_name[i]])):
                for end in range(begin + 1, len(big_within[bigzone_name[i]])):
                    average_daily_trips = average_daily_trips + ODM_tot[big_within[bigzone_name[i]][begin]][big_within[bigzone_name[i]][end]]

            element[bigzone_name[j]] = 2 * average_daily_trips
        if i != j:
            average_daily_trips = 0
            for begin in range(0, len(big_within[bigzone_name[i]])):
                for end in range(0, len(big_within[bigzone_name[j]])):
                    average_daily_trips = average_daily_trips + ODM_tot[big_within[bigzone_name[i]][begin]][big_within[bigzone_name[j]][end]]
            element[bigzone_name[j]] = average_daily_trips

    ODM_big[bigzone_name[i]] = element

### From upper triangular matrix to a vector

In [None]:
model_output = np.zeros(len(bigzone_name)*len(bigzone_name))   # the vector of model_output is stored here
for i in range(0, len(bigzone_name)):
    for j in range(0, len(bigzone_name)):
        if i <= j:
            model_output[i + j * len(bigzone_name)] = ODM_big[bigzone_name[i]][bigzone_name[j]]
        if i > j:
            model_output[i + j * len(bigzone_name)] = ODM_big[bigzone_name[j]][bigzone_name[i]]

In [None]:
np.savetxt(results_output, model_output)

#--------------From here we prepare the groundtruth data----------#

In [None]:
zones = gpd.read_file('../dbs/sweden/zones/DeSO/DeSO_2018_v2.shp')
zones = zones.rename(columns={"deso": "zone"})[['zone', 'geometry']]


trips = pd.read_csv("../dbs/sweden/survey/day_trips.csv")
trips = trips.loc[:, ["sub_id", 'trip_id', 'trip_main_id', 'distance_main',
                              'date', "origin_main_deso", "desti_main_deso", 'trip_weight']]
trips = trips.drop_duplicates(subset=["sub_id", 'trip_id', 'trip_main_id'])
trips["T"] = trips["date"].apply(lambda x: pd.to_datetime(x))
trips = trips.loc[~trips["T"].apply(lambda x: x.weekday()).isin([5, 6]), :]
trips.dropna(axis=0, how='any', inplace=True)

odms = trips.groupby(['origin_main_deso', 'desti_main_deso']).sum()['trip_weight']
odms = odms.reindex(pd.MultiIndex.from_product([zones.zone, zones.zone], names=['ozone', 'dzone']), fill_value=0)

In [None]:
ODM = dict()
for i in range(0, len(zone_name)):
    element = dict()
    for j in range(0, len(zone_name)):
        element[zone_name[j]] = odms.at[zone_name[i], zone_name[j]]
    ODM[zone_name[i]] = element

In [None]:
bigzone_name = []
bigCover = []
subCover = []
old_name = zone_name[0][0:5]

bigzone_name.append(old_name)
subCover.append(zone_name[0])

for i in range(1, len(zone_name)):
    new_name = zone_name[i][0:5]

    if new_name == old_name:
        # this two zones belong the same big zone
        subCover.append(zone_name[i])

    if new_name != old_name:
        # find a new big zone
        #store old results
        bigCover.append(deepcopy(subCover))
        subCover.clear()

        #store new resutls
        bigzone_name.append(new_name)
        subCover.append(zone_name[i])

    old_name = new_name

# handle the lastest case
bigCover.append(subCover)


big_within = dict(zip(bigzone_name, bigCover))

### Aggregated groundtruth data

In [None]:
ODM_truth = dict()
for i in range(0, len(bigzone_name)):
    element = dict()
    for j in range(0, len(bigzone_name)):
        average_daily_trips = 0
        if i == j:
            for begin in range(0, len(big_within[bigzone_name[i]])):
                for end in range(0, len(big_within[bigzone_name[i]])):
                    average_daily_trips = average_daily_trips + ODM[big_within[bigzone_name[i]][begin]][big_within[bigzone_name[i]][end]]
                    
        if i != j:
            for begin in range(0, len(big_within[bigzone_name[i]])):
                for end in range(0, len(big_within[bigzone_name[j]])):
                    average_daily_trips = average_daily_trips + ODM[big_within[bigzone_name[i]][begin]][big_within[bigzone_name[j]][end]]
        
        element[bigzone_name[j]] = average_daily_trips

    ODM_truth[bigzone_name[i]] = element

In [None]:
benchmark = np.zeros(len(bigzone_name)*len(bigzone_name))   # the vector of groundtruth data is stored here
for i in range(0, len(bigzone_name)):
    for j in range(0, len(bigzone_name)):
            benchmark[i + j * len(bigzone_name)] = ODM_truth[bigzone_name[i]][bigzone_name[j]]

In [None]:
np.savetxt(results_benchmark, benchmark)