# Descriptive analysis of the data used

Load GIS data, explore their structure, and test the flow generation method.

In [2]:
import sys

In [3]:
# Make the self-defined libraries in ../lib/ importable from this notebook.
# The path is relative, so it resolves against the notebook's working directory.
# Guarded so that re-running this cell does not append duplicate entries.
LIB_DIR = '../lib/'
if LIB_DIR not in sys.path:
    sys.path.append(LIB_DIR)

In [1]:
import pandas as pd
import numpy as np
import numpy.matlib
%load_ext autoreload
%autoreload 2

In [5]:
# Import self-defined libraries
import sweden
import netherlands
import workers
import v_ij

## 1. Load spatial zones and ground-truth data for Sweden
Check `lib/sweden.py` for what the lines below do.

In [6]:
# Initialise the loader object that stores the Swedish ground-truth data,
# including the spatial zones (implementation lives in the sweden module).
data_sweden = sweden.GroundTruthLoader()

# Load the spatial zones; they are used below as `data_sweden.zones`.
data_sweden.load_zones()

# Create the boundary for later use.
# NOTE(review): presumably derived from the loaded zones, hence called after
# load_zones() -- confirm in the sweden module.
data_sweden.create_boundary()

# Load the ground-truth survey data into origin-destination matrix (ODM) form.
# The two `trip_weight` previews printed under this cell -- one indexed by
# (origin_main_deso, desti_main_deso), one by (ozone, dzone) -- appear to come
# from this call.
data_sweden.load_odm()

origin_main_deso  desti_main_deso
0114A0010         0114C1170          182551.736842
                  0163C1140          810292.236111
                  0163C1170          182927.555556
                  0163C1350           94404.100629
                  0163C1380           23397.135431
Name: trip_weight, dtype: float64
ozone      dzone    
0114A0010  0114A0010    0.0
           0114C1010    0.0
           0114C1020    0.0
           0114C1030    0.0
           0114C1040    0.0
Name: trip_weight, dtype: float64


### 1.1 Check spatial zones and population

In [7]:
# Preview the first rows of the zone table (columns `zone` and `geometry`).
data_sweden.zones.head()

Unnamed: 0,zone,geometry
0,0114A0010,"POLYGON ((661116.252 6606615.603, 661171.409 6..."
1,0114C1010,"POLYGON ((666960.066 6598800.393, 666971.371 6..."
2,0114C1020,"POLYGON ((667034.814 6600076.634, 667032.984 6..."
3,0114C1030,"POLYGON ((667095.512 6599103.521, 667095.009 6..."
4,0114C1040,"POLYGON ((664610.264 6600922.821, 664638.910 6..."


In [8]:
# Load the population data; it is then available as `data_sweden.population`.
data_sweden.load_population()
# Preview the first rows (columns `zone` and `pop`).
data_sweden.population.head()

Unnamed: 0,zone,pop
0,0114A0010,790
1,0114C1010,1608
2,0114C1020,1610
3,0114C1030,2365
4,0114C1040,2346


### 1.2 Calculate distances between zones
Figure out how you will use the output data.

In [9]:
# Distances between zones in stacked (long) form: one value per zone pair.
# The next cell unstacks this into a matrix.
# NOTE(review): the values look like kilometres -- confirm in workers.zone_distances.
distances = workers.zone_distances(data_sweden.zones)

Calculating distances between zones...


In [10]:
# Pivot the stacked distances into a matrix-style dataframe: the second index
# level becomes the columns, so both rows and columns are labelled by zone.
df_d = distances.unstack(level=1)
# Show only the first three rows; the matrix has one column per zone.
df_d.head(3)

zone,0114A0010,0114C1010,0114C1020,0114C1030,0114C1040,0114C1050,0114C1060,0114C1070,0114C1080,0114C1090,...,2584C1020,2584C1030,2584C1040,2584C1050,2584C1060,2584C1070,2584C1090,2584C1100,2584C1110,2584C1130
zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0114A0010,0.0,4.795251,3.548429,4.160562,2.703179,4.783886,1.849689,2.923878,2.159032,1.674308,...,933.767042,933.828072,935.067203,935.416006,935.630309,935.620128,936.859131,936.431609,936.568693,937.598973
0114C1010,4.795251,0.0,1.343363,0.686608,2.451822,1.246766,3.96865,2.079711,2.668393,3.29124,...,937.070798,937.137238,938.379341,938.733022,938.9422,938.922048,940.174915,939.742567,939.88225,940.918898
0114C1020,3.548429,1.343363,0.0,0.657756,1.693341,1.362823,3.073683,0.737077,1.389539,1.969922,...,935.881658,935.947128,937.188698,937.541507,937.751603,937.73324,938.983622,938.552138,938.691355,939.726864


## 2. Load spatial zones and ground-truth data for the Netherlands
Try this yourself.

In [12]:
# Same loading sequence as for Sweden, using the netherlands module:
# loader object -> spatial zones -> boundary -> ground-truth ODM.
# NOTE(review): running this cell emits a pandas SettingWithCopyWarning that
# originates inside the netherlands module (the line converting `distance`
# from hectometer to km on `trips_d`), presumably during load_odm(). The fix
# belongs in the library, not in this notebook.
data_netherlands = netherlands.GroundTruthLoader()
data_netherlands.load_zones()
data_netherlands.create_boundary()
data_netherlands.load_odm()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trips_d.loc[:, 'distance'] = trips_d.loc[:, 'distance'] / 10 # hectometer to km


origin_zip  dest_zip
0           0           1.921383e+08
            1011        3.320952e+05
            1012        3.363486e+05
            1013        1.961443e+05
            1015        5.766617e+05
Name: weight_trip, dtype: float64
ozone  dzone
1011   1011     2.084926e+06
       1012     8.106812e+05
       1013     6.175562e+04
       1014     0.000000e+00
       1015     0.000000e+00
Name: weight_trip, dtype: float64


### Get mu (uses the Swedish population from Section 1.1)

In [11]:
# Per-zone population of Sweden as a plain NumPy array.
population = np.array(data_sweden.population['pop'])

# mu is kept as a single-row matrix with one column per zone.
mu = np.zeros((1, len(population)))

# Fill mu zone by zone with the flow magnitude returned by v_ij.
# NOTE(review): the meaning of the constants 0.5 and 1 is defined by
# v_ij.magnitude_of_flows -- check its signature before changing them.
for i, pop_i in enumerate(population):
    mu[0, i] = v_ij.magnitude_of_flows(pop_i, 0.5, 1)

# Sanity check: print mu for the first zone.
print(mu[0, 0])

197.5


### 2.1 Check spatial zones and population

In [13]:
# Preview the first rows of the Dutch zone table (columns `zone` and `geometry`).
data_netherlands.zones.head()

Unnamed: 0,zone,geometry
0,1011,"POLYGON ((122246.232 487910.177, 122259.064 48..."
1,1012,"POLYGON ((121995.029 488243.202, 121999.664 48..."
2,1013,"MULTIPOLYGON (((120506.219 489494.551, 120495...."
3,1014,"MULTIPOLYGON (((120390.113 489860.746, 120387...."
4,1015,"POLYGON ((120665.642 488535.500, 120668.785 48..."


### 2.2 Calculate distances between zones

In [14]:
# Distances between the Dutch zones in stacked (long) form, computed with the
# same helper as for Sweden; the next cell unstacks them into a matrix.
distances_netherlands = workers.zone_distances(data_netherlands.zones)

Calculating distances between zones...


In [15]:
# Pivot the stacked distances into a matrix-style dataframe: the second index
# level becomes the columns, so both rows and columns are labelled by zone.
df_d_netherlands = distances_netherlands.unstack(level=1)
# Show only the first three rows; the matrix has one column per zone.
df_d_netherlands.head(3)

zone,1011,1012,1013,1014,1015,1016,1017,1018,1019,1021,...,5354,5355,5356,5357,5358,5359,7515,9614,9622,9875
zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1011,0.0,0.736195,3.463281,3.789964,1.68383,1.679005,1.347231,0.946711,2.129366,1.552923,...,79.592635,80.764175,83.751075,84.440641,82.259187,85.711523,136.69449,152.31182,154.214447,167.483723
1012,0.736195,0.0,2.840138,3.068867,0.947979,1.088997,1.316826,1.680319,2.788404,1.868855,...,80.246246,81.419357,84.406759,85.09256,82.906605,86.367695,137.39915,152.651621,154.547095,167.898772
1013,3.463281,2.840138,0.0,1.199129,2.116596,2.916134,3.937533,4.353719,4.903717,3.405588,...,83.05513,84.226292,87.213047,87.903447,85.722444,89.17335,139.137789,152.266395,154.13076,167.847776
