# Descriptive analysis of the data used

Load GIS data, explore their structure, and test the flow generation method.

In [1]:
import pandas as pd
%load_ext autoreload
%autoreload 2

In [10]:
# Import self-defined libraries
import sweden
import netherlands
import workers

## 1. Load spatial zones and ground-truth data for Sweden
See `lib\sweden.py` for details of what the lines below do.

In [6]:
# Initialise the loader object that stores the Swedish ground-truth data,
# including the spatial zones
data_sweden = sweden.GroundTruthLoader()

# Load the spatial zones (accessed in later cells as `data_sweden.zones`)
data_sweden.load_zones()

# Create the boundary for later use
# NOTE(review): presumably derived from the zones loaded above — confirm in the `sweden` module
data_sweden.create_boundary()

# Load the ground-truth survey data into origin-destination matrix (ODM) form;
# the cell output previews `trip_weight` values indexed by origin/destination zone
data_sweden.load_odm()

origin_main_deso  desti_main_deso
0114A0010         0114C1170          182551.736842
                  0163C1140          810292.236111
                  0163C1170          182927.555556
                  0163C1350           94404.100629
                  0163C1380           23397.135431
Name: trip_weight, dtype: float64
ozone      dzone    
0114A0010  0114A0010    0.0
           0114C1010    0.0
           0114C1020    0.0
           0114C1030    0.0
           0114C1040    0.0
Name: trip_weight, dtype: float64


### 1.1 Check spatial zones and population

In [7]:
# Preview the first rows of the zones table (columns: zone id and polygon geometry)
data_sweden.zones.head()

Unnamed: 0,zone,geometry
0,0114A0010,"POLYGON ((661116.252 6606615.603, 661171.409 6..."
1,0114C1010,"POLYGON ((666960.066 6598800.393, 666971.371 6..."
2,0114C1020,"POLYGON ((667034.814 6600076.634, 667032.984 6..."
3,0114C1030,"POLYGON ((667095.512 6599103.521, 667095.009 6..."
4,0114C1040,"POLYGON ((664610.264 6600922.821, 664638.910 6..."


In [8]:
# Load population data (accessed afterwards as `data_sweden.population`)
data_sweden.load_population()
# Preview the first rows: one population count (`pop`) per zone
data_sweden.population.head()

Unnamed: 0,zone,pop
0,0114A0010,790
1,0114C1010,1608
2,0114C1020,1610
3,0114C1030,2365
4,0114C1040,2346


### 1.2 Calculate distances between zones
Figure out how to use the output data.

In [11]:
# Compute the distances between all pairs of zones. The result is in stacked
# (long) form — presumably a Series indexed by (zone, zone); confirm in `workers`.
# NOTE(review): the values look like kilometres — confirm the unit in `workers.zone_distances`.
distances = workers.zone_distances(data_sweden.zones)

Calculating distances between zones...


In [14]:
# Pivot the stacked distances into a matrix-style dataframe:
# index level 1 becomes the columns, giving one row and one column per zone
df_d = distances.unstack(level=1)
# Preview the first three rows only
df_d.head(3)

zone,0114A0010,0114C1010,0114C1020,0114C1030,0114C1040,0114C1050,0114C1060,0114C1070,0114C1080,0114C1090,...,2584C1020,2584C1030,2584C1040,2584C1050,2584C1060,2584C1070,2584C1090,2584C1100,2584C1110,2584C1130
zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0114A0010,0.0,4.795251,3.548429,4.160562,2.703179,4.783886,1.849689,2.923878,2.159032,1.674308,...,933.767042,933.828072,935.067203,935.416006,935.630309,935.620128,936.859131,936.431609,936.568693,937.598973
0114C1010,4.795251,0.0,1.343363,0.686608,2.451822,1.246766,3.96865,2.079711,2.668393,3.29124,...,937.070798,937.137238,938.379341,938.733022,938.9422,938.922048,940.174915,939.742567,939.88225,940.918898
0114C1020,3.548429,1.343363,0.0,0.657756,1.693341,1.362823,3.073683,0.737077,1.389539,1.969922,...,935.881658,935.947128,937.188698,937.541507,937.751603,937.73324,938.983622,938.552138,938.691355,939.726864


## 2. Load spatial zones and ground-truth data for the Netherlands
Try this yourself, following the same steps as for Sweden in Section 1.

In [None]:
# Initialise the loader object for the Netherlands ground-truth data;
# the calls below mirror the loading sequence used for Sweden
data_netherlands = netherlands.GroundTruthLoader()
# Load the spatial zones
data_netherlands.load_zones()
# Create the boundary for later use
data_netherlands.create_boundary()
# Load the ground-truth data into ODM form
data_netherlands.load_odm()