The files used in this notebook can be found in this folder:
https://www.dropbox.com/home/Remix/Growth/International/Agencies%20by%20country/Chile/DPTM%20(Santiago%2C%20Chile)/OD/raw%20files%20-%20don't%20use%20these

For the shapefile we used the file called `zones_shapefile.shp`
For the OD data we used the file called `Abril2017.MatrizOD.csv`

In [82]:
# Import libraries

import pandas as pd
import geopandas as gpd
import numpy as np

In [83]:
# Load the two inputs: the OD matrix (CSV) and the zones shapefile
# NOTE: both paths are absolute and machine-specific -- change them to your computer's

shapefile_path = "/Users/santiagotoso/GoogleDrive/Master/Python/Santiago de Chile/MatrizOD_Subidas_Bajadas_2017.04/zones_shapefile/zones_shapefile.shp"
od_input_path = "/Users/santiagotoso/Downloads/MatrizOD_Subidas_Bajadas_2017.04/Abril2017.MatrizOD.csv"

# The OD file is semicolon-separated
od = pd.read_csv(od_input_path, sep=';')
shapefile = gpd.read_file(shapefile_path)

# Data files

In [69]:
# Preview the first five rows of the raw OD table
od.head(n=5)

Unnamed: 0,Periodo,ZonaSubida,ZonaBajada,nViajesSinBajada,nViajesConBajada,Expansion
0,01 - PRE NOCTURNO,0,0,0,0,0
1,01 - PRE NOCTURNO,0,1,0,0,0
2,01 - PRE NOCTURNO,0,10,0,0,0
3,01 - PRE NOCTURNO,0,100,0,0,0
4,01 - PRE NOCTURNO,0,101,0,0,0


In [70]:
# Check that we have a square matrix:
# the number of distinct boarding zones should equal the number of distinct alighting zones

origin_zones = od.ZonaSubida.unique()
destination_zones = od.ZonaBajada.unique()

print("Number of origin zones in od = " + str(len(origin_zones)))
print("Number of detination zones = " + str(len(destination_zones)))

Number of origin zones in od = 803
Number of detination zones = 803


In [71]:
# List the distinct time periods present in the OD table

time_periods = od.Periodo.unique()
print(time_periods)

['01 - PRE NOCTURNO' '02 - NOCTURNO' '03 - TRANSICION NOCTURNO'
 '04 - PUNTA MANANA' '05 - TRANSICION PUNTA MANANA'
 '06 - FUERA DE PUNTA MANANA' '07 - PUNTA MEDIODIA'
 '08 - FUERA DE PUNTA TARDE' '09 - PUNTA TARDE'
 '10 - TRANSICION PUNTA TARDE' '11 - FUERA DE PUNTA NOCTURNO'
 '12 - PRE NOCTURNO']


In [72]:
# Rename the columns
# Filter only the time period the client wants to use

# index=str maps every row label through str(); the column mapping renames the
# three fields we keep to origin_id / destination_id / count
od = od.rename(index=str, columns={"ZonaSubida": "origin_id", "ZonaBajada": "destination_id", "Expansion": "count"})

manana = '04 - PUNTA MANANA'

# Select rows and columns in one .loc call and take an explicit copy:
# od_manana['count'] is overwritten in a later cell, and assigning into the
# result of chained indexing (od[mask][[cols]]) raises SettingWithCopyWarning
od_manana = od.loc[od.Periodo == manana, ["origin_id", "destination_id", "count"]].copy()
od_manana.head()

Unnamed: 0,origin_id,destination_id,count
1934427,0,0,73854599
1934428,0,1,4923639774
1934429,0,10,0
1934430,0,100,0
1934431,0,101,369272995


In [73]:
# Replace the decimal comma "," with a point "." in the numbers stored as strings

def point_for_comma(str):
    """Convert a number written with a decimal comma into a float.

    Parameters
    ----------
    str : str or number
        Text such as "7,38546". Every "," is replaced by "." before the
        conversion. Values that are not strings (for example floats produced
        by a previous run of the conversion cell) are passed to float() as-is.
        The parameter name shadows the built-in ``str``; it is kept unchanged
        so existing callers keep working.

    Returns
    -------
    float
        The numeric value.

    Raises
    ------
    ValueError
        If the text is not a valid number after the replacement.
    """
    try:
        return float(str.replace(",", "."))
    except AttributeError:
        # Not a string: there is no comma to replace, so convert directly.
        # This keeps the conversion cell safe to re-run on an already-converted column.
        return float(str)

# Convert every entry of the count column with point_for_comma and store the result back
count_as_float = od_manana['count'].apply(point_for_comma)
od_manana['count'] = count_as_float
od_manana.head()

Unnamed: 0,origin_id,destination_id,count
1934427,0,0,7.38546
1934428,0,1,4.92364
1934429,0,10,0.0
1934430,0,100,0.0
1934431,0,101,3.69273


In [76]:
# Write the filtered OD table to CSV, without the index column
# NOTE: the output path is absolute and machine-specific

od_output_path = "/Users/santiagotoso/GoogleDrive/Master/Python/Santiago de Chile/MatrizOD_Subidas_Bajadas_2017.04/od_santiago.csv"
od_manana.to_csv(od_output_path, index=False)

# Shapefile

In [77]:
# Get the ID from the shapefile and create the id field 
# Notice that the zone ID is inside the "descriptio" field

import re

def id_extractor(str):
    """Pull the zone ID out of an HTML table snippet.

    The text is split on '>' and the sixth piece (index 5) is taken; its last
    four characters are then dropped. In the sample rows shown in this
    notebook that piece looks like '625</td', so the result is '625'.

    The ID is returned as a string. An IndexError is raised when the text
    contains fewer than five '>' characters.
    """
    pieces = re.split('>', str)
    cell_with_closing_tag = pieces[5]
    return cell_with_closing_tag[:-4]

# Add an "area_id" column holding the zone ID extracted from each "descriptio" value

zone_ids = shapefile.descriptio.apply(id_extractor)
shapefile['area_id'] = zone_ids
shapefile.head(3)

Unnamed: 0,Name,descriptio,timestamp,begin,end,altitudeMo,tessellate,extrude,visibility,drawOrder,icon,geometry,area_id
0,584,<table><tr><td>Zona777</td><td>625</td></tr><t...,,,,,1,0,-1,,,"POLYGON Z ((-70.543571 -33.548025 0, -70.54293...",625
1,770,<table><tr><td>Zona777</td><td>806</td></tr><t...,,,,,1,0,-1,,,"POLYGON Z ((-70.787311 -33.564826 0, -70.79797...",806
2,596,<table><tr><td>Zona777</td><td>634</td></tr><t...,,,,,1,0,-1,,,"POLYGON Z ((-70.797974 -33.555577 0, -70.80288...",634


In [78]:
# Declare the coordinate reference system of the shapefile as EPSG:4326
# (this assigns the CRS attribute; no reprojection call is made here)
# Then build a new geo data frame with only the relevant variables

shapefile.crs = {'init': 'epsg:4326'}

output_columns = ['area_id', 'Name', 'geometry']
shapefile_output = shapefile[output_columns]
shapefile_output.head(3)

Unnamed: 0,area_id,Name,geometry
0,625,584,"POLYGON Z ((-70.543571 -33.548025 0, -70.54293..."
1,806,770,"POLYGON Z ((-70.787311 -33.564826 0, -70.79797..."
2,634,596,"POLYGON Z ((-70.797974 -33.555577 0, -70.80288..."


In [79]:
# Show which file formats fiona can read ('r'), append ('a') and write ('w')
import fiona

fiona.supported_drivers

{'AeronavFAA': 'r',
 'ARCGEN': 'r',
 'BNA': 'raw',
 'DXF': 'raw',
 'CSV': 'raw',
 'OpenFileGDB': 'r',
 'ESRI Shapefile': 'raw',
 'GeoJSON': 'rw',
 'GPKG': 'rw',
 'GML': 'raw',
 'GPX': 'raw',
 'GPSTrackMaker': 'raw',
 'Idrisi': 'r',
 'MapInfo File': 'raw',
 'DGN': 'raw',
 'S57': 'r',
 'SEGY': 'r',
 'SUA': 'r'}

In [80]:
# Write the geo data frame to disk as an ESRI shapefile
# NOTE: the output path is absolute and machine-specific

output_shapefile_path = "/Users/santiagotoso/GoogleDrive/Master/Python/Santiago de Chile/MatrizOD_Subidas_Bajadas_2017.04/output_shapefile/output_shapefile.shp"
shapefile_output.to_file(filename=output_shapefile_path, driver='ESRI Shapefile')

  with fiona.drivers():


In [81]:
# Sanity check: read the shapefile we just wrote and look at the first rows

written_shapefile_path = "/Users/santiagotoso/GoogleDrive/Master/Python/Santiago de Chile/MatrizOD_Subidas_Bajadas_2017.04/output_shapefile/output_shapefile.shp"
test = gpd.read_file(written_shapefile_path)
test.head()

Unnamed: 0,area_id,Name,geometry
0,625,584,"POLYGON Z ((-70.543571 -33.548025 0, -70.54293..."
1,806,770,"POLYGON Z ((-70.787311 -33.564826 0, -70.79797..."
2,634,596,"POLYGON Z ((-70.797974 -33.555577 0, -70.80288..."
3,487,457,"POLYGON Z ((-70.566011 -33.550304 0, -70.56159..."
4,439,384,"POLYGON Z ((-70.73079 -33.594186 0, -70.735468..."
