# Traffic analysis - Madrid Central

In this notebook we analyze whether traffic inside the Madrid Central area changed significantly when the measure was introduced. To do so, we analyze data from 2016 until 2021.

## Imports

In [76]:
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
from matplotlib.path import Path
import json
import zipfile
import io
import os
import utm
import seaborn as sns

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import HoverTool, Legend, ColumnDataSource, Title
from bokeh.tile_providers import get_provider, CARTODBPOSITRON
from bokeh.models import GeoJSONDataSource
from bokeh.transform import linear_cmap

output_notebook()
np.random.seed(42)

## Data download

The data we want to work with is very large, so it cannot be uploaded to the version control system we use (GitHub); instead, we download it directly from the source.

In [7]:
def download_data(first_month_id=32, last_month_id=44, data_path="data"):
    """Download and extract the monthly historical traffic archives.

    One ZIP archive is fetched per month id from the Madrid open data
    portal and extracted into ``data_path``.

    Parameters
    ----------
    first_month_id : int
        First month id to download (inclusive).
    last_month_id : int
        Last month id to download (inclusive).
        NOTE(review): the original code first set this to 103 and then
        overrode it with 44, so 44 was the value actually used and is kept
        as the default. Pass ``last_month_id=103`` for the larger range if
        that was the intent -- confirm.
    data_path : str
        Directory the archives are extracted into.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status for any month.
    """
    for month_id in range(first_month_id, last_month_id + 1):
        url = f"https://datos.madrid.es/egob/catalogo/208627-{month_id}-transporte-ptomedida-historico.zip"
        # A timeout keeps the cell from hanging forever on a stalled connection
        response = requests.get(url, timeout=60)
        # Fail with a clear HTTP error instead of an obscure BadZipFile later on
        response.raise_for_status()
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            archive.extractall(data_path)

In [None]:
# Ask before downloading: the archives are large, so the download is opt-in.
answer = input("WRITE '1' TO DOWNLOAD DATA OR '0' TO NOT ")
try:
    do_download = int(answer)
except ValueError:
    # Empty or non-numeric answers mean "do not download" instead of
    # aborting the cell with a traceback.
    do_download = 0

if do_download:
    download_data()

## Display location of traffic measurement points

In [12]:
# Load the table of traffic measurement points (semicolon-separated CSV)
# and preview its first rows.
traffic_points = pd.read_csv(
    "shared_data/traffic_points/pmed_trafico_03052016.csv",
    sep=";",
)
traffic_points.head()

Unnamed: 0,idelem,tipo_elem,cod_cent,nombre,st_x,st_y
0,1044,494,03FT08PM01,03FT08PM01,438963.314635,4474734.0
1,3600,494,PM30901,PM30901,443729.047369,4473268.0
2,3705,494,PM41451,PM41451,439858.261097,4471574.0
3,6823,494,PM41453,PM41453,439188.095183,4470895.0
4,7033,495,01015,Pº Castellana S-N - Pl. Colon-Hermosilla,441569.555897,4475502.0


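First we need to convert the UTM coordinates of each point (`st_x`, `st_y`) to latitude/longitude and then to the Web Mercator coordinates that `bokeh` maps use (stored in the `utm_x` and `utm_y` columns).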

In [13]:
def utm_from_latlon(lat, lon):
    """Project latitude/longitude (degrees) to spherical Mercator x/y.

    Despite the name, the result is not UTM: it is the Mercator projection
    on a sphere of radius 6378137 m, which is what a figure with
    ``x_axis_type="mercator"`` expects.

    Parameters
    ----------
    lat, lon : float or array-like
        Latitude and longitude in degrees.

    Returns
    -------
    (x, y)
        Projected coordinates in metres.
    """
    r_major = 6378137.000
    x = r_major * np.radians(lon)
    # The original derived the metres-per-degree scale as x / lon, which is
    # 0/0 (NaN) when lon == 0. The scale is the constant r_major * pi / 180,
    # so it is folded directly into the formula here.
    y = r_major * np.log(np.tan(np.pi / 4.0 + np.radians(lat) / 2.0))

    return x, y

def get_lat_lon_utm(row):
    """Return latitude, longitude and map x/y for one row of coordinates.

    Reads ``row["st_x"]`` and ``row["st_y"]``, converts them to
    latitude/longitude with ``utm.to_latlon`` and then to map coordinates
    with ``utm_from_latlon``.

    Returns a ``pd.Series`` holding ``[lat, lon, x, y]`` in that order.
    """
    # Zone number 30 and zone letter 'T' correspond to Madrid
    latitude, longitude = utm.to_latlon(row["st_x"], row["st_y"], 30, "T")

    map_x, map_y = utm_from_latlon(latitude, longitude)

    return pd.Series([latitude, longitude, map_x, map_y])

In [14]:
# Convert every measurement point row by row and store the four resulting
# values as new columns of the table.
traffic_points[["latitude", "longitude", "utm_x", "utm_y"]] = traffic_points.apply(
    get_lat_lon_utm,
    axis=1,
)
traffic_points.head()

Unnamed: 0,idelem,tipo_elem,cod_cent,nombre,st_x,st_y,latitude,longitude,utm_x,utm_y
0,1044,494,03FT08PM01,03FT08PM01,438963.314635,4474734.0,40.421001,-3.719488,-414051.481782,4927311.0
1,3600,494,PM30901,PM30901,443729.047369,4473268.0,40.408129,-3.663184,-407783.811885,4925429.0
2,3705,494,PM41451,PM41451,439858.261097,4471574.0,40.392598,-3.70864,-412843.963659,4923159.0
3,6823,494,PM41453,PM41453,439188.095183,4470895.0,40.386431,-3.716471,-413715.710072,4922257.0
4,7033,495,01015,Pº Castellana S-N - Pl. Colon-Hermosilla,441569.555897,4475502.0,40.428107,-3.688839,-410639.639249,4928350.0


Then we load the district boundaries so that we can display them on the map.

In [28]:
# Read the district boundaries. The encoding is given explicitly so that
# non-ASCII district names are decoded the same way on every platform
# instead of depending on the locale's default encoding.
with open("shared_data/districts/districts.geojson", "r", encoding="utf-8") as geojson:
    geodata = json.load(geojson)

In [92]:
# Build one long table with the boundary vertices of every district:
# one row per vertex, tagged with the district name.
district_columns = ["name", "latitude", "longitude", "utm_x", "utm_y"]
district_frames = []

for district in geodata["features"]:
    # Only the first coordinate ring of the geometry is used
    # (presumably the outer boundary of a Polygon -- TODO confirm).
    district_coord = district["geometry"]["coordinates"][0]
    df_district = pd.DataFrame(district_coord, columns=["st_x", "st_y"])
    df_district["name"] = district["properties"]["NOMBRE"]
    # Convert the source coordinates to lat/lon and map x/y
    df_district[["latitude", "longitude", "utm_x", "utm_y"]] = df_district.apply(get_lat_lon_utm, axis=1)
    district_frames.append(df_district[district_columns])

# Concatenate once at the end: growing a DataFrame with pd.concat inside the
# loop copies all previous rows on every iteration.
if district_frames:
    df_districts = pd.concat(district_frames, ignore_index=True)
else:
    # No features: keep an empty table with the expected columns
    df_districts = pd.DataFrame([], columns=district_columns)

# Array with the distinct district names, in order of first appearance
district_name = df_districts["name"].unique()
df_districts

Unnamed: 0,name,latitude,longitude,utm_x,utm_y
0,Centro,40.407345,-3.693162,-411120.867401,4925314.358763
1,Centro,40.407196,-3.693202,-411125.341089,4925292.526182
2,Centro,40.406986,-3.693227,-411128.197118,4925261.789915
3,Centro,40.407127,-3.693677,-411178.251532,4925282.481746
4,Centro,40.407256,-3.693849,-411197.420454,4925301.309521
...,...,...,...,...,...
9466,Moncloa - Aravaca,40.469899,-3.802628,-423306.659924,4934463.574953
9467,Moncloa - Aravaca,40.469823,-3.802359,-423276.645066,4934452.379956
9468,Moncloa - Aravaca,40.469748,-3.802093,-423247.03994,4934441.47116
9469,Moncloa - Aravaca,40.469672,-3.801822,-423216.881019,4934430.303684


Save the district in which each traffic point is located.

In [94]:
# Points that fall inside no district polygon keep the label "None"
traffic_points["district"] = "None"
points = traffic_points[["utm_x", "utm_y"]]

for name in district_name:
    # Polygon built from the boundary vertices of the current district
    boundary = df_districts.loc[df_districts["name"] == name, ["utm_x", "utm_y"]]
    path = Path(boundary)
    # Boolean mask: True for every measurement point inside the polygon
    points_in_path_mask = path.contains_points(points)
    traffic_points.loc[points_in_path_mask, "district"] = name

traffic_points.head()

Unnamed: 0,idelem,tipo_elem,cod_cent,nombre,st_x,st_y,latitude,longitude,utm_x,utm_y,district
0,1044,494,03FT08PM01,03FT08PM01,438963.314635,4474734.0,40.421001,-3.719488,-414051.481782,4927311.0,Moncloa - Aravaca
1,3600,494,PM30901,PM30901,443729.047369,4473268.0,40.408129,-3.663184,-407783.811885,4925429.0,Moratalaz
2,3705,494,PM41451,PM41451,439858.261097,4471574.0,40.392598,-3.70864,-412843.963659,4923159.0,Carabanchel
3,6823,494,PM41453,PM41453,439188.095183,4470895.0,40.386431,-3.716471,-413715.710072,4922257.0,Carabanchel
4,7033,495,01015,Pº Castellana S-N - Pl. Colon-Hermosilla,441569.555897,4475502.0,40.428107,-3.688839,-410639.639249,4928350.0,Salamanca


In [77]:
# One colour per district, taken from the 'Spectral' palette
district_colors = sns.color_palette('Spectral', len(district_name))
# Shuffle with a dedicated, freshly seeded generator so that re-running this
# cell always yields the same colour order. The global np.random state
# advances on every call, so the original shuffle changed the colours on
# each re-run. RandomState(42) reproduces what the global generator gives
# when this cell runs right after np.random.seed(42).
np.random.RandomState(42).shuffle(district_colors)
district_colors

In [104]:
def get_color_from_palette(color):
    """Scale each channel of ``color`` by 255 and truncate it to an int.

    ``color`` is an iterable of numeric channel values (presumably floats in
    [0, 1] as returned by a seaborn palette -- verify against caller).
    Returns a tuple of ints suitable for plotting.
    """
    return tuple(int(channel * 255) for channel in color)

def get_dark_color_from_palette(color):
    """Scale each channel of ``color`` by 200 and truncate it to an int.

    Same conversion as the regular palette helper but with a smaller factor,
    which yields a darker shade of the same colour (used for outlines).
    Returns a tuple of ints.
    """
    return tuple(int(channel * 200) for channel in color)

In [120]:
# Interactive map: district polygons, the traffic measurement points
# coloured by district, and the outline of the Centro district.
p = figure(title="Traffic measurement points", x_axis_type="mercator", y_axis_type="mercator",
           height=700, width=800)

# Renderers of the measurement points; the hover tool is limited to these
point_renderers = []

for name, color in zip(district_name, district_colors):
    fill_color = get_color_from_palette(color)
    # Districts
    source = ColumnDataSource(df_districts[df_districts["name"] == name])
    p.patch(x="utm_x", y="utm_y", color=fill_color, line_width=3, alpha=0.4,
            source=source, legend_label=name, muted=True, muted_alpha=0.1)
    # Traffic points
    source = ColumnDataSource(traffic_points[traffic_points["district"] == name])
    point_renderer = p.circle(x="utm_x", y="utm_y", color=fill_color, line_width=1,
                              source=source, legend_label=name, muted=True, muted_alpha=0.3, radius=30,
                              line_color=get_dark_color_from_palette(color))
    point_renderers.append(point_renderer)

# Madrid Central (drawn as the boundary of the "Centro" district)
source = ColumnDataSource(df_districts[df_districts["name"] == "Centro"])
p.line(x="utm_x", y="utm_y", color="black", line_width=2,
       source=source, legend_label="Madrid Central limit", muted=False, muted_alpha=0.3)

# Hover tooltip
TOOLTIPS = [
    ("Name", "@nombre"),
    ("District", "@district")
]
# Only the point sources contain the "nombre" and "district" columns, so the
# hover tool is attached to the point renderers only. Attached to every
# renderer, the district patches and the outline show "???" placeholders.
p.add_tools(HoverTool(tooltips=TOOLTIPS, renderers=point_renderers))

# Background map tiles
cartodb = get_provider(CARTODBPOSITRON)
p.add_tile(cartodb)
# Move the legend outside the plot area; clicking an entry toggles muting
p.add_layout(p.legend[0], "right")
p.legend.click_policy = "mute"
show(p)