In [17]:
import pandas as pd
import geopandas as gpd

import folium
from shapely.geometry import Point
from shapely.geometry import LineString
from shapely.geometry import MultiLineString
from shapely import wkt

from IPython.display import display

from sklearn.preprocessing import MinMaxScaler

In [18]:
# Destination of the rendered HTML map.
output_dir = "../Maps/london.html"

# Input CSV files describing the tube network: route metadata,
# station-to-station connections and the stations themselves.
route_dir = "../Data/routes.csv"
lines_def_dir = "../Data/lines_definition.csv"
stations_dir = "../Data/stations.csv"

In [19]:
# Stations, indexed by id: name, zone, total lines and rail per station.
tube_df = pd.read_csv(stations_dir, index_col="id")

# Connections between pairs of stations.
lines_df = pd.read_csv(lines_def_dir)

# One row per metro line: its number, its colour and its stripe, if it has one.
routes_df = pd.read_csv(route_dir)

In the next cell I create a GeoDataFrame whose geometry column stores each station's latitude and longitude as a shapely Point.

In [20]:
# Build one shapely Point per station in a single pass instead of writing the
# column cell by cell inside an iterrows() loop.
# NOTE: coordinates are stored as (latitude, longitude) -- the reverse of the
# usual shapely (x=longitude, y=latitude) order -- because the map cells pass
# `.coords` straight to folium, which takes (lat, lon) pairs. Swap them before
# doing any real spatial operation on this frame.
station_points = [
    Point(lat, lon)
    for lat, lon in zip(tube_df['latitude'], tube_df['longitude'])
]

# Drop the raw coordinate columns (no need to keep duplicated data) and declare
# the geometry explicitly when promoting the table to a GeoDataFrame.
tube_df = gpd.GeoDataFrame(
    tube_df.drop(columns=['latitude', 'longitude']),
    geometry=station_points,
)

In the next cell we create a LineString for each connection between stations, so that later we can show the metro system on the map.

In [21]:
lines_df['geometry'] = None

# Fetch the station geometries once, then join the two endpoints of every
# connection with a two-point LineString.
station_points = tube_df['geometry']
for link in lines_df.itertuples():
    endpoints = [station_points.loc[link.station1], station_points.loc[link.station2]]
    lines_df.at[link.Index, 'geometry'] = LineString(endpoints)

It is a little messy, but in this next cell we combine the LineStrings of each route into a single MultiLineString, so that the geometry column of the routes table holds all of that route's connections.

In [22]:
routes_df['geometry'] = None

# For every route, gather the segments whose `line` matches and bundle them
# into one MultiLineString.
for route in routes_df.itertuples():
    on_this_route = lines_df['line'] == route.line
    segments = lines_df.loc[on_this_route, 'geometry'].tolist()
    routes_df.at[route.Index, 'geometry'] = MultiLineString(segments)

Now we draw each line on the map in its own colour and add the stations as circles, coloured according to the zone they belong to.

In [23]:
def zone_color(zone):
    """Return the hex colour used to draw a station in the given zone.

    The palette runs from dark green (zone 1) to dark red (zone 9).

    Args:
        zone: Zone number. Anything accepted by ``int()`` works; fractional
            values are truncated.

    Returns:
        A ``'#RRGGBB'`` colour string. Zones below 1 get the zone-1 colour and
        zones above 9 get the zone-9 colour.
    """
    zones = ['#006400', '#008000', '#00FF00',  # dark green,  green,         light green
             '#FFFF00', '#FFA500', '#FF4500',  # yellow,      orange-yellow, orange
             '#FF3300', '#FF0000', '#8B0000']  # dark orange, light red,     dark red
    # Clamp rather than index blindly: a plain zones[zone - 1] wraps around to
    # the dark-red end for zone 0 (negative index) and raises IndexError for
    # zones above 9.
    position = min(max(int(zone), 1), len(zones)) - 1
    return zones[position]


tube_features = folium.FeatureGroup(name = "Tube Zones")

# One polyline per station-to-station segment, drawn in the colour of its route.
for _, route in routes_df.iterrows():
    for segment in route.geometry.geoms:
        stroke = "#" + route.colour
        folium.PolyLine(
            locations=list(segment.coords),
            color=stroke,
            opacity=0.7,
        ).add_to(tube_features)

# One small circle per station, coloured by the (truncated) zone number.
for _, station in tube_df.iterrows():
    station_zone = int(station['zone'])
    folium.Circle(
        location=station.geometry.coords[0],
        opacity=1,
        radius=6,
        color=zone_color(station_zone),
    ).add_to(tube_features)

Now let's see the boroughs on the map, together with the crimes committed over the last two years (the data set covers the most recent 24 months).

In [24]:
# Borough shapes and borough-level crime counts.
borough_dir = "../Data/boroughs_shape.csv"

# Forward slashes, like every other path in this notebook: the backslash form
# only resolves on Windows and depends on "\D" and "\M" being passed through
# as unrecognised escape sequences.
crime_dir = "../Data/MPS Borough Level Crime (most recent 24 months).csv"

The CSV file with the shape of each borough was made by hand from public GIS data.

In [25]:
# The borough shapes are stored as WKT text; parse them into shapely geometries.
borough = pd.read_csv(borough_dir)
borough['geometry'] = borough['geometry'].apply(wkt.loads)

# Declare the geometry column in the constructor. set_geometry() returns a new
# frame instead of modifying the existing one, so calling it as a bare
# statement and discarding the result has no effect.
borough_df = gpd.GeoDataFrame(borough, geometry='geometry')

# Crime records loaded from the borough-level crime CSV.
crime_df = pd.read_csv(crime_dir)

In [26]:
# Leave out 'Historical Fraud and Forgery' and 'Public Order Offences', since
# they don't really affect the person.
excluded_categories = ["Public Order Offences", "Historical Fraud and Forgery"]
rows_to_drop = crime_df.index[crime_df['MajorText'].isin(excluded_categories)]
crime_df = crime_df.drop(rows_to_drop)

In [27]:
# Per-borough totals: sum every column after the first three within each
# borough, then add those column sums together.
value_columns = crime_df.columns[3:].to_list()
general_crime_boroughs = crime_df.groupby("LookUp_BoroughName")[value_columns].sum()
total_crime_boroughs = general_crime_boroughs.sum(axis=1)

# Rescale the totals with a min-max scaler so they are easier to read on the map.
borough_names = total_crime_boroughs.index
totals_as_column = total_crime_boroughs.values.reshape(-1, 1)
normalized_crime = MinMaxScaler().fit_transform(totals_as_column)
total_crime_boroughs = pd.Series(normalized_crime.flatten(), index=borough_names)

In [29]:
# Shade each borough by its normalised crime total; GeoJSON features are
# matched to the data through their `name` property.
choropleth_options = {
    "name": 'Boroughs',
    "geo_data": borough_df.__geo_interface__,
    "data": total_crime_boroughs,
    "key_on": 'feature.properties.name',
    "fill_color": 'YlGnBu',
    "legend_name": 'Boroughs',
    "fill_opacity": 0.6,
    "line_opacity": 0.1,
}
choropleth = folium.Choropleth(**choropleth_options)

In [30]:
# Base map. NOTE: the name `map` shadows the Python built-in, but the export
# cell refers to it, so it is kept.
map_centre = [51.5074, -0.1278]
map = folium.Map(location=map_centre, zoom_start=11.5)

# Layers are added in order: the borough choropleth first, the tube network after it.
for layer in (choropleth, tube_features):
    layer.add_to(map)

folium.LayerControl().add_to(map)

<folium.map.LayerControl at 0x19802bcd930>

In [31]:
def embedded_map(map: folium.Map, dir: str):
    """Save the map to an HTML file and render it inline in the notebook.

    Args:
        map: The folium map to export and display.
        dir: Path of the HTML file to write. Missing parent folders are
            created first, so the save does not fail when the output folder
            does not exist yet.
    """
    # Local imports keep this cell self-contained.
    import os
    from pathlib import Path

    # Only prepare the folder for real filesystem paths; any other kind of
    # target is handed to folium untouched.
    if isinstance(dir, (str, os.PathLike)):
        Path(dir).parent.mkdir(parents=True, exist_ok=True)

    map.save(dir)
    display(map)

embedded_map(map, output_dir)

Later, an option to visualize the metro stations and the crime rates in each borough will be added

In [32]:
#Uncomment to show the map in your browser
#map.show_in_browser()