# Final Capstone Project

### Business problem description

During the worldwide corona crisis, it became clear that nightlife venues, restaurants and bars can be the sources of continued spread of the virus. Many countries are struggling to define clear concepts to protect their citizens. The aim of this analysis will be a generate a risk assessment for each neighborhood in the city of Munich. 

Based on the risk assessment one can concetrate efforts and enforce the rules where it is most necessary.

For this task I will leverage the Foursquare API and available puplic data for the city of Munich. I will cluster the neighborhoods based on the type, density and size of venues and create an overview map of high & low risk areas. 

In [1]:
import pandas as pd
import folium
import bs4 as bs
import urllib.request
from geopy.geocoders import Nominatim 
import requests
from pandas.io.json import json_normalize

### Scrape city district data from Wikipedia, clean and translate columns

In [2]:
source = urllib.request.urlopen('https://de.wikipedia.org/wiki/Stadtbezirke_M%C3%BCnchens').read()
soup = bs.BeautifulSoup(source)

table = soup.find_all('table',{"class": "wikitable sortable"})
df = pd.read_html(str(table))[0]

In [3]:
df=df.drop(labels=["Nr.","Ausländer(%)"], axis=1)
df=df.drop(25)
df=df.rename(columns={"Stadtbezirk": "District", "Fläche(km²)": "Area(km²)", "Einwohner": "Inhabitants","Dichte(Einw./km²)":"Density(Inhab./km²)"})

### Munich has 24 districts and a total of ca. 1.5m inhabitants

In [4]:
df

Unnamed: 0,District,Area(km²),Inhabitants,Density(Inhab./km²)
0,Altstadt-Lehel,315,21.1,6.708
1,Ludwigsvorstadt-Isarvorstadt,440,51.644,11.734
2,Maxvorstadt,430,51.402,11.96
3,Schwabing-West,436,68.527,15.706
4,Au-Haidhausen,422,61.356,14.541
5,Sendling,394,40.983,10.405
6,Sendling-Westpark,781,59.643,7.632
7,Schwanthalerhöhe,207,29.743,14.367
8,Neuhausen-Nymphenburg,1291,98.814,7.651
9,Moosach,1109,54.223,4.888


In [5]:
from geopy.geocoders import Nominatim 

location_dic={}

for row in df.itertuples():
    address = row[1]+', Munich Germany'
    geolocator = Nominatim(user_agent="ny_explorer")
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    location_dic[row[1]]=[latitude,longitude]    
df_loc = pd.DataFrame.from_dict(location_dic, orient='index', columns=["Latitude","Longitude"])
df_loc=df_loc.reset_index()
df_loc=df_loc.rename(columns={"index":"District"})

In [6]:
df_merged=df_loc.merge(df,on="District")

In [7]:
df_merged

Unnamed: 0,District,Latitude,Longitude,Area(km²),Inhabitants,Density(Inhab./km²)
0,Altstadt-Lehel,48.137828,11.574582,315,21.1,6.708
1,Ludwigsvorstadt-Isarvorstadt,48.131771,11.555809,440,51.644,11.734
2,Maxvorstadt,48.151092,11.562418,430,51.402,11.96
3,Schwabing-West,48.164849,11.5635,436,68.527,15.706
4,Au-Haidhausen,48.128753,11.590536,422,61.356,14.541
5,Sendling,48.118012,11.539083,394,40.983,10.405
6,Sendling-Westpark,48.118031,11.519333,781,59.643,7.632
7,Schwanthalerhöhe,48.133782,11.541057,207,29.743,14.367
8,Neuhausen-Nymphenburg,48.154222,11.531517,1291,98.814,7.651
9,Moosach,48.179895,11.510571,1109,54.223,4.888


In [8]:
CLIENT_ID = 'FQ2IDDRDQHPLRZPINBZP2U2NMNIRUV0RZ01Z4EPZKQGM13DM' # your Foursquare ID
CLIENT_SECRET = 'ZCMNXOPSI15W1OQ0WAKVOEREL0MYYJBY2PRBCVRUMHAQOYW3' # your Foursquare Secret

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FQ2IDDRDQHPLRZPINBZP2U2NMNIRUV0RZ01Z4EPZKQGM13DM
CLIENT_SECRET:ZCMNXOPSI15W1OQ0WAKVOEREL0MYYJBY2PRBCVRUMHAQOYW3


In [20]:
search_query = '4d4b7105d754a06376d81259'
radius = 1000
VERSION = '20180604'
df_merged["Number_of_Bars"]=0
limit=200



for row in df_merged.itertuples():
    index = row[0]
    latitude = row[2]
    longitude = row[3]
    url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&categoryId={}&limit={}&radius={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, limit,radius)
    results = requests.get(url).json()
    venues = results['response']['venues']
    df_merged["Number_of_Bars"][index]=len(venues)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


In [21]:
df_merged

Unnamed: 0,District,Latitude,Longitude,Area(km²),Inhabitants,Density(Inhab./km²),Number_of_Bars,Bar_Density_Index,corona_risk,marker_colors
0,Altstadt-Lehel,48.137828,11.574582,315,21.1,6.708,50,15.555556,high,yellow
1,Ludwigsvorstadt-Isarvorstadt,48.131771,11.555809,440,51.644,11.734,50,10.681818,high,yellow
2,Maxvorstadt,48.151092,11.562418,430,51.402,11.96,48,11.395349,high,yellow
3,Schwabing-West,48.164849,11.5635,436,68.527,15.706,48,5.963303,medium,green
4,Au-Haidhausen,48.128753,11.590536,422,61.356,14.541,49,10.900474,high,yellow
5,Sendling,48.118012,11.539083,394,40.983,10.405,47,5.837563,medium,green
6,Sendling-Westpark,48.118031,11.519333,781,59.643,7.632,19,0.640205,low,blue
7,Schwanthalerhöhe,48.133782,11.541057,207,29.743,14.367,46,22.705314,very high,red
8,Neuhausen-Nymphenburg,48.154222,11.531517,1291,98.814,7.651,47,1.936483,low,blue
9,Moosach,48.179895,11.510571,1109,54.223,4.888,23,0.991885,low,blue


In [22]:
df_merged["Bar_Density_Index"]=(df_merged["Number_of_Bars"]/df_merged["Area(km²)"])*100

In [23]:
df_merged

Unnamed: 0,District,Latitude,Longitude,Area(km²),Inhabitants,Density(Inhab./km²),Number_of_Bars,Bar_Density_Index,corona_risk,marker_colors
0,Altstadt-Lehel,48.137828,11.574582,315,21.1,6.708,50,15.873016,high,yellow
1,Ludwigsvorstadt-Isarvorstadt,48.131771,11.555809,440,51.644,11.734,50,11.363636,high,yellow
2,Maxvorstadt,48.151092,11.562418,430,51.402,11.96,48,11.162791,high,yellow
3,Schwabing-West,48.164849,11.5635,436,68.527,15.706,48,11.009174,medium,green
4,Au-Haidhausen,48.128753,11.590536,422,61.356,14.541,49,11.611374,high,yellow
5,Sendling,48.118012,11.539083,394,40.983,10.405,47,11.928934,medium,green
6,Sendling-Westpark,48.118031,11.519333,781,59.643,7.632,19,2.432778,low,blue
7,Schwanthalerhöhe,48.133782,11.541057,207,29.743,14.367,46,22.222222,very high,red
8,Neuhausen-Nymphenburg,48.154222,11.531517,1291,98.814,7.651,47,3.640589,low,blue
9,Moosach,48.179895,11.510571,1109,54.223,4.888,23,2.07394,low,blue


In [24]:
df_merged['corona_risk'] = pd.cut(df_merged['Bar_Density_Index'], bins=4, 
                              labels=['low', 'medium', 'high', 'very high'])
df_merged['marker_colors'] = pd.cut(df_merged['Bar_Density_Index'], bins=4, 
                              labels=['blue', 'green', 'yellow', 'red'])

In [25]:
df_merged

Unnamed: 0,District,Latitude,Longitude,Area(km²),Inhabitants,Density(Inhab./km²),Number_of_Bars,Bar_Density_Index,corona_risk,marker_colors
0,Altstadt-Lehel,48.137828,11.574582,315,21.1,6.708,50,15.873016,high,yellow
1,Ludwigsvorstadt-Isarvorstadt,48.131771,11.555809,440,51.644,11.734,50,11.363636,high,yellow
2,Maxvorstadt,48.151092,11.562418,430,51.402,11.96,48,11.162791,medium,green
3,Schwabing-West,48.164849,11.5635,436,68.527,15.706,48,11.009174,medium,green
4,Au-Haidhausen,48.128753,11.590536,422,61.356,14.541,49,11.611374,high,yellow
5,Sendling,48.118012,11.539083,394,40.983,10.405,47,11.928934,high,yellow
6,Sendling-Westpark,48.118031,11.519333,781,59.643,7.632,19,2.432778,low,blue
7,Schwanthalerhöhe,48.133782,11.541057,207,29.743,14.367,46,22.222222,very high,red
8,Neuhausen-Nymphenburg,48.154222,11.531517,1291,98.814,7.651,47,3.640589,low,blue
9,Moosach,48.179895,11.510571,1109,54.223,4.888,23,2.07394,low,blue


In [26]:

map_munich = folium.Map(location=[48.1, 11.5], zoom_start=10)
for lat, lng, district, corona_risk, marker_color in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['District'], df_merged['corona_risk'],df_merged["marker_colors"]):
    label = '{}, {}'.format(district, corona_risk)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_munich)  


map_munich