# Analysis on Aachen

### First of all I am importing/installing the necessary libraries

In [2]:
#Import libraries

import pandas as pd
import numpy as np
import json
from pandas.io.json import json_normalize

from bs4 import BeautifulSoup

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

import requests

import matplotlib.cm as cm
import matplotlib.colors as colors

!conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1

### Find the coordinates of Aachen

In [3]:
address = 'Aachen, DE'

geolocator = Nominatim(user_agent="Aachen")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Aachen are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Aachen are 50.776351, 6.083862.


In [242]:
# create map of Aachen using latitude and longitude values
map_aachen = folium.Map(location=[latitude, longitude], zoom_start=15)
map_aachen

### Websearch of all districts

In [298]:
site = 'http://postleitzahlen.woxikon.de/plz/aachen'
page = requests.get(site)
soup = BeautifulSoup(page.content, 'lxml')
#print(soup.prettify())
table = soup.find_all('table')[0]
#print(table.prettify())


postaltable = pd.read_html(str(table))
postal = pd.DataFrame(postaltable[0])
postal.columns = ['Post Code','District','Street']
postal.head()

Unnamed: 0,Post Code,District,Street
0,52062,Aachen Aachen,Hirschgraben
1,52062,Aachen Aachen,Hermannstr.
2,52062,Aachen Aachen,Pontdriesch
3,52062,Aachen Aachen,Mostardstr.
4,52062,Aachen Aachen,Münsterplatz


### Dropping the street column & duplicates

In [299]:
postal = postal.drop(columns = ['Street'], axis = 1)
postal.shape

(1473, 2)

In [306]:
postal_copy = postal
aachen_grouped = postal_copy.drop_duplicates().reset_index(drop = True)
aachen_grouped.head()

Unnamed: 0,Post Code,District
0,52062,Aachen Aachen
1,52062,Aachen
2,52064,Aachen Aachen
3,52066,Aachen Aachen
4,52068,Aachen Aachen


### Define the url with personal id&secret in a hidden cell to call the foursquare api
#### unfortunately only 100 requests can be made

In [194]:
# The code was removed by Watson Studio for sharing.

### Send the GET Request

In [195]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e92027ab1cac0001b4449e2'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Aachen',
  'headerFullLocation': 'Aachen',
  'headerLocationGranularity': 'city',
  'totalResults': 132,
  'suggestedBounds': {'ne': {'lat': 50.78535100900001,
    'lng': 6.0980680846131134},
   'sw': {'lat': 50.76735099099999, 'lng': 6.069655915386886}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bb61ee7ef159c740d7075f7',
       'name': 'Aachener Dom St. Marien',
       'location': {'address': 'Klosterplatz 2',
        'lat': 50.774702036102134,
        'lng': 6.084102988243103,
        'labeledLatLngs': [{'label': 'display',
          'lat': 50.774702036102134,
 

### Creating a Dataframe with all venues

In [196]:
locations = results['response']['groups'][0]['items']
dataframe = json_normalize(locations)
dataframe

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.crossStreet,...,venue.location.formattedAddress,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.postalCode,venue.location.state,venue.name,venue.photos.count,venue.photos.groups,venue.venuePage.id
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4bb61ee7ef159c740d7075f7-0,"[{'id': '4bf58dd8d48988d132941735', 'name': 'C...",4bb61ee7ef159c740d7075f7,Klosterplatz 2,DE,Aachen,Deutschland,,...,"[Klosterplatz 2, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77470203610213...",50.774702,6.084103,52062,Nordrhein-Westfalen,Aachener Dom St. Marien,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4cc58565b2beb1f70818234c-1,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P...",4cc58565b2beb1f70818234c,Katschhof,DE,Aachen,Deutschland,,...,"[Katschhof, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77561116763609...",50.775611,6.083909,52062,Nordrhein-Westfalen,Katschhof,0,[],
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4c926a997f3b8cfa490ea51e-2,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P...",4c926a997f3b8cfa490ea51e,Domhof,DE,Aachen,Deutschland,,...,"[Domhof, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77465811299527...",50.774658,6.083297,52062,Nordrhein-Westfalen,Domhof,0,[],
3,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4c44531f74ffc928ee5875cc-3,"[{'id': '4bf58dd8d48988d10b941735', 'name': 'F...",4c44531f74ffc928ee5875cc,Pontstr. 1-3,DE,Aachen,Deutschland,,...,"[Pontstr. 1-3, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77696662608818...",50.776967,6.083277,52062,Nordrhein-Westfalen,AKL,0,[],
4,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4c82861751ada1cdfd3b1d10-4,"[{'id': '4bf58dd8d48988d123941735', 'name': 'W...",4c82861751ada1cdfd3b1d10,Kockerellstr. 13,DE,Aachen,Deutschland,,...,"[Kockerellstr. 13, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77628844245805...",50.776288,6.081469,52062,Nordrhein-Westfalen,Vertical Weinbar,0,[],
5,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-57c8319e498e299e3128a70c-5,"[{'id': '4bf58dd8d48988d16c941735', 'name': 'B...",57c8319e498e299e3128a70c,Am Markt 46,DE,Aachen,Deutschland,,...,"[Am Markt 46, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77638370134639...",50.776384,6.084331,52062,Nordrhein-Westfalen,BURGERISTA,0,[],
6,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-540ddaf9498e0b9b2c648951-6,"[{'id': '4bf58dd8d48988d146941735', 'name': 'D...",540ddaf9498e0b9b2c648951,Krämerstr. 5,DE,Aachen,Deutschland,,...,"[Krämerstr. 5, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77601585616779...",50.776016,6.084324,52062,Nordrhein-Westfalen,Ghorban Delikatessen Manufaktur,0,[],
7,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4c1121a9b93cc9b6edda57e0-7,"[{'id': '4bf58dd8d48988d129941735', 'name': 'C...",4c1121a9b93cc9b6edda57e0,Markt,DE,Aachen,Deutschland,,...,"[Markt, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77637784048129...",50.776378,6.083880,52062,Nordrhein-Westfalen,Rathaus,0,[],
8,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4b9b7cd1f964a5207e0a36e3-8,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",4b9b7cd1f964a5207e0a36e3,Münsterplatz 3,DE,Aachen,Deutschland,,...,"[Münsterplatz 3, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77457734420087...",50.774577,6.084693,52062,Nordrhein-Westfalen,Nobis Printen,0,[],
9,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4b8957c7f964a520472d32e3-9,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",4b8957c7f964a520472d32e3,Hof 1,DE,Aachen,Deutschland,,...,"[Hof 1, 52062 Aachen, Deutschland]","[{'label': 'display', 'lat': 50.77558836172494...",50.775588,6.085062,52062,Nordrhein-Westfalen,Domkeller,0,[],


In [197]:
# define the dataframe columns
column_names = ['Name', 'Address', 'Latitude', 'Longitude','Category'] 

# instantiate the dataframe
df_f = pd.DataFrame(columns=column_names)
df_f

# first venue on the list
dataframe['venue.name'][0]

'Aachener Dom St. Marien'

### Extracting relevant information

In [198]:
df = dataframe.loc[:,['venue.name','venue.location.address','venue.location.lat','venue.location.lng','venue.categories']]
df.columns = column_names
df.head()

Unnamed: 0,Name,Address,Latitude,Longitude,Category
0,Aachener Dom St. Marien,Klosterplatz 2,50.774702,6.084103,"[{'id': '4bf58dd8d48988d132941735', 'name': 'C..."
1,Katschhof,Katschhof,50.775611,6.083909,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P..."
2,Domhof,Domhof,50.774658,6.083297,"[{'id': '4bf58dd8d48988d164941735', 'name': 'P..."
3,AKL,Pontstr. 1-3,50.776967,6.083277,"[{'id': '4bf58dd8d48988d10b941735', 'name': 'F..."
4,Vertical Weinbar,Kockerellstr. 13,50.776288,6.081469,"[{'id': '4bf58dd8d48988d123941735', 'name': 'W..."


In [199]:
df_c = pd.DataFrame(['Category'])
categorylist = list()
rows, columns = df.shape

# looping through every category and append it to a list
for i in range(rows):
    category = df['Category'][i][0]['shortName']
    categorylist.append(category)
    #print(category)

# First category
df['Category'][0][0]['shortName']

'Church'

#### the next cell were just some test to extract the category out of the dataframe

In [200]:
# The code was removed by Watson Studio for sharing.

### creating a new dataframe with only the extracted categories

In [201]:
df_c = pd.DataFrame(categorylist, columns = ['Category'])
df_c.head()

Unnamed: 0,Category
0,Church
1,Plaza
2,Plaza
3,Falafel
4,Wine Bar


### override the existing category column with the extracted categories

In [202]:
df['Category'] = df_c['Category']
df.head(10)

Unnamed: 0,Name,Address,Latitude,Longitude,Category
0,Aachener Dom St. Marien,Klosterplatz 2,50.774702,6.084103,Church
1,Katschhof,Katschhof,50.775611,6.083909,Plaza
2,Domhof,Domhof,50.774658,6.083297,Plaza
3,AKL,Pontstr. 1-3,50.776967,6.083277,Falafel
4,Vertical Weinbar,Kockerellstr. 13,50.776288,6.081469,Wine Bar
5,BURGERISTA,Am Markt 46,50.776384,6.084331,Burgers
6,Ghorban Delikatessen Manufaktur,Krämerstr. 5,50.776016,6.084324,Deli / Bodega
7,Rathaus,Markt,50.776378,6.08388,City Hall
8,Nobis Printen,Münsterplatz 3,50.774577,6.084693,Bakery
9,Domkeller,Hof 1,50.775588,6.085062,Pub


In [203]:
print('There are {} uniques categories.'.format(len(df['Category'].unique())))

There are 57 uniques categories.


In [237]:
# Disply Categories in columns
aachen_onehot = pd.get_dummies(df[['Category']], prefix="", prefix_sep="")

fixed_columns = [aachen_onehot.columns[-1]] + list(aachen_onehot.columns[:-1])
aachen_onehot = aachen_onehot[fixed_columns]

aachen_onehot.head(0)

Unnamed: 0,Wine Bar,Asian,Bagels,Bakery,Bar,Beer Store,Bookstore,Burgers,Café,Chinese,...,Snacks,South American,Spa,Steakhouse,Supermarket,Sushi,Theater,Turkish,Vegetarian / Vegan,Vietnamese


In [238]:
aachen_onehot.shape

(100, 57)

### The top 5 location types

In [236]:
types = pd.DataFrame(aachen_onehot.sum(axis =0), columns = ['Count'])
sortedtype = types.sort_values(by=['Count'], ascending=False)
sortedtype.head(5)

Unnamed: 0,Count
Bar,7
Bakery,6
Café,5
Italian,5
Coffee Shop,5
