# Hunt for Indian Restaurants in AUSTRIA

### Import all the Libraries needed for this notebook.

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json # library to handle JSON files
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge
Libraries imported.


### We copied the URL for cities in Austria to our GitHub repository. Lets download it from there.

In [2]:
#Download the URL from wikipedia page
url  = "https://github.com/Ekdeep0712/Capstone-Project-Coursera/blob/master/at.csv"
page = requests.get(url)
if page.status_code == 200:
    print('URL downloaded successfully')
else:
    print('ERROR in Downloading. Error code: {}'.format(page.status_code))



URL downloaded successfully


### Lets check the HTML dataset and assign it to name "df_at".

In [3]:
df_at = pd.read_html(url, header=0, na_values = ['Not assigned'])[0]
df_at.head()



                        





Unnamed: 0.1,Unnamed: 0,city,lat,lng,country,iso2,admin,capital,population,population_proper
0,,Vienna,48.2,16.366667,Austria,AT,Wien,primary,2400000.0,1731000.0
1,,Linz,48.3,14.3,Austria,AT,Oberösterreich,admin,349161.0,181162.0
2,,Graz,47.066667,15.45,Austria,AT,Steiermark,admin,263234.0,222326.0
3,,Salzburg,47.8,13.033333,Austria,AT,Salzburg,admin,206279.0,150269.0
4,,Innsbruck,47.266667,11.4,Austria,AT,Tirol,admin,155214.0,112467.0


### Rename the Column names, which we want for the future.

In [4]:
df_at = df_at.rename(columns={'city': 'City', 'lat': 'Latitude' , 'lng': 'Longitude'})
df_at.head()

Unnamed: 0.1,Unnamed: 0,City,Latitude,Longitude,country,iso2,admin,capital,population,population_proper
0,,Vienna,48.2,16.366667,Austria,AT,Wien,primary,2400000.0,1731000.0
1,,Linz,48.3,14.3,Austria,AT,Oberösterreich,admin,349161.0,181162.0
2,,Graz,47.066667,15.45,Austria,AT,Steiermark,admin,263234.0,222326.0
3,,Salzburg,47.8,13.033333,Austria,AT,Salzburg,admin,206279.0,150269.0
4,,Innsbruck,47.266667,11.4,Austria,AT,Tirol,admin,155214.0,112467.0


### Now, lets drop the columns which we don't need and check our Dataframe again .

In [5]:
df_at = df_at.drop(["Unnamed: 0", "iso2", "admin", "capital", "population", "population_proper"], axis=1) 
df_at.head()

Unnamed: 0,City,Latitude,Longitude,country
0,Vienna,48.2,16.366667,Austria
1,Linz,48.3,14.3,Austria
2,Graz,47.066667,15.45,Austria
3,Salzburg,47.8,13.033333,Austria
4,Innsbruck,47.266667,11.4,Austria


### Lets check the total number of "Cities" in our Dataframe

In [6]:
print('The dataframe has {} cities.'.format(
        len(df_at['City'].unique()),
        df_at.shape[0]
    )
)

The dataframe has 80 cities.


### We will see the Coordinates of Austria by taking the average Lat. & Lon. from our dataset.

In [7]:
latitude = df_at['Latitude'].mean()
longitude = df_at['Longitude'].mean()
print('The geographical coord. of Austria are {}, {}'.format(latitude, longitude))

The geographical coord. of Austria are 47.680697825000024, 14.489297787500004


### Now, lets create a map of Austria using coordinates

In [8]:
# create map of New York using latitude and longitude values
map_austria = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, country, City in zip(df_at['Latitude'], df_at['Longitude'], df_at['country'], df_at['City']):
    label = '{}, {}'.format(country, City)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_austria) 
    
map_austria

### Next, we are going to start utilizing the Foursquare API to explore the Cities and segment them

In [9]:
CLIENT_ID = '0PMSDWY3ZHV4JTFZFTN005OP2NLXDLN5RO30HXJI3EZZ23YC' # your Foursquare ID
CLIENT_SECRET = 'P2NX4BJ2OY2VX03WDBVY5HDK5CY1Z30EPG2IIRBQDPCSEXFX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0PMSDWY3ZHV4JTFZFTN005OP2NLXDLN5RO30HXJI3EZZ23YC
CLIENT_SECRET:P2NX4BJ2OY2VX03WDBVY5HDK5CY1Z30EPG2IIRBQDPCSEXFX


### Now, let's get the top 50 venues that are in Austria within a radius of 5000 meters.

In [10]:
LIMIT = 50 # limit of number of venues returned by Foursquare API
radius = 5000

# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=0PMSDWY3ZHV4JTFZFTN005OP2NLXDLN5RO30HXJI3EZZ23YC&client_secret=P2NX4BJ2OY2VX03WDBVY5HDK5CY1Z30EPG2IIRBQDPCSEXFX&v=20180605&ll=47.680697825000024,14.489297787500004&radius=5000&limit=50'

### And, we will use the GET function to see our results.

In [11]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ca8ce8a351e3d25e2c9b3a5'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-4e8874f9e5fa8d83e21fbf6d-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/hikingtrail_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d159941735',
         'name': 'Trail',
         'pluralName': 'Trails',
         'primary': True,
         'shortName': 'Trail'}],
       'id': '4e8874f9e5fa8d83e21fbf6d',
       'location': {'cc': 'AT',
        'city': 'Rosenau am Hengstpaß',
        'country': 'Österreich',
        'distance': 1873,
        'formattedAddress': ['Rosenau am Hengstpaß', 'Österreich'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 47.69108653884442,
          'lng': 14.469629514078797}],
        'lat

### From the Foursquare lab in the previous module, we know that all the information is in the items key. Before we proceed, let's borrow the get_category_type function from the Foursquare lab.

In [12]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
     
    else:
        return categories_list[0]['name']

### Now we are ready to clean the json and structure it into a pandas dataframe.

In [13]:
venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Menauer Alm,Trail,47.691087,14.46963
1,Schwarzkogel,Ski Area,47.699061,14.482833
2,Großer Mitterberg,Mountain,47.686908,14.452157
3,Zicker-Reith,Food,47.704011,14.457584


### And how many venues were returned by Foursquare?


In [14]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


### Let's create a function to repeat the same process in all cities of Austria

In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Create a new dataframe called Austria_venues

In [16]:
Austria_venues = getNearbyVenues(names=df_at['City'],
                                latitudes=df_at['Latitude'],
                                longitudes=df_at['Longitude'])

Vienna
Linz
Graz
Salzburg
Innsbruck
Klagenfurt
Wiener Neustadt
Bregenz
Eisenstadt
Dornbirn
Deutschlandsberg
Hartberg
Vöcklabruck
Judenburg
Villach
Sankt Johann im Pongau
Voitsberg
Zwettl
Scheibbs
Kirchdorf
Gmunden
Hollabrunn
Wels
Gmünd
Völkermarkt
Kitzbühel
Freistadt
Tulln
Schwaz
Tamsweg
Mistelbach
Zell am See
Sankt Pölten
Horn
Leibnitz
Perg
Amstetten
Korneuburg
Rohrbach
Lienz
Hermagor
Reutte
Bruck an der Mur
Bruck an der Leitha
Imst
Leoben
Sankt Veit an der Glan
Gänserndorf
Melk
Hallein
Mattersburg
Feldkirchen
Murau
Feldbach
Bludenz
Krems an der Donau
Mödling
Klosterneuburg
Steyr
Neusiedl am See
Neunkirchen
Wolfsberg
Landeck
Eferding
Weiz
Spittal an der Drau
Waidhofen an der Thaya
Rust
Baden
Liezen
Oberwart
Ried im Innkreis
Oberpullendorf
Güssing
Grieskirchen
Schärding
Lilienfeld
Jennersdorf
Kufstein
Braunau am Inn


### Let's check the size of the resulting dataframe

In [17]:
Austria_venues.shape


(1196, 7)

In [18]:
Austria_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Vienna,48.2,16.366667,Cha No Ma,48.199046,16.365225,Tea Room
1,Vienna,48.2,16.366667,Karlsplatz,48.200436,16.368513,Plaza
2,Vienna,48.2,16.366667,Kojiro - Sushi Bar,48.199067,16.364688,Sushi Restaurant
3,Vienna,48.2,16.366667,Motel One Wien-Staatsoper,48.201792,16.367823,Hotel
4,Vienna,48.2,16.366667,Käseland,48.199311,16.364834,Cheese Shop


### Analyze Indian Restaurants in Each Neighborhood

In [19]:
df_rest = Austria_venues[Austria_venues['Venue Category'].str.contains('Indian')]
df_rest.reset_index(inplace=True)
df_rest.drop('index', axis=1, inplace=True)
df_rest

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Vienna,48.2,16.366667,Nam Nam Dabba,48.198587,16.366008,Indian Restaurant
1,Linz,48.3,14.3,Royal Bombay Palace,48.29734,14.29687,Indian Restaurant
2,Innsbruck,47.266667,11.4,Sahib Restaurant,47.268826,11.39874,Indian Restaurant
3,Sankt Pölten,48.2,15.633333,Rajput,48.204558,15.627709,Indian Restaurant


In [20]:
map_rest = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, country, City in zip(df_rest['Venue Latitude'], df_rest['Venue Longitude'], df_rest['Neighborhood'], df_rest['Venue']):
    label = '{}, {}'.format(country, City)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_rest) 
    
map_rest


### Lets check the size of out dataframe.

In [21]:
df_rest.shape

(4, 7)

### Just to make sure the total Restaurants per neighborhood we will use GroupBy function.

In [22]:
print(df_rest.groupby('Neighborhood').count()['Venue'])

Neighborhood
Innsbruck       1
Linz            1
Sankt Pölten    1
Vienna          1
Name: Venue, dtype: int64


## Now, we want to know the ratings of all Indian Restaurants just to make sure that, which Indian restaurant our client should visit to explore the indian food in Austria and make their next Food Blog.  

### Get the venue's overall rating - Nam Nam Dabba (VIENNA)

In [23]:
venue_id = '4c40af3cff711b8d23c51005' # ID of Nam Nam Dabba Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

result = requests.get(url).json()
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

8.0


### Get the venue's overall rating - Royal Bombay Palace (LINZ)


In [24]:
venue_id = '4b55da77f964a520fdf227e3' # ID of Royal Bombay Palace Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

result = requests.get(url).json()
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

8.1


### Get the venue's overall rating - Sahib Restaurant (INNSBRUCK)


In [25]:
venue_id = '4d446bcb4e5d37047b53eb93' # ID of Sahib Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

result = requests.get(url).json()
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

7.7


### Get the venue's overall rating - Rajput (SANKT POLTEN)

In [26]:
venue_id = '4c2dd6a54e132d7fb9029ddf' # ID of Rajput Restaurant
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)

result = requests.get(url).json()
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

7.8


### Now we create a new data frame with Restaurant names and ratings

In [30]:
df_rest['Ratings'] = [8.0, 8.1, 7.7, 7.8]
df_rest = df_rest.drop(["Neighborhood Latitude", "Neighborhood Longitude", "Venue Latitude", "Venue Longitude", "Venue Category"], axis=1) 
df_rest

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,Neighborhood,Venue,Ratings
0,Vienna,Nam Nam Dabba,8.0
1,Linz,Royal Bombay Palace,8.1
2,Innsbruck,Sahib Restaurant,7.7
3,Sankt Pölten,Rajput,7.8


## So, from above research about the Indian Restaurants in AUSTRIA, we recommend our client to visit "Royal Bombay Palace" in LINZ for making their Food Blogs. As, they both have best ratings(8.1) in compare to other Indian Restaurants in AUSTRIA.