In [1]:
import os

import pandas as pd
from pandas.io.json import json_normalize
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

### The CSV file is loaded into a DataFrame

In [2]:
# Load the city table from the CSV file that sits next to the notebook.
df_usa_cities = pd.read_csv(filepath_or_buffer="usa_cities.csv")

In [3]:
# Preview the first rows (head() shows five rows by default).
df_usa_cities.head()

Unnamed: 0,city,city_ascii,lat,lng,pop,country,iso2,iso3,province,abbr
0,Calais,Calais,45.165989,-67.242392,1781.5,United States of America,US,USA,Maine,
1,Houlton,Houlton,46.125517,-67.83972,6051.5,United States of America,US,USA,Maine,
2,Presque Isle,Presque Isle,46.793409,-68.002165,9466.0,United States of America,US,USA,Maine,
3,Bar Harbor,Bar Harbor,44.387897,-68.204375,4483.5,United States of America,US,USA,Maine,
4,Bangor,Bangor,44.801153,-68.778345,40843.0,United States of America,US,USA,Maine,


### The data is preprocessed to remove unwanted columns

In [4]:
# Discard the columns listed below; city, lat, lng and province remain.
unwanted_columns = ["city_ascii", "pop", "iso2", "iso3", "abbr", "country"]
df_usa_cities2 = df_usa_cities.drop(unwanted_columns, axis="columns")
df_usa_cities2.head()

Unnamed: 0,city,lat,lng,province
0,Calais,45.165989,-67.242392,Maine
1,Houlton,46.125517,-67.83972,Maine
2,Presque Isle,46.793409,-68.002165,Maine
3,Bar Harbor,44.387897,-68.204375,Maine
4,Bangor,44.801153,-68.778345,Maine


In [5]:
# Foursquare credentials are read from the environment so the secret is never
# stored in (or shared with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that was previously committed in this cell should
# be treated as leaked and rotated in the Foursquare developer console.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')  # your Foursquare Secret
VERSION = '20190617'  # Foursquare API version

# Fail here, with a clear message, rather than deep inside the request loop.
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Foursquare credentials are missing: set the FOURSQUARE_CLIENT_ID and '
        'FOURSQUARE_CLIENT_SECRET environment variables.'
    )

In [6]:
# (rows, columns) of the trimmed table; the row count is the number of cities queried below.
df_usa_cities2.shape


(769, 4)

### Find the venues near each city and count its Indian restaurants

In [7]:
def get_category_type(row):
    """Return the name of a venue's first listed category.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or a dict)
        Venue record holding its category list under ``'categories'`` or,
        if that key is absent, under ``'venue.categories'``.

    Returns
    -------
    str or None
        The ``'name'`` of the first category, or ``None`` when the category
        list is empty or the cell does not hold a list (e.g. ``None``/NaN).

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates.
        categories_list = row['venue.categories']

    # A missing value (None / NaN) is treated like an empty category list
    # instead of crashing on len().
    if not isinstance(categories_list, (list, tuple)) or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [9]:
# Query settings. They are loop-invariant, so they are defined once here; the
# original names (radius, LIMIT, search_query) are kept for any later cell.
radius = 1500             # value sent as the API's `radius` parameter
LIMIT = 100               # value sent as the API's `limit` parameter
search_query = 'Indian'   # free-text query sent to the venue search
SEARCH_URL = 'https://api.foursquare.com/v2/venues/search'
TARGET_CATEGORY = 'Indian Restaurant'
REQUEST_TIMEOUT = 10      # seconds; without it a stalled connection hangs the whole run

# One shared session so connections are reused and connection errors are retried.
session = requests.Session()
retry = Retry(connect=2, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
session.mount('http://', adapter)
session.mount('https://', adapter)


def _count_indian_restaurants(lat, lng):
    """Count venues near (lat, lng) whose first category is TARGET_CATEGORY.

    Parameters
    ----------
    lat, lng : float
        Coordinates sent to the venue search as the ``ll`` parameter.

    Returns
    -------
    int
        Number of returned venues whose first listed category is named
        ``'Indian Restaurant'``; 0 when the search returns no venues.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (bad credentials, quota, ...).
    requests.RequestException
        On connection failures or timeouts once the retries are exhausted.
    """
    # Passing params lets requests do the URL encoding and keeps the secret
    # out of hand-built strings.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'll': '{},{}'.format(lat, lng),
        'v': VERSION,
        'query': search_query,
        'radius': radius,
        'limit': LIMIT,
    }
    response = session.get(SEARCH_URL, params=params, timeout=REQUEST_TIMEOUT)
    # Surface API errors explicitly instead of failing later with a KeyError.
    response.raise_for_status()
    venues = response.json().get('response', {}).get('venues', [])

    count = 0
    for venue in venues:
        categories = venue.get('categories') or []
        # Only the first listed category of each venue is considered.
        if categories and categories[0].get('name') == TARGET_CATEGORY:
            count += 1
    return count


# One API call per city, in table order, so the list lines up with the rows.
num_ind_rest = [
    _count_indian_restaurants(city_lat, city_lng)
    for city_lat, city_lng in zip(df_usa_cities2['lat'], df_usa_cities2['lng'])
]

df_usa_cities2['Number of Indian Restaurants'] = num_ind_rest
df_usa_cities2.head(5)

Unnamed: 0,city,lat,lng,province,Number of Indian Restaurants
0,Calais,45.165989,-67.242392,Maine,0
1,Houlton,46.125517,-67.83972,Maine,0
2,Presque Isle,46.793409,-68.002165,Maine,0
3,Bar Harbor,44.387897,-68.204375,Maine,1
4,Bangor,44.801153,-68.778345,Maine,0


### Sort the cities by number of Indian restaurants, in descending order

In [10]:
# Rank the cities from most to fewest Indian restaurants and renumber the rows
# from 0 without keeping the old index as a column.
df_usa_cities3 = (
    df_usa_cities2
    .sort_values(by='Number of Indian Restaurants', ascending=False)
    .reset_index(drop=True)
)
# The coordinates are left out of the ranking table.
df_usa_cities4 = df_usa_cities3.drop(["lat", "lng"], axis="columns")
df_usa_cities4.head(10)

Unnamed: 0,city,province,Number of Indian Restaurants
0,New York,New York,30
1,Berkeley,California,6
2,Pittsburgh,Pennsylvania,4
3,Portland,Oregon,4
4,"Washington, D.C.",District of Columbia,4
5,Syracuse,New York,3
6,Monterey,California,3
7,Minneapolis,Minnesota,3
8,Morgantown,West Virginia,2
9,San Francisco,California,2
