# Part 2 Get Foursquare Data
In this notebook I will use the location data for the top 100 metropolitan areas gathered in Part 1.  
Using calls to the Foursquare API's Explore endpoint, I will get up to 100 venues that are 'Top Picks' for each metro area.  
From that information, I will create three new dataframes and JSON files.  

The first dataframe will contain the locations and the list of the 10 most frequently occurring venue categories for each metro area.  
The second dataframe will contain all venue categories and, for each metro area, the fraction of the venues returned from Foursquare that fall in each category.  
The third dataframe will contain all venue categories and, for each metro area, the count of the venues returned from Foursquare that fall in each category. 

<table style="width:100%">
    <tr>
        <td style="text-align: left">
            <h2><a href="https://nbviewer.jupyter.org/github/KathrynDH/IBMCapstoneFinalProject/blob/master/Final%20Project%20Get%20Data.ipynb">&larr; Part 1</a></h2>
        </td>
        <td style="text-align: right">
            <h2><a href="https://nbviewer.jupyter.org/github/KathrynDH/IBMCapstoneFinalProject/blob/master/Explore%20Location%20Data.ipynb">Part 3 &rarr;</a></h2>
        </td>
    </tr>
</table>
    
                

In [1]:
#import libraries
import numpy as np
import pandas as pd
import requests 
import io
import ibm_boto3
from pandas.io.json import json_normalize
from datetime import datetime

In [2]:
# The code was removed by Watson Studio for sharing.

In [3]:
# Import Metro dataframe: fetch 'MetroData.json' from the object-storage bucket
# and parse the downloaded bytes into the `df` dataframe used by later cells.
# NOTE(review): `resource` and `bucket` are not defined in any visible cell --
# presumably they come from the hidden Watson Studio cell above; confirm.
obj = resource.Object(bucket_name=bucket, key='MetroData.json').get()
df = pd.read_json(io.BytesIO(obj['Body'].read()))
df.head()

Unnamed: 0,Metropolitan,Country,Continent,Latitude,Longitude
0,Tokyo,Japan,Asia,35.682839,139.759455
1,Delhi,India,Asia,28.651718,77.221939
10,Mexico City,Mexico,North America,19.432601,-99.133342
11,São Paulo,Brazil,South America,-23.550651,-46.633382
12,Lagos,Nigeria,Africa,6.455057,3.394179


In [4]:
# The code was removed by Watson Studio for sharing.

Credentails set


In [5]:
#Function gets venue data for a location and returns dataframe with info from API request
def get_data(lat, lng, limit=100, attempts=2, timeout=30):
    """Fetch 'Top Picks' venues around a coordinate from the Foursquare Explore API.

    Parameters
    ----------
    lat, lng : float
        Latitude and longitude of the location to explore.
    limit : int, default 100
        Maximum number of venues requested from the API.
    attempts : int, default 2
        How many times the request is tried before giving up (the original
        code tried once and retried once).
    timeout : float, default 30
        Seconds to wait for the HTTP response, so a stalled connection cannot
        hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        The flattened ``response.groups[0].items`` list of the API response.

    Raises
    ------
    RuntimeError
        If every attempt fails (network error, non-JSON body, or a response
        without the expected ``response/groups/items`` structure). The
        underlying exception is chained as the cause.

    Notes
    -----
    Reads the module-level names ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` -- presumably set by the hidden credentials cell; confirm.
    """
    url = 'https://api.foursquare.com/v2/venues/explore'
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, lng),
        'limit': limit,
        'section': 'topPicks',
        'time': 'any',
        'day': 'any',
    }

    last_error = None
    for _ in range(max(1, attempts)):
        try:
            results = requests.get(url, params=params, timeout=timeout).json()
            return json_normalize(results['response']['groups'][0]['items'])
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as err:
            # Remember the failure and retry; only the listed, expected
            # failure modes are retried so real bugs still surface.
            last_error = err

    # The message deliberately omits the URL/params so credentials never leak into output.
    raise RuntimeError(
        'Foursquare explore request failed for ll={},{}'.format(lat, lng)
    ) from last_error

In [6]:
#function gets the category of the venues and returns a list of categories from the Foursquare data
def get_category_type(row):
    """Return the name of the first category listed for a venue row.

    Parameters
    ----------
    row : mapping-like (e.g. pandas.Series or dict)
        Must provide the categories under the key ``'categories'`` or,
        failing that, ``'venue.categories'``. The value is expected to be a
        list of dicts that each have a ``'name'`` entry.

    Returns
    -------
    str or None
        ``categories[0]['name']``, or ``None`` when the venue has no
        categories (empty list, or a missing value such as None/NaN).

    Raises
    ------
    KeyError
        If the row has neither ``'categories'`` nor ``'venue.categories'``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; other errors are real bugs.
        categories_list = row['venue.categories']

    try:
        if len(categories_list) == 0:
            return None
    except TypeError:
        # None / NaN have no length: treat a missing value like "no category".
        return None
    return categories_list[0]['name']

In [7]:
#function gets the venues for the location and returns a dataframe with list of venue categories from the Foursquare data
def get_venues(df_4s):
    """Reduce a flattened Foursquare result to name, category, location and id.

    Parameters
    ----------
    df_4s : pandas.DataFrame
        Flattened API items, with columns such as ``'venue.name'``,
        ``'venue.categories'``, ``'venue.location.*'`` and ``'venue.id'``.

    Returns
    -------
    pandas.DataFrame
        One row per venue. Column names are the last dotted component of the
        original names (``'venue.name'`` -> ``'name'``), and ``categories``
        holds the single category name produced by ``get_category_type``.
        For an empty input, an empty frame with the columns
        ``name``, ``categories`` and ``id`` is returned.
    """
    # A location with no venues yields a frame without any 'venue.*' columns;
    # selecting them would raise KeyError, so return an empty result instead.
    if df_4s.empty:
        return pd.DataFrame(columns=['name', 'categories', 'id'])

    # filter columns
    location_columns = [col for col in df_4s.columns if col.startswith('venue.location.')]
    filtered_columns = ['venue.name', 'venue.categories'] + location_columns + ['venue.id']
    # .copy() so the assignment below writes to an independent frame rather
    # than a slice of df_4s (avoids SettingWithCopyWarning).
    dataframe_filtered = df_4s.loc[:, filtered_columns].copy()
    # filter the category for each row
    dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)
    # clean columns
    dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]
    return dataframe_filtered

In [8]:
#function counts venues per category and returns the 10 most frequently
#occurring categories, ordered from most frequent to least frequent
def get_categories(dataframe_filtered):
    """Return the ten most common venue categories with their venue counts.

    Parameters
    ----------
    dataframe_filtered : pandas.DataFrame
        Venue table exposing ``categories`` and ``name`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``Category`` and ``Name`` (the number of venue names counted
        in that category), sorted by ``Name`` descending, re-indexed from 0,
        limited to the first 10 rows.
    """
    category_counts = (
        pd.DataFrame({
            'Category': dataframe_filtered.categories,
            'Name': dataframe_filtered.name,
        })
        .groupby(['Category'], as_index=False)
        .count()
        .sort_values(by=['Name'], ascending=False)
        .reset_index(drop=True)
    )
    return category_counts.head(10)

In [9]:
#Function takes a row index number and creates a new row for the dataframe
def create_row(i, metro_df=None, top_categories=None):
    """Build one output row: 5 location fields followed by 10 category names.

    Parameters
    ----------
    i : int
        Positional row index of the metro area.
    metro_df : pandas.DataFrame, optional
        Frame whose first five columns are copied into the row. Defaults to
        the notebook-level ``df``.
    top_categories : pandas.DataFrame, optional
        Frame whose first column holds category names, most frequent first.
        Defaults to the notebook-level ``df_New``.

    Returns
    -------
    list
        15 values: the first five columns of row ``i``, then up to ten
        category names, padded with NaN when fewer than ten are available.
    """
    metro = df if metro_df is None else metro_df
    categories = df_New if top_categories is None else top_categories

    #Enter location information into the row
    new_row = [metro.iloc[i, j] for j in range(5)]
    #Enter top venue categories into the row
    for k in range(10):
        try:
            new_row.append(categories.iloc[k, 0])
        except IndexError:
            # Fewer than 10 categories for this metro: pad with NaN.
            # (np.nan, because the np.NAN alias was removed in NumPy 2.0.)
            new_row.append(np.nan)
    return new_row

In [10]:
#Create the venue dataframes
#column names: 5 location fields followed by 'Category 1' .. 'Category 10'
colNames = ['Metropolitan', 'Country', 'Continent', 'Latitude', 'Longitude']
colNames += ['Category ' + str(i + 1) for i in range(10)]

# Collect the per-metro pieces in plain lists and build each dataframe once
# at the end: DataFrame.append was removed in pandas 2.0, and growing a frame
# inside a loop copies it on every iteration.
cat_rows = []
onehot_frames = []

for n in range(len(df.index)):
    # Columns 3 and 4 of df hold latitude and longitude; column 0 the metro name.
    lat = df.iloc[n, 3]
    lng = df.iloc[n, 4]
    df_ven = get_venues(get_data(lat, lng))

    # One (Metro, Category) record per returned venue, used for one hot encoding
    onehot_frames.append(pd.DataFrame({'Metro': df.iloc[n, 0], 'Category': df_ven.categories}))

    #Create top categories row; create_row reads the notebook-level df_New
    df_New = get_categories(df_ven)
    cat_rows.append(create_row(n))

df_cat = pd.DataFrame(cat_rows, columns=colNames)
if onehot_frames:
    df_onehot = pd.concat(onehot_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep the original empty-frame shape.
    df_onehot = pd.DataFrame(columns=['Metro', 'Category'])

#Create a dataframe with columns for all categories and store the ratio of
#returned venues in that category for each metro area.
# Only 'Metro' plus the indicator columns are kept: the text 'Category' column
# cannot be averaged, and current pandas raises on it instead of dropping it.
df_onehot2 = pd.concat(
    [df_onehot[['Metro']], pd.get_dummies(df_onehot['Category'], dtype=int)],
    axis=1,
)
df_onehot_grouped = df_onehot2.groupby('Metro', as_index=False).mean()

In [12]:
# Sanity check: print the (rows, columns) shape of the per-metro category
# ratios, then preview the first rows.
print(df_onehot_grouped.shape)
df_onehot_grouped.head()

(100, 393)


Unnamed: 0,Metro,ATM,Acai House,Accessories Store,Afghan Restaurant,African Restaurant,Alsatian Restaurant,American Restaurant,Amphitheater,Antique Shop,...,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Yoshoku Restaurant,Zhejiang Restaurant,Zoo Exhibit
0,Ahmedabad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Alexandria,0.0,0.0,0.0,0.0,0.0,0.0,0.010309,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Ankara,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
3,Atlanta,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,...,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bandung,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Count, for each metro area, how many returned venues fall in each category.
# The text 'Category' column (if present) is dropped first so that only the
# indicator columns are summed; errors='ignore' keeps this cell working when
# df_onehot2 was built without that column.
df_countCat = (
    df_onehot2
    .drop(columns=['Category'], errors='ignore')
    .groupby('Metro', as_index=False)
    .sum()
)
df_countCat.head()

Unnamed: 0,Metro,ATM,Acai House,Accessories Store,Afghan Restaurant,African Restaurant,Alsatian Restaurant,American Restaurant,Amphitheater,Antique Shop,...,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Yoshoku Restaurant,Zhejiang Restaurant,Zoo Exhibit
0,Ahmedabad,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Alexandria,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Ankara,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,0,0,0,0
3,Atlanta,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
4,Bandung,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
# Sanity check: print the (rows, columns) shape of the top-categories table,
# then preview the first rows.
print(df_cat.shape)
df_cat.head()

(100, 15)


Unnamed: 0,Metropolitan,Country,Continent,Latitude,Longitude,Category 1,Category 2,Category 3,Category 4,Category 5,Category 6,Category 7,Category 8,Category 9,Category 10
0,Tokyo,Japan,Asia,35.682839,139.759455,Café,Japanese Restaurant,Italian Restaurant,Sushi Restaurant,Lounge,Garden,French Restaurant,Dessert Shop,Thai Restaurant,Steakhouse
1,Delhi,India,Asia,28.651718,77.221939,Indian Restaurant,Bar,Café,Flea Market,Lounge,Asian Restaurant,Ice Cream Shop,South Indian Restaurant,Portuguese Restaurant,Plaza
2,Mexico City,Mexico,North America,19.432601,-99.133342,Mexican Restaurant,Art Museum,Museum,Arts & Crafts Store,Bar,Ice Cream Shop,Boutique,Bakery,Restaurant,Café
3,São Paulo,Brazil,South America,-23.550651,-46.633382,Brazilian Restaurant,Japanese Restaurant,Café,Bakery,Snack Place,Bookstore,Cosmetics Shop,Asian Restaurant,Art Gallery,Chinese Restaurant
4,Lagos,Nigeria,Africa,6.455057,3.394179,Lounge,African Restaurant,Bar,Café,Shopping Mall,Pizza Place,Hotel,Art Gallery,Italian Restaurant,Market


In [21]:
#Save each dataframe to the bucket as a json file (object key -> dataframe)
for json_key, venue_frame in (
    ('VenueData.json', df_cat),
    ('VenueOneHot.json', df_onehot_grouped),
    ('VenueCatCount.json', df_countCat),
):
    resource.Bucket(name=bucket).put_object(Key=json_key, Body=venue_frame.to_json())
print('Saved')

Saved


In [22]:
#make timestamped backup copies of the files (key = stem + YYYYmmdd_HHMM + '.json')
t = datetime.now().strftime('%Y%m%d_%H%M')
for key_stem, venue_frame in (
    ('VenueData', df_cat),
    ('VenueOneHot', df_onehot_grouped),
    ('VenueCatCount', df_countCat),
):
    resource.Bucket(name=bucket).put_object(Key=key_stem + t + '.json', Body=venue_frame.to_json())

print('Saved ' + t)

Saved 20190916_1450


<table style="width:100%">
    <tr>
        <td style="text-align: left">
            <h2><a href="https://nbviewer.jupyter.org/github/KathrynDH/IBMCapstoneFinalProject/blob/master/Final%20Project%20Get%20Data.ipynb">&larr; Part 1</a></h2>
        </td>
        <td style="text-align: right">
            <h2><a href="https://nbviewer.jupyter.org/github/KathrynDH/IBMCapstoneFinalProject/blob/master/Explore%20Location%20Data.ipynb">Part 3 &rarr;</a></h2>
        </td>
    </tr>
</table>
    