## DATA 620

---
### Project 1
### Mael Illien and Jeremy OBrien

Reference materials:
- [Foursquare Endpoints Overview](https://developer.foursquare.com/docs/places-api/endpoints/)
- [Foursquare Places API Next Venues](https://developer.foursquare.com/docs/api-reference/venues/nextvenues/)
- [Foursquare Places API Venue Categories](https://developer.foursquare.com/docs/api-reference/venues/categories/)
- [Foursquare Places API Authentication](https://developer.foursquare.com/docs/places-api/authentication/)
- [Building a Foursquare Location Graph](https://nbviewer.jupyter.org/github/furukama/IPythonNotebooks/blob/master/Building%20a%20Foursquare%20Location%20Graph.ipynb)
- [How to create a location graph from the Foursquare API](http://beautifuldata.net/2014/05/how-to-create-a-location-graph-from-the-foursquare-api/)
- [Classification of Moscow Metro stations using Foursquare data](https://towardsdatascience.com/classification-of-moscow-metro-stations-using-foursquare-data-fb8aad3e0e4)
- [A brief guide to using Foursquare API with a hands-on example in Python](https://medium.com/@aboutiana/a-brief-guide-to-using-foursquare-api-with-a-hands-on-example-on-python-6fc4d5451203)

Bounding box for NYC based on [geographic extent](https://www1.nyc.gov/assets/planning/download/pdf/data-maps/open-data/nybb_metadata.pdf?ver=18c)
- North 40.915568 
- South 40.495992
- East -73.699215
- West -74.257159 

In [7]:
# Bounding box for New York City as [south, west, north, east] in decimal degrees,
# taken from the geographic extent listed above. The venue attribute `city` could be
# used as an alternative way to filter venues.
bbox = [40.495992, -74.257159, 40.915568, -73.699215]

In [8]:
import foursquare
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from config import foursquare_config as fs_config

In [9]:
# Crawl bookkeeping, all empty before the crawl starts:
#   new_crawl  - locations to be crawled
#   done_crawl - locations already crawled
#   links      - tuples that represent links between locations
new_crawl, done_crawl, links = [], [], []
# Table of location meta-data, one row per location id
venues = pd.DataFrame(columns=['id', 'name', 'type', 'lat', 'lng'])

In [10]:
# Credentials are read from the config module imported above, so no secret is typed
# into the notebook itself.
CLIENT_ID, CLIENT_SECRET = fs_config['id'], fs_config['secret']
client = foursquare.Foursquare(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)

In [11]:
# Root venue of the crawl: request its details and display the 'venue' payload.
# Alternative root id, kept for reference:
#venue = client.venues('43695300f964a5208c291fe3')
venue = client.venues('40abf500f964a52035f31ee3')  # Washington Square Park
venue['venue']

Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5ee97c26a5f59c75993a8d47'}
Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5ee97c85e5ec386ad328e86a'}
Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5ee97ca57ab1524df8306afd'}


FoursquareException: Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5ee97ca57ab1524df8306afd'}

In [12]:
# Example attribute extracted from a next venue: the short name of the first category
# of the first item returned by the nextvenues endpoint for this venue id.
nv = client.venues.nextvenues('43695300f964a5208c291fe3')['nextVenues']['items']
nv[0]['categories'][0]['shortName']

'Scenic Lookout'

In [13]:
# Display the full nextvenues response to see which fields each item carries.
client.venues.nextvenues('43695300f964a5208c291fe3')

{'nextVenues': {'count': 5,
  'items': [{'id': '4bcca12bb6c49c7422169491',
    'name': '86th Floor Observation Deck',
    'location': {'address': '350 5th Ave',
     'crossStreet': 'btwn 33rd & 34th Sts',
     'lat': 40.74844544481811,
     'lng': -73.98568124187432,
     'labeledLatLngs': [{'label': 'display',
       'lat': 40.74844544481811,
       'lng': -73.98568124187432}],
     'postalCode': '10118',
     'cc': 'US',
     'city': 'New York',
     'state': 'NY',
     'country': 'United States',
     'formattedAddress': ['350 5th Ave (btwn 33rd & 34th Sts)',
      'New York, NY 10118',
      'United States']},
    'categories': [{'id': '4bf58dd8d48988d165941735',
      'name': 'Scenic Lookout',
      'pluralName': 'Scenic Lookouts',
      'shortName': 'Scenic Lookout',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/sceniclookout_',
       'suffix': '.png'},
      'primary': True}],
    'venuePage': {'id': '64514350'}},
   {'id': '49b7ed6df964a520305

In [None]:
def is_in_NYC(lat, long, box=None):
    """Return True when a point lies strictly inside a bounding box.

    Parameters
    ----------
    lat, long : float
        Latitude and longitude of the point, in decimal degrees.
    box : sequence of four floats, optional
        Bounding box ordered as [south, west, north, east]. When omitted, the
        notebook-level `bbox` is used.

    Returns
    -------
    bool
        True if south < lat < north and west < long < east. A point lying
        exactly on an edge counts as outside.
    """
    south, west, north, east = bbox if box is None else box
    # Plain boolean logic (chained comparisons) instead of bitwise `&`, and no
    # debug print: the caller only needs the answer.
    return bool(south < lat < north and west < long < east)

In [None]:
def extract_venue_data(venue):
    """Pick out the fields kept for each venue from a venue response.

    Parameters
    ----------
    venue : dict
        Response shaped like ``{'venue': {...}}`` whose inner dict has 'id',
        'name', 'location' (with 'lat' and 'lng') and, usually, 'categories'.

    Returns
    -------
    dict
        Keys 'id', 'name', 'type', 'lat', 'lng', each mapped to a one-element
        list so the result can be passed straight to ``pd.DataFrame``. 'type'
        is the short name of the first category, or None when the venue has no
        categories.

    Raises
    ------
    KeyError
        If 'venue', 'id', 'name', 'location', 'lat' or 'lng' is missing.
    """
    details = venue['venue']
    location = details['location']
    # A venue may come back with a missing or empty category list; record None
    # instead of failing on categories[0].
    categories = details.get('categories') or []
    venue_type = categories[0]['shortName'] if categories else None

    return {'id': [details['id']],
            'name': [details['name']],
            'type': [venue_type],
            'lat': [location['lat']],
            'lng': [location['lng']]}
    

In [None]:
# Seed venue id(s) for the crawl. Alternative seed, kept for reference:
#to_crawl = ['43695300f964a5208c291fe3']  # Example of the Empire State Building
to_crawl = ['40abf500f964a52035f31ee3']  # Washington Square Park
depth = 8  # number of passes made by the crawl loop below

In [None]:
# Crawl the "next venues" graph for `depth` passes, starting from the ids in `to_crawl`.
#
# Notes on the implementation:
#   * Already-seen venues are tracked in a set. `x in venues['id']` would test the
#     Series *index*, not its values, so it can never recognise a venue id.
#   * Rows are collected in a list and the DataFrame is rebuilt once at the end
#     (DataFrame.append no longer exists in pandas 2.x).
#   * Each nextvenues item already carries id, name, categories and location, so it is
#     passed to extract_venue_data directly instead of spending one more detail
#     request per neighbour.
#   * The `finally` clause keeps whatever was collected if a request fails part-way
#     (for example when the API quota runs out); the error is still raised.
venue_columns = list(venues.columns)
venue_rows = venues.to_dict('records')
seen_ids = {row['id'] for row in venue_rows}

try:
    for i in range(depth):

        new_crawl = []
        print('Step {}: {} locations and {} links. {} venues to go.'.format(i, len(venue_rows), len(links), len(to_crawl)))
        for v in to_crawl:
            # Only request and store details for venue ids not seen before
            if v not in seen_ids:
                data = extract_venue_data(client.venues(v))
                venue_rows.append({col: values[0] for col, values in data.items()})
                seen_ids.add(v)

            # Always fetch the next venues of v, whether or not v itself was new
            next_venues = client.venues.nextvenues(v)['nextVenues']['items']
            for nv in next_venues:
                nv_id = nv['id']
                # Keep only next venues located in NYC
                if not is_in_NYC(nv['location']['lat'], nv['location']['lng']):
                    continue

                if nv_id not in seen_ids:
                    data = extract_venue_data({'venue': nv})
                    venue_rows.append({col: values[0] for col, values in data.items()})
                    seen_ids.add(nv_id)

                # Queue the venue for the next pass unless it is already handled or queued
                if nv_id not in done_crawl and nv_id not in to_crawl and nv_id not in new_crawl:
                    new_crawl.append(nv_id)
                links.append((v, nv_id))
            done_crawl.append(v)
        to_crawl = new_crawl
finally:
    venues = pd.DataFrame(venue_rows, columns=venue_columns)
                

In [6]:
# Inspect the (from_id, to_id) link tuples collected by the crawl.
links

[]

In [None]:
# Inspect the venue table built by the crawl.
venues

In [None]:
# Save the venue table as crawled, without the DataFrame index.
venues.to_csv('venues_westvil.csv', index=False)

In [None]:
# Preview the table with duplicate rows removed; the result is only displayed,
# `venues` itself is not reassigned here.
venues.drop_duplicates()

In [None]:
# Save the de-duplicated venue table, without the DataFrame index.
venues.drop_duplicates().to_csv('venues_westvil_unique.csv', index=False)

In [None]:
# Generate network: one node per unique venue id, one directed edge per
# (venue, next venue) pair collected in `links`.
venues = venues.drop_duplicates()
labels = dict(zip(venues['id'], venues['name']))  # node id -> venue name, used as plot labels

G = nx.DiGraph()
G.add_nodes_from(venues['id'])
G.add_edges_from(links)

# nx.info() was removed in NetworkX 3.0, so print the summary directly.
print('{} with {} nodes and {} edges'.format(type(G).__name__, G.number_of_nodes(), G.number_of_edges()))

In [None]:
# Score every node twice: PageRank (alpha 0.9) and betweenness centrality.
pagerank = nx.pagerank(G, alpha=.9)
betweenness = nx.betweenness_centrality(G)

# Attach each score to the venue table, looked up by venue id.
for column, scores in (('pagerank', pagerank), ('betweenness', betweenness)):
    venues[column] = [scores[venue_id] for venue_id in venues['id']]

venues

In [None]:
# Node positions from the spring (force-directed) layout. The layout starts from random
# positions, so a fixed seed keeps the drawing the same from run to run.
graph_pos = nx.spring_layout(G, seed=42)
graph_pos

In [None]:
# Plot network: node size scales with PageRank, labels show venue names.

# The figure size is now actually set; the earlier commented-out attempt wrote
# figsize(16,9) instead of figsize=(16, 9).
fig, ax = plt.subplots(figsize=(16, 9))
# Look each size up by node, in the order the nodes are drawn, instead of relying on
# the ordering of pagerank.values().
nodesize = [10000 * pagerank[n] for n in G.nodes()]
nx.draw_networkx_nodes(G, graph_pos, node_size=nodesize, alpha=.5, node_color='blue', ax=ax)
nx.draw_networkx_edges(G, graph_pos, width=1, alpha=.3, edge_color='blue', ax=ax)
nx.draw_networkx_labels(G, graph_pos, labels=labels, font_size=10, font_family='Arial', ax=ax)
ax.set_title('Foursquare next-venue network (node size = PageRank)')
ax.set_axis_off()  # layout coordinates carry no meaning, so hide the axes