## DATA 620

---
### Project 1
### Mael Illien and Jeremy OBrien

Reference materials:
- [Foursquare Endpoints Overview](https://developer.foursquare.com/docs/places-api/endpoints/)
- [Foursquare Places API Next Venues](https://developer.foursquare.com/docs/api-reference/venues/nextvenues/)
- [Foursquare Places API Venue Categories](https://developer.foursquare.com/docs/api-reference/venues/categories/)
- [Foursquare Places API Authentication](https://developer.foursquare.com/docs/places-api/authentication/)
- [Building a Foursquare Location Graph](https://nbviewer.jupyter.org/github/furukama/IPythonNotebooks/blob/master/Building%20a%20Foursquare%20Location%20Graph.ipynb)
- [How to create a location graph from the Foursquare API](http://beautifuldata.net/2014/05/how-to-create-a-location-graph-from-the-foursquare-api/)
- [Classification of Moscow Metro stations using Foursquare data](https://towardsdatascience.com/classification-of-moscow-metro-stations-using-foursquare-data-fb8aad3e0e4)
- [A brief guide to using Foursquare API with a hands-on example in Python](https://medium.com/@aboutiana/a-brief-guide-to-using-foursquare-api-with-a-hands-on-example-on-python-6fc4d5451203)

Bounding box for NYC based on [geographic extent](https://www1.nyc.gov/assets/planning/download/pdf/data-maps/open-data/nybb_metadata.pdf?ver=18c)
- North 40.915568 
- South 40.495992
- East -73.699215
- West -74.257159 

In [1]:
bbox = [40.4959929,-74.257159,40.915568,-73.699215]  # bounding box for New York City

In [8]:
import foursquare
import pandas as pd
from config import foursquare_config

In [9]:
new_crawl = []  # list of locations to be crawled
done_crawl = []  # list of crawled locations 
links = []  # list of tuples that represent links between locations
venues = pd.DataFrame()  # dataframe (not dict?) of location id => meta-data on location 

In [10]:
# Fill these out to run but don't save to Github
CLIENT_ID = foursquare_config['id']
CLIENT_SECRET = foursquare_config['secret']

client = foursquare.Foursquare(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)

In [17]:
res = client.venues('43695300f964a5208c291fe3')
res['venue']

{'id': '43695300f964a5208c291fe3',
 'name': 'Empire State Building',
 'contact': {'phone': '2127363100',
  'formattedPhone': '(212) 736-3100',
  'twitter': 'empirestatebldg',
  'instagram': 'empirestatebldg',
  'facebook': '153817204635459',
  'facebookUsername': 'empirestatebuilding',
  'facebookName': 'Empire State Building'},
 'location': {'address': '350 5th Ave',
  'crossStreet': 'btwn 33rd & 34th St',
  'lat': 40.7485995507123,
  'lng': -73.98580648682452,
  'postalCode': '10118',
  'cc': 'US',
  'neighborhood': 'Midtown Manhattan, New York, NY',
  'city': 'New York',
  'state': 'NY',
  'country': 'United States',
  'formattedAddress': ['350 5th Ave (btwn 33rd & 34th St)',
   'New York, NY 10118',
   'United States']},
 'canonicalUrl': 'https://foursquare.com/v/empire-state-building/43695300f964a5208c291fe3',
 'categories': [{'id': '4bf58dd8d48988d130941735',
   'name': 'Building',
   'pluralName': 'Buildings',
   'shortName': 'Building',
   'icon': {'prefix': 'https://ss3.4sqi.n

In [11]:
to_crawl = ['43695300f964a5208c291fe3']  # Example of the Empire State Building
depth = 8

for i in range(depth):

    new_crawl = []
    print('Step ' + str(i) + ': ' + str(len(venues)) + ' locations and ' + str(len(links)) + ' links.' + str(len(to_crawl)) + ' venues to go.')
    
    for v in to_crawl:
        
        if v not in venues:
            res = client.venues(v)
            venues = venues.append(pd.DataFrame(
                
                # 3rd-level subset (userCount, checkinsCount, lat, lng) throwing errors
                {'name':res['venue']['name'],
                 'users':res['venue']['stats']['usersCount'],
                 'checkins':res['venue']['stats']['checkinsCount'],
                 'lat':res['venue']['location']['lat'],
                 'lng':res['venue']['location']['lng']},
                index = [v]
            ))
        
        next_venues = client.venues.nextvenues(v)
        
        for nv in next_venues['nextVenues']['items']:
            
            if ((nv['location']['lat'] > bbox[1]) & 
                (nv['location']['lat'] < bbox[3]) & 
                (nv['location']['lng'] > bbox[0]) & 
                (nv['location']['lng'] < bbox[2])):
                
                if nv['id'] not in venues:
                    venues = venues.append(pd.DataFrame(
                        {'name':nv['name'], 
                         'users':nv['stats']['usersCount'],
                         'checkins':nv['stats']['checkinsCount'],
                         'lat':nv['location']['lat'],
                         'lng':nv['location']['lng']},
                        index = [nv['id']]
                    ))
                
                if (nv['id'] not in done_crawl) & (nv['id'] not in to_crawl) & (nv['id'] not in new_crawl):
                    new_crawl.append(nv['id'])
                
                links.append(v, nv['id'])
            
            done_crawl.append(v)
            
        to_crawl = new_crawl
                

Step 0: 0 locations and 0 links.1 venues to go.


KeyError: 'usersCount'

In [None]:
# Generate network
venues = venues.reset.index().drop_duplicates(cols='index',take_last=True).set_index('index')
labels = venues['name'].to_dict()

import network as nx
G = nv.DiGraph()
G.add_nodes_from(venues.index)
for f, t in links:
    G.add_edge(f, t)
    
nx.info(G)

pagerank = nx.pagerank(G, alpha=.9)
venues['pagerank'] = [pagerank[n] for n in venues.index]

betweenness = nx.betweenness_centrality(G)
venues['betweenness'] = [betweenness[n] for n in venues.index]

# Plot network

fig = plt.figure(figsize(16,9), dpi=150))
graph_pos = nx.spring_layout(G)
nodesize = [10000 * n for n in page.rank.values()]
nx.draw_networks_nodes(G, graph_pos, node_size=nodesize, alpha=.5, node_color='blue')
nx.draw_networks_edges(G, graph_pos, width=1, alpha=.3, edge_color='blue')
nx.draw_networkx_labels(G, graph_pos, labels=labels, font_size=10, font_family='Arial')