  # OPTIMAL LOCATION FOR GAMEBUSTER

In [1]:
import pandas as pd
import numpy as np
import json, requests
import os
from dotenv import load_dotenv
load_dotenv()
import requests
from pandas.io.json import json_normalize

## Import the *companies* database from MongoDB

In [2]:
# Everything this cell needs from pymongo, imported before first use.
from pymongo import ASCENDING, DESCENDING, MongoClient

dbName = "companies"
mongodbURL = f"mongodb://localhost/{dbName}"

# Both timeouts are 2000 ms, so an unreachable server errors out quickly.
client = MongoClient(
    mongodbURL,
    connectTimeoutMS=2000,
    serverSelectionTimeoutMS=2000,
)

# No name is passed: the database is taken from the connection URL.
db = client.get_database()
cursor = db.list_collections()

--------

## Query tech startups that raised $1M+ and were founded in 2005 or later (less than 15 years old)

In [3]:
# Filter for the `companies` collection:
#   * total_money_raised contains "$<digits>M" (e.g. "$45M"),
#   * founded in 2005 or later,
#   * category is one of the tech-related codes listed below.
# The pattern is a raw string so Python does not treat the regex backslashes as
# (invalid) string escapes. The original `\M` is not a defined regex escape and
# only matched a literal "M" through engine leniency, so it is written as "M".
cur = {
    "$and": [
        {"total_money_raised": {"$regex": r"\$\d+M"}},
        {"founded_year": {"$gte": 2005}},
        {
            "category_code": {
                "$in": [
                    "games_video",
                    "software",
                    "mobile",
                    "social",
                    "network_hosting",
                    "ecommerce",
                    "web",
                ]
            }
        },
    ]
}

# Project only the fields used later in the notebook; `_id` is dropped.
startups = list(
    db["companies"].find(
        cur,
        {"_id": 0, "name": 1, "category_code": 1, "total_money_raised": 1, "offices": 1},
    )
)

----------

## Moving those companies to a data frame

In [4]:
# Build a DataFrame from the list of company documents returned by the query
# (one row per document; "offices" still holds a list of dicts at this point).
sus = pd.DataFrame(startups)

In [5]:
# Preview the first five rows (five is the default for `head`).
sus.head()

Unnamed: 0,name,category_code,total_money_raised,offices
0,Joost,games_video,$45M,"[{'description': '', 'address1': '100 5th Ave ..."
1,Mahalo,web,$21M,"[{'description': '', 'address1': '3525 Eastham..."
2,Thoof,web,$1M,"[{'description': None, 'address1': None, 'addr..."
3,Jangl SMS,web,$9M,"[{'description': '', 'address1': '6601 Owens D..."
4,Jajah,mobile,$33M,"[{'description': 'Jajah HQ', 'address1': '2513..."


-----------

## Explode the df to transform each element of a list-like to a row, replicating index values          
**('Offices' column is full of dicts inside lists)**

In [6]:
# One row per office, with the company's index value repeated for each office.
# Rebuilt from `startups` instead of re-exploding `sus`: once "offices" holds
# dicts, a second explode would treat each dict as list-like and split it into
# its keys, so `sus = sus.explode(...)` was not safe to run twice.
sus = pd.DataFrame(startups).explode("offices")

---------

## Expand the offices column in order to separate each element within a row

In [10]:
# Turn each row's office dict into its own set of columns (one column per key).
coor_offi = sus.apply(
    lambda row: row.offices,
    axis=1,
    result_type="expand",
)
coor_offi.head()

Unnamed: 0,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,,100 5th Ave Fl 6,,10011-6903,New York,NY,USA,40.746497,-74.009447
1,,3525 Eastham Dr.,,90232,Culver City,CA,USA,34.017606,-118.487267
2,,,,,Austin,TX,USA,30.268735,-97.745209
3,,6601 Owens Dr # 240,,94588,Pleasanton,CA,USA,37.697805,-121.907768
4,Jajah HQ,2513 Charleston Road,Suite 102,94043,Mountain View,CA,USA,37.42339,-122.089951


------------

## Concat both data frames

In [20]:
# Put the company-level columns side by side with the expanded office columns.
sus_coor = pd.concat(
    [
        sus.loc[:, ["name", "category_code", "total_money_raised"]],
        coor_offi,
    ],
    axis="columns",
)
sus_coor.head(n=3)

Unnamed: 0,name,category_code,total_money_raised,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,Joost,games_video,$45M,,100 5th Ave Fl 6,,10011-6903,New York,NY,USA,40.746497,-74.009447
1,Mahalo,web,$21M,,3525 Eastham Dr.,,90232,Culver City,CA,USA,34.017606,-118.487267
2,Thoof,web,$1M,,,,,Austin,TX,USA,30.268735,-97.745209


------------

## Transforming the latitude and longitude to a standard GeoJSON Point

**=> {type:"Point", coordinates: [long, lat]}** (GeoJSON puts longitude first)

In [24]:
def transformToGeoPoint(s):
    """Build a GeoJSON Point from a row's ``latitude`` and ``longitude``.

    Parameters
    ----------
    s : object with ``latitude`` and ``longitude`` attributes
        Typically a DataFrame row passed by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    dict or None
        ``{"type": "Point", "coordinates": [longitude, latitude]}``, or
        ``None`` when either coordinate is missing.
    """
    # pd.isna also covers None / pd.NA, which np.isnan rejects with a TypeError.
    if pd.isna(s.latitude) or pd.isna(s.longitude):
        return None

    # GeoJSON coordinate order is [longitude, latitude].
    return {
        "type": "Point",
        "coordinates": [s.longitude, s.latitude],
    }
    

# Row-wise: add a "geopoint" column holding the value transformToGeoPoint
# returns for each office row.
sus_coor["geopoint"] = sus_coor.apply(func=transformToGeoPoint, axis="columns")
sus_coor.head(n=3)

Unnamed: 0,name,category_code,total_money_raised,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude,geopoint
0,Joost,games_video,$45M,,100 5th Ave Fl 6,,10011-6903,New York,NY,USA,40.746497,-74.009447,"{'type': 'Point', 'coordinates': [-74.0094471,..."
1,Mahalo,web,$21M,,3525 Eastham Dr.,,90232,Culver City,CA,USA,34.017606,-118.487267,"{'type': 'Point', 'coordinates': [-118.487267,..."
2,Thoof,web,$1M,,,,,Austin,TX,USA,30.268735,-97.745209,"{'type': 'Point', 'coordinates': [-97.745209, ..."


# Foursquare's Places API

In [22]:
def foursquareAPI(search, ll='40.7243,-74.0018', limit=1, timeout=10):
    """Query Foursquare's ``venues/explore`` endpoint and return the decoded JSON.

    Credentials are read from the ``CLIENT_ID`` and ``CLIENT_SECRET``
    environment variables.

    Parameters
    ----------
    search : str
        Free-text query sent as the ``query`` parameter (e.g. ``"Starbucks"``).
    ll : str, optional
        ``"latitude,longitude"`` to search around. The default is the point
        this function previously hard-coded.
    limit : int, optional
        Value sent as the ``limit`` parameter. Defaults to 1, as before.
    timeout : float, optional
        Seconds ``requests`` waits for the server before raising, so an
        unresponsive API cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        The parsed JSON body of the response. The HTTP status is not checked
        here; callers can inspect the payload's ``meta`` entry.
    """
    url = 'https://api.foursquare.com/v2/venues/explore'

    params = {
        'client_id': os.getenv('CLIENT_ID'),
        'client_secret': os.getenv('CLIENT_SECRET'),
        'v': '20180323',
        'll': ll,
        'query': search,
        'limit': limit,
    }

    resp = requests.get(url=url, params=params, timeout=timeout)
    return resp.json()

In [23]:
# Despite the `df_` prefix, this holds the decoded JSON response (a dict)
# returned by foursquareAPI, not a DataFrame.
df_starbucks = foursquareAPI('Starbucks')
df_starbucks

{'meta': {'code': 200, 'requestId': '5ef91137e771111ff8d0d279'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': '$-$$$$', 'key': 'price'},
    {'name': 'Open now', 'key': 'openNow'}]},
  'suggestedRadius': 600,
  'headerLocation': 'SoHo',
  'headerFullLocation': 'SoHo, New York',
  'headerLocationGranularity': 'neighborhood',
  'query': 'starbucks',
  'totalResults': 51,
  'suggestedBounds': {'ne': {'lat': 40.72212924883804,
    'lng': -74.00571597932256},
   'sw': {'lat': 40.71942958905259, 'lng': -74.00299678923213}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '516409d5e4b0ec7be4b67ab4',
       'name': 'Starbucks',
       'contact': {},
       'location': {'address': '370 Canal St',
        'crossStreet': 'at Sheraton Tribeca',