In [5]:
import json
import pandas as pd
import requests
from datetime import datetime

In [8]:
response = requests.get('https://coinmap.org/api/v1/venues/')
data = response.json()
venues_array = data['venues']

In [9]:
print(venues_array[0])

{'id': 8225, 'lat': 50.1220639499942, 'lon': -5.53289830684662, 'category': 'atm', 'name': 'One And All', 'created_on': 1465943680, 'geolocation_degrees': '5°31\'58.43"W, 50°7\'19.43"N'}


In [10]:

southernmost_lat = 18.464825 # southern tip of Hawaii
easternmost_lon = -66.949471 # eastern tip of Maine 

# This filtering step reduces the number of records from 22787 to 7379
# This still means we have to call the API 7379 times. Unfortunately 
# there are no further simple optimization we can make, since it turns
# out that >6000 of the 7379 actually lie within the United States 
venues_array = list(filter( lambda ven: int(ven['lon']) <= easternmost_lon, venues_array ))
venues_array = list(filter( lambda ven: int(ven['lat']) >= southernmost_lat, venues_array ))

print(len(list(venues_array)))

venues_usa = []

for venue in venues_array: 
    
    lat_param = venue['lat']
    lon_param = venue['lon']
    
    # Get response and convert it into a json
    response = requests.get(f'https://geo.fcc.gov/api/census/block/find?latitude={lat_param}&longitude={lon_param}&format=json')
    
    if response.status_code == 200:
        data = response.json()
        
        # We cast the state name and county name to string becuase when the coordinates lie
        # outside of the United States, these values are returned as type <class 'NoneType'>.
        # In this case, we simply skip this record and move on to the next on in the veneus_array.
        if str(data['State']['name']) == 'None' or str(data['County']['name']) == 'None':
            continue
        
        # If the venue is in the United States, we assign the state name and country name to the
        # current venue record. We then append this record, which now contains fields for state and
        # county to the venues_usa list. Casting to a string here is a defensive operation. 
        else:
            venue['state'] = str(data['State']['name'])
            venue['county'] = str(data['County']['name'])
            venues_usa.append(venue)
    else:
        print(response.status_code, response)
    
print(len(venues_usa))    

7382
6433


In [13]:
# Since the continuous process of calling the API for each record is so expensive
# we will want to save the data as soon as possible. Although the data has been 
# feature enigeered to some degree, it is still 'raw', so we will save it to the
# raw directory immediately
df_venues_usa = pd.json_normalize(venues_usa)
df_venues_usa

df_venues_usa.to_csv('../../data/raw/COINMAP_DATA_USA', index=False)

In [17]:
# Since the continuous process of calling the API for each record is so expensive
# we will want to save the data as soon as possible. 
df_venues_usa = pd.json_normalize(venues_usa)

df_venues_usa['created_on'] = pd.to_datetime(df_venues_usa['created_on'],unit='s')

df_venues_usa['year'] = df_venues_usa['created_on'].dt.year

df_venues_usa.drop(['id','promoted','name', 'created_on', 'geolocation_degrees'], axis=1, inplace=True)

df_venues_usa

Unnamed: 0,lat,lon,category,state,county,year
0,39.261575,-121.016567,default,California,Nevada,2013
1,39.714651,-104.936268,default,Colorado,Denver,2014
2,42.362679,-71.086398,shopping,Massachusetts,Middlesex,2014
3,37.770518,-122.450413,default,California,San Francisco,2013
4,40.752564,-86.346627,shopping,Indiana,Cass,2016
...,...,...,...,...,...,...
6428,33.245510,-111.794510,shopping,Arizona,Maricopa,2021
6429,39.745218,-75.548354,shopping,Delaware,New Castle,2021
6430,38.992496,-77.096252,atm,Maryland,Montgomery,2021
6431,38.903967,-76.981083,atm,District of Columbia,District of Columbia,2021


In [22]:
# reorder columns so indices are to the left
df_venues_usa = df_venues_usa[['state', 'county', 'lat', 'lon', 'year', 'category']]

df_venues_usa.dtypes

state        object
county       object
lat         float64
lon         float64
year          int64
category     object
dtype: object

In [20]:
df_venues_usa.to_csv('../../data/interim/CRYTO_VENUES_USA', index=False)

In [25]:
for cnty in df_venues_usa['county']:
    print(cnty)

Nevada
Denver
Middlesex
San Francisco
Cass
Marin
Franklin
Orange
East Baton Rouge
Johnson
Orange
Pinellas
Bexar
San Francisco
San Francisco
San Francisco
San Mateo
Mecklenburg
Brevard
Travis
Kings
San Diego
San Diego
San Diego
San Diego
San Diego
San Diego
San Diego
Riley
Williamson
Pinellas
Pinellas
Baltimore
Morris
Morris
Hillsborough
King
San Diego
San Francisco
Travis
Travis
Pinellas
Saline
Jackson
Jackson
Boulder
Albany
Santa Clara
Middlesex
Philadelphia
Travis
Riley
San Luis Obispo
Monroe
Gallatin
Salt Lake
Sequoyah
Denver
Albany
Sedgwick
Travis
Philadelphia
San Mateo
Middlesex
Kent
Travis
New York
Lee
Somerset
San Francisco
Oklahoma
Cleveland
Douglas
Pinellas
Bernalillo
Pinellas
Shawnee
Shawnee
Miami-Dade
Chester
Sacramento
Pinellas
Jefferson
Multnomah
Chatham
Riley
Summit
Pinellas
Pinellas
New York
Clark
Johnson
El Paso
Los Angeles
Santa Clara
Cuyahoga
Wabash
Hennepin
Marion
Highland
Johnson
Solano
Bernalillo
Harris
Philadelphia
Cook
Pasco
San Francisco
San Francisco
Monmouth
W