# Lab01: Working with APIs

By Rob Hendrickson

In [14]:
### Import Libraries

# File manipulation

import os # For working with Operating System
import urllib # For accessing websites
import requests # For accessing websites
import zipfile # For extracting from Zipfiles
from io import BytesIO, StringIO # For reading bytes objects
import getpass # Inputting passwords

# Analysis

import numpy as np # For working with Arrays
import pandas as pd # Data Manipulation
import geopandas as gpd # Spatial Data Manipulation

# Visualization

import matplotlib.pyplot as plt # Basic Plotting
# import seaborn as sns # Statistical Plotting
# import contextily # Base Map Visualization

# Silence warnings
import warnings
warnings.filterwarnings('ignore') # Ignores some warnings

In [15]:
### Definitions

cwd = os.getcwd() # Current Working Directory

def extract_zip_from_url(url=None):
    '''Download a zipfile from a url and unpack it into its own folder within the working directory.

    Takes a single url (string). The folder is created inside `cwd` and is named after the
    last segment of the url with its extension removed (".../shp_data.zip" -> "shp_data").
    Any input that is not a string prints an error message and nothing is downloaded.
    Returns None.'''

    # Imported here so urllib.request is guaranteed to be loaded:
    # a bare `import urllib` does not load the request submodule by itself
    import urllib.request

    if not isinstance(url, str): # Only a single url is supported
        print('Error Extracting: Invalid Input')
        return

    # Create folder name for file
    folder_name = os.path.splitext(url.split('/')[-1])[0]
    # Make folder for files; exist_ok avoids an error when the folder is already there,
    # whatever the current directory happens to be at call time
    path = os.path.join(cwd, folder_name)
    os.makedirs(path, exist_ok=True)
    # Download the archive; the context manager closes the connection even on errors
    with urllib.request.urlopen(url) as response: # Get a response
        payload = BytesIO(response.read()) # Read Response
    # Unload zip into the new folder; the zip object is closed on exit
    with zipfile.ZipFile(payload) as zip_folder:
        zip_folder.extractall(path=path) # Extract files

## [Minnesota Geospatial Commons](https://gisdata.mn.gov/content/?q=help/api)

In [16]:
# Fetch the two zipped shapefile archives from the Minnesota Geospatial Commons

## Sources: Twin Cities Metro Boundaries & AADT, hosted on gisdata.mn.gov  (~ 6mb)

boundary_url = "https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_metc/bdry_census2010counties_ctus/shp_bdry_census2010counties_ctus.zip"
aadt_url = 'https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_dot/trans_aadt_traffic_segments/shp_trans_aadt_traffic_segments.zip'

# Unpack each archive into a folder of its own (boundaries first, then traffic)
for zip_url in (boundary_url, aadt_url):
    extract_zip_from_url(zip_url)

In [17]:
# Build the local paths of the extracted shapefiles

# Folder name = last part of the address with the trailing ".zip" cut off
boundary_file = 'Census2010CountiesAndCTUs.shp'
boundary_folder = boundary_url.rsplit('/', 1)[-1][:-4]
boundary_path = os.path.join(boundary_folder, boundary_file)

aadt_file = 'Annual_Average_Daily_Traffic_Segments_in_Minnesota.shp'
aadt_folder = aadt_url.rsplit('/', 1)[-1][:-4]
aadt_path = os.path.join(aadt_folder, aadt_file)

# Read both shapefiles with geopandas:
#   ctus - municipal boundaries
#   aadt - traffic segments with Current Annual Average Daily Traffic
ctus, aadt = (gpd.read_file(shapefile) for shapefile in (boundary_path, aadt_path))

## [Google Places](https://developers.google.com/maps/documentation/places/web-service/overview)

In [18]:
def google_places_to_gdf(url, api):

    ''' Request a Google Places url and convert the response into a GeoDataFrame.
        It does NOT work with a "find place" search

    The JSON response is expected to hold its places in a top-level "results" list,
    each place carrying geometry -> location -> lat / lng (those are the keys read here).

    Parameters:
        url : request url including its query parameters, without the key
        api : Google API key; it is appended to the url as "&key=<api>"

    Returns:
        GeoDataFrame in EPSG:4326 with one point per place. The raw "geometry" dictionary
        is replaced by the point geometry and by numeric "x" (longitude) / "y" (latitude)
        columns. A search without matches returns an empty GeoDataFrame.

    Raises:
        requests.HTTPError : the server answered with an HTTP error code
        RuntimeError : the response carries a "status" other than OK / ZERO_RESULTS

    # To Download Data from Google Places API
    # Must create a project on google API Console - https://console.developers.google.com/
    # Enable Google Places API
    # They need a credit card...
    '''

    api_url = url + '&key=' + api

    # The timeout keeps the cell from hanging forever on a stalled connection
    response = requests.request("GET", api_url, timeout=30) # Get request
    response.raise_for_status() # Stop on HTTP errors instead of parsing an error page

    payload = response.json() # Read request as a dictionary

    # The API reports problems (bad key, quota, ...) in a "status" field.
    # The message deliberately leaves out the url because it contains the key.
    status = payload.get('status', 'OK')
    if status not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Google Places request failed (' + str(status) + '): '
                           + str(payload.get('error_message', 'no error message returned')))

    results = payload.get('results', [])
    df = pd.DataFrame(results) # Convert Dictionary to DataFrame (without correct "geometry" column)

    # Get lat/longs for geometry column (built in one pass instead of row by row)

    locations = [place['geometry']['location'] for place in results]
    df['x'] = pd.Series([loc['lng'] for loc in locations], index=df.index, dtype='float64') # Longitude
    df['y'] = pd.Series([loc['lat'] for loc in locations], index=df.index, dtype='float64') # Latitude

    # Convert to GeoDataFrame
    # errors='ignore' because an empty result set has no "geometry" column to drop

    gdf = gpd.GeoDataFrame(df.drop(columns='geometry', errors='ignore'),
                           geometry = gpd.points_from_xy(df['x'], df['y']),
                           crs = 'EPSG:4326')

    return gdf

In [19]:
# Ask for the key interactively so it does not have to be written into the notebook source
api = getpass.getpass(prompt='Please enter your Google API key:')

Please enter your Google API key: ·······································


In [20]:
# Nearby search: primary schools, ranked by distance from the intersection of 94 and 35W

url = 'https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=44.965676%2C-93.259512&rankby=distance&type=primary_school&keyword=school'
schools = google_places_to_gdf(url=url, api=api)

# Text search: Municipal Pools in North Dakota

url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?input=Municipal%20Pool%20in%20North%20Dakota&inputtype=textquery&locationbias=rectangle:45.951407,-104.048971|49,-96.561788'
pools_nd = google_places_to_gdf(url=url, api=api)

## [NDAWN](https://ndawn.ndsu.nodak.edu/)

In [21]:
# Get a csv from their api

# Below Example: Monthly max/min/avg temp of Minot, ND for the past year
# https://ndawn.ndsu.nodak.edu/table.csv?station=40&variable=mdmxt&variable=mdmnt&variable=mdavt&year=2022&ttype=monthly&quick_pick=1_y&begin_date=2021-09&count=12

# This one gets monthly max/min/avg temp for every station listed in the url.
# The window is fixed inside the url: 12 monthly records starting at 2021-09.
url = 'https://ndawn.ndsu.nodak.edu/table.csv?station=78&station=111&station=98&station=174&station=142&station=138&station=161&station=9&station=10&station=118&station=56&station=11&station=12&station=58&station=13&station=84&station=55&station=7&station=87&station=14&station=15&station=96&station=16&station=137&station=124&station=143&station=17&station=85&station=140&station=134&station=18&station=136&station=65&station=104&station=99&station=19&station=129&station=20&station=101&station=81&station=21&station=97&station=22&station=75&station=2&station=172&station=139&station=23&station=62&station=86&station=24&station=89&station=126&station=93&station=90&station=25&station=83&station=107&station=156&station=77&station=26&station=70&station=127&station=27&station=132&station=28&station=29&station=30&station=31&station=102&station=32&station=119&station=4&station=80&station=33&station=59&station=105&station=82&station=34&station=72&station=135&station=35&station=76&station=120&station=141&station=109&station=36&station=79&station=71&station=37&station=38&station=39&station=130&station=73&station=40&station=41&station=54&station=69&station=113&station=128&station=42&station=43&station=103&station=116&station=88&station=114&station=3&station=163&station=64&station=115&station=67&station=44&station=133&station=106&station=100&station=121&station=45&station=46&station=61&station=66&station=74&station=60&station=125&station=8&station=47&station=122&station=108&station=5&station=152&station=48&station=68&station=49&station=50&station=91&station=117&station=63&station=150&station=51&station=6&station=52&station=92&station=112&station=131&station=123&station=95&station=53&station=57&station=149&station=148&station=110&variable=mdmxt&variable=mdmnt&variable=mdavt&year=2022&ttype=monthly&quick_pick=1_y&begin_date=2021-09&count=12'

# The timeout keeps the cell from hanging on a stalled connection
response = requests.request('GET', url, timeout=60)

# Fail here on an HTTP error, rather than later while parsing an error page as CSV
response.raise_for_status()

In [22]:
# Find where CSV Starts in the response
# The beginning is a header describing what NDAWN is and such

start = response.text.find('Station Name')

# find() returns -1 when the marker is missing; slicing from -1 would silently
# parse only the last character of the response, so stop with a clear message instead

if start == -1:
    raise ValueError('NDAWN response does not contain a "Station Name" header row')

# Wrap the CSV part of the text in a file-like object for pandas

decoding = StringIO(response.text[start:])

# Read into Pandas

temps = pd.read_csv(decoding).iloc[1:,:] # Skipping first entry, it just gives the units of each column

# Spatialize (one point per row, built from the Longitude / Latitude columns)

temps_gdf = gpd.GeoDataFrame(temps,
                             geometry = gpd.points_from_xy(x = temps.Longitude, y = temps.Latitude),
                             crs = 'EPSG:4326')

# Find the stations/months that had pool-worthy days

pool_days = temps_gdf[pd.to_numeric(temps_gdf['Max Temp']) > 80] # Rows whose Max Temp is above 80

# Group by unique stations
# Aggregating with a dict of lists gives two-level columns: (geometry, unique) and (Month, unique)

pool_days_gp = pool_days.groupby('Station Name').agg({'geometry':['unique'],
                                       'Month':['unique']})

# Get a new geodataframe with station name and months they were pool-worthy
# pool_days_gp.Month is the one-column ('unique') frame of month arrays;
# every station's array of distinct points is reduced to its first point

pool_days_by_sta = gpd.GeoDataFrame(pool_days_gp.Month, 
                                    geometry = pool_days_gp.geometry.unique.apply(lambda x:x[0]),
                                    crs = temps_gdf.crs).rename(columns = {'unique':'Months'})

# Convert np.arrays in Months into lists for saving in the future

pool_days_by_sta['Months'] = pool_days_by_sta.Months.apply(lambda x: list(x))

## Spatially Join Datasets

In [23]:
# Give every road segment the attributes of the municipality it falls in

# Report both CRSs; the boundaries are reprojected only when the two differ

print('The CTU dataset is in the ', ctus.crs, ' CRS.')
print('The AADT dataset is in the ', aadt.crs, ' CRS.')
if ctus.crs != aadt.crs:
    print('Transforming...')
    ctus = ctus.to_crs(aadt.crs)
else:
    print('They are in the same CRS, UTM 15N')

# Keep only the traffic segments inside the CTU boundaries
# (reset_index moves the old row labels into an "index" column)

aadt_clipped = gpd.clip(aadt, ctus)
aadt_clipped = aadt_clipped.reset_index()

# Left spatial join: segments keep their geometry and gain Municipality information

aadt_w_ctus = gpd.sjoin(aadt_clipped, ctus, how = 'left')

print(aadt_w_ctus.head())

The CTU dataset is in the  epsg:26915  CRS.
The AADT dataset is in the  epsg:26915  CRS.
They are in the same CRS, UTM 15N
   index  SEQUENCE_N   FROM_DATE     TO_DATE ROUTE_LABE   STREET_NAM  \
0  20464       32942  1997-01-01  4000-01-01    CSAH 86    30th St W   
1  26143       42156  1998-01-01  4000-01-01    CSAH 91  Natchez Ave   
1  26143       42156  1998-01-01  4000-01-01    CSAH 91  Natchez Ave   
2  26147       42160  1998-01-01  4000-01-01     CSAH 2   260th St E   
2  26147       42160  1998-01-01  4000-01-01     CSAH 2   260th St E   

                             LOCATION_D  VEHICLE_CL  \
0                  E OF SCOTT CO CSAH91           0   
1  S OF CSAH2 (MAIN ST/260th ST E) ELKO           0   
1  S OF CSAH2 (MAIN ST/260th ST E) ELKO           0   
2      E OF CSAH91 (NATCHEZ AV) IN ELKO           0   
2      E OF CSAH91 (NATCHEZ AV) IN ELKO           0   

                            DAILY_FACT                           SEASONAL_F  \
0                 11 - Sim WkDay/W

In [24]:
# Attach the attributes of the nearest traffic segment to every school

# The school points stay as the geometry; road information comes along as columns

schools_utm = schools.to_crs(crs = aadt.crs) # Reproject the schools into the CRS of the roads
schools_w_roads = gpd.sjoin_nearest(left_df = schools_utm, right_df = aadt) # Nearest-feature join

print(schools_w_roads.head())

  business_status                                               icon  \
0     OPERATIONAL  https://maps.gstatic.com/mapfiles/place_api/ic...   
1     OPERATIONAL  https://maps.gstatic.com/mapfiles/place_api/ic...   
2     OPERATIONAL  https://maps.gstatic.com/mapfiles/place_api/ic...   
3     OPERATIONAL  https://maps.gstatic.com/mapfiles/place_api/ic...   
4     OPERATIONAL  https://maps.gstatic.com/mapfiles/place_api/ic...   

  icon_background_color                                 icon_mask_base_uri  \
0               #7B9EB0  https://maps.gstatic.com/mapfiles/place_api/ic...   
1               #7B9EB0  https://maps.gstatic.com/mapfiles/place_api/ic...   
2               #7B9EB0  https://maps.gstatic.com/mapfiles/place_api/ic...   
3               #7B9EB0  https://maps.gstatic.com/mapfiles/place_api/ic...   
4               #7B9EB0  https://maps.gstatic.com/mapfiles/place_api/ic...   

                                       name        opening_hours  \
0             Emerson Dual Lan

In [25]:
# Nearest-feature join of NDAWN stations to the pools in ND (both layers share one CRS)
# A UTM CRS would be the right choice for distance calculations... But accuracy isn't as important here
# The station points stay as the geometry and gain the info of the nearest pool

stations_w_pools = gpd.sjoin_nearest(left_df = pool_days_by_sta, right_df = pools_nd) # Join

print(stations_w_pools.sample(n = 5))

                   Months                     geometry  index_right  \
Station Name                                                          
Garrison       [7.0, 8.0]  POINT (-101.67955 47.71023)            5   
Minot          [7.0, 8.0]  POINT (-101.30797 48.18043)           10   
Medicine Hole  [7.0, 8.0]  POINT (-102.99209 47.55994)           16   
Rice           [6.0, 7.0]   POINT (-94.26182 45.79384)            2   
Turtle Lake    [7.0, 8.0]  POINT (-100.91529 47.56793)            5   

              business_status  \
Station Name                    
Garrison          OPERATIONAL   
Minot             OPERATIONAL   
Medicine Hole     OPERATIONAL   
Rice              OPERATIONAL   
Turtle Lake       OPERATIONAL   

                                               formatted_address  \
Station Name                                                       
Garrison        443 1st St NE, Garrison, ND 58540, United States   
Minot              E Central Ave, Minot, ND 58701, United States  

## Save the Joined Datasets

In [26]:
# The spatially joined datasets were:
# aadt_w_ctus, schools_w_roads, and stations_w_pools
# Now save them as geojsons so they can be added into an ArcGIS Pro geodatabase

# Save GeoDataFrames as geojsons

datasets = [aadt_w_ctus, schools_w_roads, stations_w_pools]
names = ['roads_w_ctus.geojson', 'schools_w_roads.geojson', 'stations_w_pools.geojson']

# Make sure the output folder exists before anything is written into it

os.makedirs('Results', exist_ok=True)

# Iterate through datasets (each one paired with its file name)

for name, data in zip(names, datasets):

    path = os.path.join('Results', name) # Save Path

    if 'photos' in data.columns: # Remove photos column (lists within dictionary - tough to save...)
        data = data.drop(columns=['photos'])

    # Make all other lists into dictionaries keyed by position: [a, b] -> {0: a, 1: b}
    # Every cell is converted on its own, so repeated index labels
    # (the spatial joins can repeat them) cannot mix rows up

    for column in data.columns:
        if column == 'geometry':
            continue
        is_list = data[column].apply(lambda value: isinstance(value, list))
        if is_list.any(): # If a list is in the series
            data[column] = data[column].apply(
                lambda value: dict(enumerate(value)) if isinstance(value, list) else value)

    data.to_file(path) # Save File

## Add to GeoDataBase

In [None]:
# import Arcpy

import arcpy

# Set Working Directory
# os.path.join supplies the path separator that plain string concatenation left out
# (os.getcwd() + 'Arc1_Lab1.gdb' glued the name straight onto the last folder name)

gdb_path = os.path.join(os.getcwd(), 'Arc1_Lab1.gdb')
arcpy.env.workspace = gdb_path

# Add Geojsons to GeoDataBase

files = os.listdir('Results')

for file in files:

    # Only the saved geojsons are converted; any other file in the folder is skipped
    if not file.lower().endswith('.geojson'):
        continue

    # Absolute paths, so the conversion does not depend on how relative
    # paths get resolved once the workspace points at the geodatabase
    path = os.path.abspath(os.path.join('Results', file))
    feature_name = file.split('.')[0]

    # The road segments are lines, the other two layers are points
    if feature_name == 'roads_w_ctus':
        geom_type = 'Polyline'
    else:
        geom_type = 'Point'

    arcpy.JSONToFeatures_conversion(path, os.path.join(gdb_path, feature_name), geom_type)