In [3]:
import configparser
import requests
import datetime
from psycopg2 import connect
from psycopg2 import sql
from psycopg2.extras import execute_values
import logging
from time import sleep
import click
# Load the database settings from a config file on disk and open the single
# module-level connection `con` that the table-creation and insert functions use.
CONFIG = configparser.ConfigParser()
CONFIG.read('/home/bqu/db_ec2.cfg')
#CONFIG.read('/home/bqu/db_morbius.cfg')
# Every key in the [DBSETTINGS] section is passed to connect() as a keyword argument.
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

## Logger

In [4]:
"""The following provides information about the code when it is running and prints out the log messages 
if they are of logging level equal to or greater than INFO"""
# Configure the root logger at INFO level, then create this module's logger.
logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger(__name__)

## Get mapserver name and generate table name

In [5]:
def mapserver_name(mapserver_n):
    """
    Function to return cot_geospatial mapserver from integer

    Parameters
    -----------
    mapserver_n
        the mapserver number

    Return
    -------
    The mapserver name for that number: 'cot_geospatial' for 0 and
    'cot_geospatial<n>' for the other known numbers. None is returned
    when the number is not one of the known mapservers.
    """
    # Numbers 1, 4 and 9 are deliberately absent, as in the original table.
    known_numbers = (0, 2, 3, 5, 6, 7, 8) + tuple(range(10, 29))
    names_by_number = {
        number: 'cot_geospatial' + (str(number) if number else '')
        for number in known_numbers
    }
    return names_by_number.get(mapserver_n)


In [6]:
def get_tablename(mapserver, layer_id, include_date = False):
    """
    Function to retrieve the name of the layer

    Parameters
    -----------
    mapserver
        The mapserver that host the layer
    layer_id
        The id of the layer
    include_date
        When true, today's date is appended to the table name as '_YYYYMMDD'
        (used for layers that are pulled into a partitioned table)

    Returns
    --------
    output_name
        The table name of the layer in database: the layer name lower-cased
        with spaces replaced by underscores, plus the optional date suffix

    Raises
    ------
    ValueError
        If the mapserver does not list a layer with the given id
    """
    url = 'https://insideto-gis.toronto.ca/arcgis/rest/services/'+mapserver+'/MapServer/layers?f=json'
    # Certificate verification is switched off for this request.
    r = requests.get(url, verify = False)
    layers = r.json()['layers']

    output_name = None
    for layer in layers:
        if layer['id'] == layer_id:
            output_name = layer['name'].lower().replace(' ', '_')

    # Without this guard a missing layer surfaced as an UnboundLocalError below.
    if output_name is None:
        raise ValueError(
            'No layer with id {!r} found on mapserver {!r}'.format(layer_id, mapserver))

    # For the layers that will be pulled into a partitioned table, add the current pull's date to table name
    if include_date:
        output_name = output_name + datetime.date.today().strftime('_%Y%m%d')
    return output_name

## Create table in DB

In [7]:
def get_fieldtype(field):
    """
    Translate an ArcGIS (esri) field type name into a postgresql column type.

    Parameters
    -----------
    field
        The esri field type string, e.g. 'esriFieldTypeString'

    Returns
    --------
    fieldtype
        The postgresql type name to use for the column

    Raises
    ------
    ValueError
        If the esri field type has no mapping (previously this surfaced as
        an UnboundLocalError)
    """
    fieldtypes = {
        'esriFieldTypeInteger': 'integer',
        'esriFieldTypeSmallInteger': 'integer',
        'esriFieldTypeOID': 'integer',
        # NOTE(review): Single is a floating point type in the esri type list;
        # the existing integer mapping is kept so table definitions do not change.
        'esriFieldTypeSingle': 'integer',
        # NOTE(review): looks like a misspelling of 'esriFieldTypeGlobalID';
        # kept as-is for backward compatibility.
        'esriFieldGlobalID': 'integer',
        # GUID-style identifiers are stored as text.
        'esriFieldTypeGlobalID': 'text',
        'esriFieldTypeGUID': 'text',
        'esriFieldTypeString': 'text',
        'esriFieldTypeDouble': 'numeric',
        'esriFieldTypeDate': 'timestamp without time zone',
    }
    fieldtype = fieldtypes.get(field)
    if fieldtype is None:
        raise ValueError('Unsupported esri field type: {!r}'.format(field))
    return fieldtype

In [8]:
def create_table(output_table, return_json, schema_name):
    '''Create a new table in postgresql for the layer

    The columns come from return_json['fields'] (names lower-cased with '.'
    replaced by '_') followed by a 'geom' geometry column. The table is only
    created if it does not exist yet.

    Returns the composed, comma-separated column list to use in INSERT statements.
    '''

    def column_identifier(field):
        # Quoted identifier for the column that stores this field
        return sql.Identifier((field['name'].lower()).replace('.', '_'))

    fields = return_json['fields']

    # Column list for the INSERT statement (each name is followed by a space)
    insert_parts = []
    for field in fields:
        insert_parts.append(column_identifier(field) + sql.SQL(' '))
    insert_parts.append(sql.Identifier('geom'))
    insert_column = sql.SQL(',').join(insert_parts)
    print(insert_column.as_string(con))

    with con:
        with con.cursor() as cur:
            # "<column> <type>" definitions for the CREATE TABLE statement
            definitions = []
            for field in fields:
                definitions.append(
                    column_identifier(field) + sql.SQL(' ') + sql.SQL(get_fieldtype(field["type"])))
            definitions.append(sql.Identifier('geom') + sql.SQL(' ') + sql.SQL('geometry'))

            create_sql = sql.SQL("CREATE TABLE IF NOT EXISTS {schema}.{table} ({columns})").format(
                schema = sql.Identifier(schema_name),
                table = sql.Identifier(output_table),
                columns = sql.SQL(',').join(definitions))
            print(create_sql.as_string(con))
            cur.execute(create_sql)
    return insert_column

In [9]:
# Geometry Switcher 
def line(geom):
    """
    Build the EWKT (SRID 4326) text for a polyline geometry.

    A geometry with one path is written as a LineString, exactly as before.
    A geometry with several paths is written as a MultiLineString so that no
    path is dropped (previously only the first path was kept).
    """
    def path_to_wkt(path):
        # "x y,x y,..." for the vertices of one path
        return ','.join(' '.join(str(x) for x in tup) for tup in path)

    paths = geom['paths']
    if len(paths) > 1:
        parts = ','.join('(' + path_to_wkt(path) + ')' for path in paths)
        return 'SRID=4326;MultiLineString(' + parts + ')'
    return 'SRID=4326;LineString(' + path_to_wkt(paths[0]) + ')'
def polygon(geom):
    """Build the EWKT (SRID 4326) MultiPolygon text from the first ring of the geometry."""
    # Only rings[0] is used; any further rings are ignored.
    first_ring = geom['rings'][0]
    vertices = ','.join(' '.join(map(str, vertex)) for vertex in first_ring)
    return 'SRID=4326;MultiPolygon(((' + vertices + ')))'
def point(geom):
    """Build the EWKT (SRID 4326) Point text from the geometry's 'x' and 'y' values."""
    coordinates = ' '.join((str(geom['x']), str(geom['y'])))
    return 'SRID=4326;Point(' + coordinates + ')'
def get_geometry(geometry_type, geom):
    """
    Convert a geometry from the REST response into EWKT text.

    Parameters
    -----------
    geometry_type
        The esri geometry type name, e.g. 'esriGeometryPoint'
    geom
        The geometry dictionary of one feature

    Returns
    --------
    geometry
        The EWKT string produced by the matching converter

    Raises
    ------
    ValueError
        If there is no converter for geometry_type (previously this failed
        with "'NoneType' object is not callable")
    """
    switcher = {
        'esriGeometryLine':line,
        'esriGeometryPolyline': line,
        'esriGeometryPoint': point,
        'esriGeometryMultiPolygon': polygon,
        'esriGeometryPolygon': polygon
    }
    func = switcher.get(geometry_type)
    if func is None:
        raise ValueError('Unsupported geometry type: {!r}'.format(geometry_type))
    return func(geom)

In [10]:
def to_time(input):
    '''Convert epoch time to postgresql timestamp without time zone

    input is the epoch time in milliseconds. The result is the local-time
    string 'YYYY-MM-DD HH:MM:SS'. Negative values (dates before 1970) are
    converted as such; previously abs() turned them into dates after 1970.
    '''
    seconds = input / 1000
    try:
        moment = datetime.datetime.fromtimestamp(seconds)
    except (OverflowError, OSError, ValueError):
        # Some platforms reject timestamps outside the range their C library
        # supports; fall back to arithmetic from the local epoch so the date
        # still lands on the correct side of 1970.
        moment = datetime.datetime.fromtimestamp(0) + datetime.timedelta(seconds=seconds)
    return moment.strftime('%Y-%m-%d %H:%M:%S')

## Insert data from ArcGIS to DB

In [11]:
def get_data(mapserver, layer_id, max_number = None, record_max = None):
    '''Get data from gcc view rest api

    Parameters
    -----------
    mapserver
        The mapserver name that hosts the layer
    layer_id
        The id of the layer
    max_number
        Offset of the first record to return (sent as resultOffset);
        omitted from the request when None
    record_max
        Number of records to return (sent as resultRecordCount);
        omitted from the request when None

    Returns
    --------
    return_json
        The decoded JSON response of the query

    The request is retried every 10 seconds for as long as the connection fails.
    '''
    base_url = "https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/{}/query".format(mapserver, layer_id)

    # TODO (carried over from the original): special-case the query when the
    # data we want to get is centreline (layer_id == 2).

    query = {"where":"1=1",
             "outFields": "*",
             "outSR": '4326',
             "returnGeometry": "true",
             "returnTrueCurves": "false",
             "returnIdsOnly": "false",
             "returnCountOnly": "false",
             "returnZ": "false",
             "returnM": "false",
             "orderByFields": "OBJECTID",
             "returnDistinctValues": "false",
             "returnExtentsOnly": "false",
             "f":"json"}
    # Only send the paging parameters that were supplied; formatting None
    # used to put the literal text "None" into the request.
    if max_number is not None:
        query["resultOffset"] = "{}".format(max_number)
    if record_max is not None:
        query["resultRecordCount"] = "{}".format(record_max)

    while True:
        try:
            # Certificate verification is switched off for this request.
            r = requests.get(base_url, params = query, verify = False)
        except requests.exceptions.ConnectionError:
            # The exception class is ConnectionError; the previous name
            # 'ConnectionErrors' does not exist, so the retry never happened.
            LOGGER.warning('Connection to %s failed, retrying in 10 seconds', base_url)
            sleep(10)
            continue
        return r.json()

In [12]:
def find_limit(return_json):
    '''Check if last query return all rows

    Returns True when the response flags 'exceededTransferLimit' (more rows
    remain to be fetched) and False otherwise.
    '''
    exceeded = return_json.get('exceededTransferLimit', False)
    return bool(exceeded == True)

In [13]:
def insert_data(output_table, insert_column, return_json, schema_name):
    '''Send data to postgresql

    Parameters
    -----------
    output_table
        Name of the table to insert into
    insert_column
        The composed column list, as returned by create_table
    return_json
        One query response; its 'fields' and 'features' are read
    schema_name
        Name of the schema that holds the table

    Each feature becomes one row: its attribute values in the order of
    return_json['fields'] (date fields converted with to_time unless they are
    null), followed by the geometry as EWKT. A feature that carries no
    geometry gets a NULL geom instead of raising a KeyError.
    '''
    fields = [(field['name'], field['type']) for field in return_json['fields']]
    # The geometry type belongs to the whole response, so look it up once.
    geometry_type = return_json.get('geometryType')

    rows = []
    for feature in return_json['features']:
        attributes = feature['attributes']
        row = []
        for name, field_type in fields:
            value = attributes[name]
            if field_type == 'esriFieldTypeDate' and value is not None:
                value = to_time(value)
            row.append(value)

        geom = feature.get('geometry')
        row.append(get_geometry(geometry_type, geom) if geom is not None else None)
        rows.append(row)

    insert=sql.SQL("INSERT INTO {schema}.{table} ({columns}) VALUES %s").format(
        schema = sql.Identifier(schema_name),
        table = sql.Identifier(output_table),
        columns = insert_column
    )
    with con:
        with con.cursor() as cur:
            execute_values(cur, insert, rows)
    LOGGER.info('Successfully inserted %d records into %s', len(rows), output_table)

## Enter mapserver_id and layer_id, get their PK

In [2]:
# Primary key column of each layer's table, keyed by table name.
# The layers are grouped by the column that serves as their key.
pk_dict = {
    table_name: key_column
    for key_column, table_names in (
        ("area_id", (
            "city_ward",
            "census_tract",
            "neighbourhood_improvement_area",
            "priority_neighbourhood_for_investment",
            "ibms_district",
            "ibms_grid",
        )),
        ("centreline_id", (
            "bikeway",
        )),
        ("objectid", (
            "traffic_camera",
            "permit_parking_area",
            "prai_transit_shelter",
            "traffic_bylaw_point",
            "traffic_bylaw_line",
            "loop_detector",
            "electrical_vehicle_charging_station",
            "day_care_centre",
            "middle_childcare_centre",
            "business_improvement_area",
            "proposed_business_improvement_area",
            "film_permit_all",
            "film_permit_parking_all",
            "hotel",
            "convenience_store",
            "supermarket",
            "place_of_worship",
            "ymca",
            "aboriginal_organization",
            "attraction",
            "dropin",
            "early_years_centre",
            "family_resource_centre",
            "food_bank",
            "longterm_care",
            "parenting_family_literacy",
            "retirement_home",
            "senior_housing",
            "shelter",
            "social_housing",
            "private_road",
            "school",
            "library",
        )),
    )
    for table_name in table_names
}

In [None]:
def concatenate_mapserver_layer(mapserver_n, layer_id):
    """Return the '<mapserver_n>_<layer_id>' text for a mapserver number and layer id."""
    return '_'.join((str(mapserver_n), str(layer_id)))


# NEEDS TO BE UPDATED
def retrieve_layer_name(num_pair):
    """
    Look up the short layer name for a '<mapserver_n>_<layer_id>' pair.

    Parameters
    -----------
    num_pair
        The text built by concatenate_mapserver_layer, e.g. '28_17'

    Returns
    --------
    layer_name
        The short name of the layer, or None when the pair is not listed

    The names used to be bare identifiers that are not defined anywhere in
    the visible code, so every call raised a NameError; they are now string
    literals spelled exactly as the identifiers were.
    NOTE(review): confirm these are the intended names - they do not match the
    table names used as keys of pk_dict (e.g. 'ward' vs 'city_ward').
    """
    switcher = {
            '0_0': 'ward',
            '0_2': 'centreline',
            '2_2': 'bike',
            '2_3': 'traffic_camera',
            '2_9': 'traffic_signal',
            '2_11': 'permit_parking',
            '2_35': 'prai_transit_shelter',
            '2_37': 'tmms_service_request',
            '2_38': 'bylaw_pt',
            '2_39': 'bylaw_line',
            '20_1': 'ev_charging_station',
            '22_1': 'day_care',
            '22_2': 'middle_child',
            '23_1': 'bia',
            '23_13': 'proposed_bia',
            '23_9': 'film_permit',
            '23_10': 'film_parking',
            '23_12': 'hotel',
            '26_1': 'convenience_store',
            '26_4': 'supermarket',
            '26_3': 'worship',
            '26_6': 'ymca',
            '26_7': 'census_tract',
            '26_11': 'neighbourhood_impro',
            '26_13': 'priority_neigh',
            '26_16': 'neigh_demo',
            '26_45': 'aborginal',
            '26_46': 'attraction',
            '26_47': 'dropin',
            '26_48': 'early_year',
            '26_49': 'family_resource',
            '26_50': 'food_bank',
            '26_53': 'long_term_care',
            '26_54': 'parenting_family_lit',
            '26_58': 'retirement',
            '26_59': 'senior_housing',
            '26_61': 'shelter',
            '26_62': 'social_housing',
            '27_13': 'private_road',
            '28_17': 'school',
            '28_28': 'library'
    }
    layer_name = switcher.get(num_pair)
    return layer_name

## Main function that the Task calls

In [14]:
# Added 'schema_name' to the function
def get_layer(mapserver_n, layer_id, schema_name, include_date = False):

    """
    This function calls to the GCCview rest API and inserts the outputs to the output table in the postgres database.

    The first request creates the table (if needed) and fixes the page size
    to the number of features it returned; further pages are requested with
    an increasing offset for as long as the response reports that the
    transfer limit was exceeded.

    Parameters
    ----------
    mapserver_n : int
        The number of the mapserver that host the desire layer

    layer_id : int
        The id of desire layer

    schema_name : str
        The schema in which the output table lives

    include_date : bool
        Passed on to get_tablename when working out the output table name

    """
    mapserver = mapserver_name(mapserver_n)
    output_table = get_tablename(mapserver, layer_id, include_date)
    #--------------------------------
    # retrieve_layer_name(concatenate_mapserver_layer(mapserver_n, layer_id))
    # Looked up here but not used further in this function.
    primary_key = pk_dict.get(output_table)
    #--------------------------------

    # First page: create the table and learn how many records one page holds
    return_json = get_data(mapserver, layer_id)
    insert_column = create_table(output_table, return_json, schema_name)
    page_size = len(return_json['features'])
    insert_data(output_table, insert_column, return_json, schema_name)

    # Remaining pages: keep requesting until the transfer limit is no longer exceeded
    offset = page_size
    while find_limit(return_json):
        return_json = get_data(mapserver, layer_id, max_number = offset, record_max = page_size)
        insert_data(output_table, insert_column, return_json, schema_name)
        offset = offset + page_size

    LOGGER.info('All records from [mapserver: %s, layerID: %d] have been inserted into %s', mapserver, layer_id, output_table)

## Testing out input values

In [17]:
# Inputs for the test run. The mapserver number and layer id had been left
# blank (a syntax error); 28 and 17 are the values of the run recorded in this
# notebook's output ("mapserver: cot_geospatial28, layerID: 17", table school).
mapserver_n = 28
layer_id = 17
schema_name = 'bqu'

In [18]:
# Pull the layer chosen in the previous cell into the given schema.
get_layer(mapserver_n, layer_id, schema_name)



"geo_id" ,"name" ,"school_level" ,"school_type" ,"board_name" ,"school_type_desc" ,"address_point_id" ,"address_number" ,"linear_name_full" ,"address_full" ,"postal_code" ,"municipality" ,"city" ,"place_name" ,"general_use_code" ,"centreline_id" ,"lo_num" ,"lo_num_suf" ,"hi_num" ,"hi_num_suf" ,"linear_name_id" ,"x" ,"y" ,"latitude" ,"longitude" ,"objectid" ,"source_address" ,"geom"
CREATE TABLE IF NOT EXISTS "bqu"."school" ("geo_id" numeric,"name" text,"school_level" text,"school_type" text,"board_name" text,"school_type_desc" text,"address_point_id" integer,"address_number" text,"linear_name_full" text,"address_full" text,"postal_code" text,"municipality" text,"city" text,"place_name" text,"general_use_code" integer,"centreline_id" integer,"lo_num" integer,"lo_num_suf" text,"hi_num" integer,"hi_num_suf" text,"linear_name_id" numeric,"x" numeric,"y" numeric,"latitude" numeric,"longitude" numeric,"objectid" integer,"source_address" text,"geom" geometry)


INFO:__main__:Successfully inserted 1000 records into school
INFO:__main__:Successfully inserted 194 records into school
INFO:__main__:All records from [mapserver: cot_geospatial28, layerID: 17] have been inserted into school
