In [1]:
import configparser
import requests
import datetime
from psycopg2 import connect
from psycopg2 import sql
from psycopg2.extras import execute_values
import logging
from time import sleep
import click
CONFIG = configparser.ConfigParser()
CONFIG.read('/home/bqu/db_ec2.cfg')
#CONFIG.read('/home/bqu/db_morbius.cfg')
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

/opt/jupyterhub/lib64/python3.9/site-packages/IPython/core/interactiveshell.py


## Logger

In [2]:
"""The following provides information about the code when it is running and prints out the log messages 
if they are of logging level equal to or greater than INFO"""
LOGGER = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

## Get mapserver name and generate table name

In [3]:
def mapserver_name(mapserver_n):
    """
    Function to return cot_geospatial mapserver from integer

    Parameters
    -----------
    mapserver_n 
        the mapserver number

    Return
    -------
    map
    """
    
    switcher ={
        0 : 'cot_geospatial',
        2 : 'cot_geospatial2',
        3 : 'cot_geospatial3',
        5 : 'cot_geospatial5',
        6 : 'cot_geospatial6', 
        7 : 'cot_geospatial7',
        8 : 'cot_geospatial8',
        10 : 'cot_geospatial10',
        11 : 'cot_geospatial11',
        12 : 'cot_geospatial12',
        13 : 'cot_geospatial13',
        14 : 'cot_geospatial14',
        15 : 'cot_geospatial15',
        16 : 'cot_geospatial16',
        17 : 'cot_geospatial17',
        18 : 'cot_geospatial18',
        19 : 'cot_geospatial19',
        20 : 'cot_geospatial20',
        21 : 'cot_geospatial21',
        22 : 'cot_geospatial22',
        23 : 'cot_geospatial23',
        24 : 'cot_geospatial24',
        25 : 'cot_geospatial25',
        26 : 'cot_geospatial26',
        27 : 'cot_geospatial27',
        28 : 'cot_geospatial28'
        }
    func = switcher.get(mapserver_n)
    return(func)


In [17]:
mapserver = mapserver_name(0)
print(mapserver)

cot_geospatial


In [4]:
def get_tablename(mapserver, layer_id, include_date = False):
    """
    Function to retrieve the name of the layer

    Parameters
    -----------
    mapserver
        The mapserver that host the layer
    layer_id
        The id of the layer

    Returns
    --------
    output_name
        The table name of the layer in database
    """
    url = 'https://insideto-gis.toronto.ca/arcgis/rest/services/'+mapserver+'/MapServer/layers?f=json'
    r = requests.get(url, verify = False)
    ajson = r.json()
    layers = ajson['layers']
    for layer in layers:
        if layer['id'] == layer_id:
            output_name = (layer['name'].lower()).replace(' ', '_')
        else:
            continue
    
    # For the layers that will be pulled into a partitioned table, add the current pull's date to table name
    if include_date:
        today = datetime.date.today().strftime('_%Y%m%d')
        output_name = output_name + today
    return output_name

In [6]:
layer_id = 2
get_tablename(mapserver, layer_id)



'centreline'

## Create table in DB

In [5]:
def get_fieldtype(field):
    if field == 'esriFieldTypeInteger' or field == 'esriFieldTypeSingle' or field == 'esriFieldTypeInteger' or field=='esriFieldTypeOID' or field == 'esriFieldTypeSmallInteger' or field =='esriFieldGlobalID':
        fieldtype = 'integer'
    elif field == 'esriFieldTypeString':
        fieldtype = 'text'
    elif field == 'esriFieldTypeDouble':
        fieldtype = 'numeric'
    elif field == 'esriFieldTypeDate':
        fieldtype = 'timestamp without time zone'
    return fieldtype

In [6]:
def create_table(output_table, return_json, schema_name):
    '''Create a new table in postgresql for the layer'''
    
    fields = return_json['fields']
    
    insert_column= '('
    for field in fields:
        column_name = (field['name'].lower()).replace('.', '_')
        insert_column = insert_column + column_name +','
    insert_column = insert_column + 'geom)'
    
    print(insert_column)
    with con:
        with con.cursor() as cur:
            col_list = sql.SQL(',').join([sql.Identifier((field['name'].lower()).replace('.', '_')) + sql.SQL(' ') + sql.SQL(get_fieldtype(field["type"])) for field in fields])
            create_sql = sql.SQL("CREATE TABLE IF NOT EXISTS {schema}.{table} ({columns})").format(schema = sql.Identifier(schema_name),
                                                                      table = sql.Identifier(output_table),
                                                                      columns = col_list)
            print(create_sql.as_string(con))
            cur.execute(create_sql)
    return insert_column

In [7]:
# Geometry Switcher 
def line(geom):
    return 'SRID=4326;LineString('+','.join(' '.join(str(x) for x in tup) for tup in geom['paths'][0]) +')'
def polygon(geom):
    return 'SRID=4326;MultiPolygon((('+','.join(' '.join(str(x) for x in tup) for tup in geom['rings'][0]) +')))'
def point(geom):
    return 'SRID=4326;Point('+(str(geom['x']))+' '+ (str(geom['y']))+')'  
def get_geometry(geometry_type, geom):
    switcher = {
        'esriGeometryLine':line,
        'esriGeometryPolyline': line, 
        'esriGeometryPoint': point, 
        'esriGeometryMultiPolygon': polygon,
        'esriGeometryPolygon': polygon
    }
    func = switcher.get(geometry_type)
    geometry = (func(geom)) 
    return geometry

In [8]:
def to_time(input):
    '''Convert epoch time to postgresql timestamp without time zone'''    
    time = datetime.datetime.fromtimestamp(abs(input)/1000).strftime('%Y-%m-%d %H:%M:%S')
    return time

## Insert data from ArcGIS to DB

In [9]:
def get_data(mapserver, layer_id, max_number = None, record_max = None):
    '''Get data from gcc view rest api'''        
    base_url = "https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/{}/query".format(mapserver, layer_id)
    
    """ Added stuff """
    '''If the data we want to get is centreline'''
    """
    if layer_id == 2:
        query = {
    """
            
    
    query = {"where":"1=1",
             "outFields": "*",
             "outSR": '4326',         
             "returnGeometry": "true",
             "returnTrueCurves": "false",
             "returnIdsOnly": "false",
             "returnCountOnly": "false",
             "returnZ": "false",
             "returnM": "false",
             "orderByFields": "OBJECTID", 
             "returnDistinctValues": "false",
             "returnExtentsOnly": "false",
             "resultOffset": "{}".format(max_number),
             "resultRecordCount": "{}".format(record_max),
             "f":"json"}
    while True:
        try :
            r = requests.get(base_url, params = query, verify = False)
        except requests.exceptions.ConnectionErrors:
            sleep(10)
            continue
        else:
            return_json = r.json() 
            break
    return return_json

In [10]:
def find_limit(return_json):
    '''Check if last query return all rows'''   
    if return_json.get('exceededTransferLimit', False) == True:
        keep_adding = True
    else:
        keep_adding = False
    return keep_adding   

In [11]:
def insert_data(output_table, insert_column, return_json, schema_name):
    '''Send data to postgresql'''   
    rows = []
    features = return_json['features']
    fields = return_json['fields']
    trials = [[field['name'], field['type']] for field in fields]
    for feature in features:
        geom = feature['geometry']
        geometry_type = return_json['geometryType']
        geometry = get_geometry(geometry_type, geom)
        row = [feature['attributes'][trial[0]] if trial[1] != 'esriFieldTypeDate' or feature['attributes'][trial[0]] == None else to_time(feature['attributes'][trial[0]]) for trial in trials]
        row.append(geometry)
        
        rows.append(row)
    
    insert=sql.SQL("INSERT INTO {schema}.{table} {columns} VALUES %s").format(
        schema = sql.Identifier(schema_name), 
        table = sql.Identifier(output_table), 
        columns = sql.Identifier(insert_column)
    )
    with con:
        with con.cursor() as cur:
               execute_values(cur, insert, rows)
    LOGGER.info('Successfully inserted %d records into %s', len(rows), output_table)

## Main function that the Task calls

In [12]:
# Added 'schema_name' to the function
def get_layer(mapserver_n, layer_id, schema_name, include_date = False):
    
    """
    This function calls to the GCCview rest API and inserts the outputs to the output table in the postgres database.

    Parameters
    ----------
    mapserver : int
        The name of the mapserver that host the desire layer

    layer_id : int
        The id of desire layer
        
    """  
    mapserver = mapserver_name(mapserver_n)
    output_table = get_tablename(mapserver, layer_id, include_date)
    keep_adding = True
    counter = 0

    while keep_adding == True:
        
        if counter == 0:
            return_json = get_data(mapserver, layer_id)
            # Added 'schema_name'
            insert_column = create_table(output_table, return_json, schema_name)
            features = return_json['features']
            record_max=(len(features))
            max_number = record_max
            # Added 'schema_name'
            insert_data(output_table, insert_column, return_json, schema_name)
            counter += 1
            keep_adding = find_limit(return_json)
            if keep_adding == False:
                LOGGER.info('All records from [mapserver: %s, layerID: %d] have been inserted into %s', mapserver, layer_id, output_table)
        else:
            return_json = get_data(mapserver, layer_id, max_number = max_number, record_max = record_max)
            insert_data(output_table, insert_column, return_json, schema_name)
            counter += 1
            keep_adding = find_limit(return_json)
            if keep_adding == True:
                max_number = max_number + record_max
            else:
                LOGGER.info('All records from [mapserver: %s, layerID: %d] have been inserted into %s', mapserver, layer_id, output_table)

## Testing out input values

In [13]:
mapserver_n = 2
layer_id = 2
schema_name = 'bqu'

In [14]:
get_layer(mapserver_n, layer_id, schema_name)



(centreline_id,address_l,address_r,linear_name_full,linear_name_label,parity_l,parity_r,feature_code_desc,feature_code,from_intersection_id,to_intersection_id,lo_num_l,hi_num_l,lo_num_r,hi_num_r,linear_name_id,jurisdiction,trans_id_create,objectid,mi_prinx,rec_id,geo_id,root_id,routenum,type_code,type_desc,status_code,status_desc,shape_len,geom)
CREATE TABLE IF NOT EXISTS "bqu"."bikeway" ("centreline_id" integer,"address_l" text,"address_r" text,"linear_name_full" text,"linear_name_label" text,"parity_l" text,"parity_r" text,"feature_code_desc" text,"feature_code" integer,"from_intersection_id" integer,"to_intersection_id" integer,"lo_num_l" integer,"hi_num_l" integer,"lo_num_r" integer,"hi_num_r" integer,"linear_name_id" integer,"jurisdiction" text,"trans_id_create" numeric,"objectid" integer,"mi_prinx" integer,"rec_id" integer,"geo_id" numeric,"root_id" text,"routenum" integer,"type_code" integer,"type_desc" text,"status_code" integer,"status_desc" text,"shape_len" numeric)


SyntaxError: syntax error at or near ""(centreline_id,address_l,address_r,linear_name_full,linear_name_label,parity_l,parity_r,feature_code_desc,feature_code,from_intersection_id,to_intersection_id,lo_num_l,hi_num_l,lo_num_r,hi_num_r,linear_name_id,jurisdiction,trans_id_create,objectid,mi_prinx,rec_id,geo_id,root_id,routenum,type_code,type_desc,status_code,status_desc,shape_len,geom)""
LINE 1: INSERT INTO "bqu"."bikeway" "(centreline_id,address_l,addres...
                                    ^
