In [743]:
import configparser
import requests
import datetime
from requests import Session
from psycopg2 import connect
from psycopg2.extras import execute_values
from time import sleep
# Read the database connection settings from a local config file.
# NOTE(review): this is an absolute, user-specific Windows path, so the
# notebook only runs on the original author's machine - consider making
# the location configurable.
CONFIG = configparser.ConfigParser()
CONFIG.read(r'C:\Users\nchan6\Documents\db.cfg')
# Every key of the [DBSETTINGS] section is passed to connect() as a keyword argument.
dbset = CONFIG['DBSETTINGS']
# Module-level connection used by create_table() and send_data().
con = connect(**dbset)

In [785]:
def get_tablename(mapserver, id):
    '''
    Retrieve the name of a layer and turn it into a table name.

    Parameters
    ----------
    mapserver : string
        The name of the mapserver that hosts the layer.

    id : int
        The id of the layer, compared against each layer's 'id' value.

    Returns
    -------
    string
        The layer name in lower case with spaces replaced by underscores.

    Raises
    ------
    ValueError
        If the mapserver has no layer with the requested id.
    '''
    url = 'https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/layers?f=json'.format(mapserver)
    # NOTE(review): verify=False disables TLS certificate verification.
    r = requests.get(url, verify = False)
    ajson = r.json()
    for layer in ajson['layers']:
        if layer['id'] == id:
            return layer['name'].lower().replace(' ', '_')
    # Without this the function used to fail with an UnboundLocalError.
    raise ValueError('layer id {!r} not found in mapserver {!r}'.format(id, mapserver))

In [786]:
def create_table(output_table, return_json):
    '''
    Create the output table from the field list of a query response.

    One column is created per entry of return_json['fields'], followed by a
    'geom geometry' column. Column names are the field names in lower case
    with '.' replaced by '_'.

    Parameters
    ----------
    output_table : string
        Name of the table to create.

    return_json : dict
        Query response; only its 'fields' list (dicts with 'name' and
        'type') is read here.

    Returns
    -------
    string
        The parenthesised column list, e.g. '(objectid,name,geom)', for use
        in an INSERT statement.
    '''
    # Esri field type -> PostgreSQL column type. GlobalID/GUID values are
    # identifier strings, so they are stored as text.
    type_map = {
        'esriFieldTypeInteger': 'integer',
        'esriFieldTypeSmallInteger': 'integer',
        'esriFieldTypeOID': 'integer',
        'esriFieldTypeString': 'text',
        'esriFieldTypeDouble': 'numeric',
        'esriFieldTypeSingle': 'numeric',
        'esriFieldTypeDate': 'timestamp without time zone',
        'esriFieldTypeGlobalID': 'text',
        'esriFieldTypeGUID': 'text',
    }

    column_names = []
    column_defs = []
    for field in return_json['fields']:
        column_name = field['name'].lower().replace('.', '_')
        # Unknown types fall back to text; previously they either raised
        # UnboundLocalError or silently reused the previous column's type.
        column_type = type_map.get(field['type'], 'text')
        column_names.append(column_name)
        column_defs.append(column_name + ' ' + column_type)

    new_column = '(' + ', '.join(column_defs + ['geom geometry']) + ')'
    insert_column = '(' + ','.join(column_names + ['geom']) + ')'
    # NOTE(review): identifiers are interpolated unquoted into the statement.
    sql = '''create table {} {}'''.format(output_table, new_column)

    with con:
        with con.cursor() as cur:
            cur.execute(sql)

    return insert_column

In [799]:
def geom_type(geometry_type, geom):
    '''
    Convert an Esri JSON geometry into an EWKT string with SRID 4326.

    Parameters
    ----------
    geometry_type : string
        The Esri geometry type, e.g. 'esriGeometryPoint'.

    geom : dict or None
        The geometry: 'x'/'y' for points, 'paths' for polylines, 'rings'
        for polygons.

    Returns
    -------
    string or None
        The EWKT text, or None when geom is None.

    Raises
    ------
    ValueError
        If geometry_type is not one of the supported types.
    '''
    def coords(points):
        # 'x y,x y,...' for one list of coordinate tuples.
        return ','.join(' '.join(str(x) for x in tup) for tup in points)

    if geom is None:
        return None

    if geometry_type == 'esriGeometryPolyline' or geometry_type == 'esriGeometryLine':
        paths = geom['paths']
        if len(paths) > 1:
            # Keep every path; previously all but the first were dropped.
            return 'SRID=4326;MultiLineString(' + ','.join('(' + coords(path) + ')' for path in paths) + ')'
        return 'SRID=4326;LineString(' + coords(paths[0]) + ')'

    if geometry_type == 'esriGeometryPoint':
        return 'SRID=4326;Point(' + str(geom['x']) + ' ' + str(geom['y']) + ')'

    if geometry_type == 'esriGeometryPolygon' or geometry_type == 'esriGeometryMultiPolygon':
        # NOTE(review): only the first ring is used, so holes and additional
        # parts of a multi-part polygon are not carried over.
        return 'SRID=4326;MultiPolygon(((' + coords(geom['rings'][0]) + ')))'

    # Previously this fell through to an UnboundLocalError.
    raise ValueError('unsupported geometry type: {!r}'.format(geometry_type))

In [683]:
def to_time(input):
    '''
    Format an epoch timestamp given in milliseconds as a
    'YYYY-MM-DD HH:MM:SS' string for a postgresql
    timestamp without time zone column.

    The conversion uses datetime.fromtimestamp, i.e. the local time zone of
    the machine running the notebook.
    '''
    seconds = input/1000
    moment = datetime.datetime.fromtimestamp(seconds)
    return moment.strftime('%Y-%m-%d %H:%M:%S')

In [810]:
def get_data(mapserver, id, max_number = None, record_max = None):
    '''
    Query one page of features from a layer and return the decoded JSON.

    Parameters
    ----------
    mapserver : string
        The name of the mapserver that hosts the layer.

    id : int
        The id of the layer.

    max_number : int or string, optional
        Sent as 'resultOffset'. None is sent as an empty string.

    record_max : int or string, optional
        Sent as 'resultRecordCount'. None is sent as an empty string.

    Returns
    -------
    dict
        The JSON body of the response.
    '''
    base_url = "https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/{}/query".format(mapserver, id)
    query = {"where":"1=1",
             "outFields": "*",
             "outSR": '4326',
             "returnGeometry": "true",
             "returnTrueCurves": "false",
             "returnIdsOnly": "false",
             "returnCountOnly": "false",
             "returnZ": "false",
             "returnM": "false",
             "orderByFields": "OBJECTID",
             "returnDistinctValues": "false",
             "returnExtentsOnly": "false",
             # The defaults used to be sent as the literal string 'None'.
             "resultOffset": "" if max_number is None else "{}".format(max_number),
             "resultRecordCount": "" if record_max is None else "{}".format(record_max),
             "f":"json"}
    while True:
        try:
            # NOTE(review): verify=False disables TLS certificate verification.
            r = requests.get(base_url, params = query, verify = False)
        except requests.exceptions.SSLError:
            # Was misspelt 'SSLErros', which turned every failed request into
            # an AttributeError. Retries are unbounded, 10 seconds apart.
            sleep(10)
            continue
        return r.json()

In [681]:
def find_limit(return_json):
    '''
    Tell the caller whether another page has to be requested.

    Returns 'add' when the response's 'exceededTransferLimit' value equals
    True, and 'dont add' otherwise (including when the key is missing).
    '''
    exceeded = return_json.get('exceededTransferLimit', False)
    return 'add' if exceeded == True else 'dont add'

In [800]:
def send_data(output_table, insert_column, return_json):
    '''
    Insert every feature of a query response into the output table.

    Parameters
    ----------
    output_table : string
        Name of the table to insert into.

    insert_column : string
        Parenthesised column list as returned by create_table().

    return_json : dict
        Query response; its 'features', 'fields' and 'geometryType' entries
        are read.

    Each row holds the attribute values in the order of
    return_json['fields'], followed by the geometry. Non-null date fields
    are converted with to_time(). A feature without geometry gets a NULL geom.
    '''
    columns = [(field['name'], field['type']) for field in return_json['fields']]

    rows = []
    for feature in return_json['features']:
        attributes = feature['attributes']
        row = []
        for name, field_type in columns:
            value = attributes[name]
            if field_type == 'esriFieldTypeDate' and value is not None:
                value = to_time(value)
            row.append(value)

        # A missing 'geometry' key used to raise KeyError here.
        geom = feature.get('geometry')
        if geom is None:
            row.append(None)
        else:
            row.append(geom_type(return_json['geometryType'], geom))
        rows.append(row)

    sql='INSERT INTO {} {} VALUES %s'.format(output_table, insert_column)
    with con:
        with con.cursor() as cur:
            execute_values(cur, sql, rows)
    print('sent')
In [808]:
def get_layer(mapserver, id):

    """
    Download a layer from the GCCview rest API page by page and insert it
    into a newly created table in the postgres database.

    The first request is sent without offset or record count; the number of
    features it returns is then used as the page size for all further
    requests, which continue for as long as find_limit() answers 'add'.

    Parameters
    ----------
    mapserver : string
        The name of the mapserver that hosts the desired layer

    id : int
        The id of the desired layer

    """
    output_table = get_tablename(mapserver, id)

    # First page: also defines the table layout and the page size.
    first_page = get_data(mapserver, id, max_number = '', record_max = '')
    insert_column = create_table(output_table, first_page)
    page_size = len(first_page['features'])
    send_data(output_table, insert_column, first_page)
    more_pages = find_limit(first_page) == 'add'

    # Remaining pages: advance the offset by one page per request.
    offset = page_size
    while more_pages:
        page = get_data(mapserver, id, max_number = offset, record_max = page_size)
        send_data(output_table, insert_column, page)
        more_pages = find_limit(page) == 'add'
        offset = offset + page_size

    print('all rows inserted in ', output_table)


In [807]:
# Load layer 5 of the 'cot_geospatial2' mapserver into the database.
get_layer('cot_geospatial2', 5)



https://insideto-gis.toronto.ca/arcgis/rest/services/cot_geospatial2/MapServer/5/query
{'where': '1=1', 'outFields': '*', 'outSR': '4326', 'returnGeometry': 'true', 'returnTrueCurves': 'false', 'returnIdsOnly': 'false', 'returnCountOnly': 'false', 'returnZ': 'false', 'returnM': 'false', 'orderByFields': 'OBJECTID', 'returnDistinctValues': 'false', 'returnExtentsOnly': 'false', 'resultOffset': '', 'resultRecordCount': '', 'f': 'json'}
sent
all rows inserted in  current_road_restriction
