In [743]:
import configparser
import requests
import datetime
from requests import Session
from psycopg2 import connect
from psycopg2.extras import execute_values
from time import sleep
CONFIG = configparser.ConfigParser()
CONFIG.read(r'C:\Users\nchan6\Documents\db.cfg')
dbset = CONFIG['DBSETTINGS']
con = connect(**dbset)

In [923]:
def get_tablename(mapserver, id):
    '''Retrieve the name of the layer'''
    url = 'https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/layers?f=json'.format(mapserver)
    r = requests.get(url, verify = False)
    ajson = r.json()
    layers = ajson['layers']
    for layer in layers:
        if layer['id'] == id:
            output_name = (layer['name'].lower()).replace(' ', '_')
        else:
            continue
    return output_name       

In [941]:
def create_table(output_table, return_json):
    '''Create a new table in postgresql for the layer'''
    new_column = '('
    insert_column= '('
    fields = return_json['fields']
    for field in fields:
        if field['type'] == 'esriFieldTypeInteger' or field['type'] == 'esriFieldTypeInteger' or field['type'] =='esriFieldTypeOID' or field['type'] == 'esriFieldTypeSmallInteger' or field['type'] =='esriFieldGlobalID':
                column_type = 'integer'
        elif field['type'] == 'esriFieldTypeString':
                column_type = 'text'
        elif field['type'] == 'esriFieldTypeDouble':
                column_type = 'numeric'
        elif field['type'] == 'esriFieldTypeDate':
                column_type = 'timestamp without time zone'
                
        column_name = (field['name'].lower()).replace('.', '_') 
        new_column = new_column + column_name +' '+column_type+', '
        insert_column = insert_column + column_name +','

    new_column = new_column +'geom geometry)' 
    insert_column = insert_column + 'geom)'
    sql= '''create table _{} {}'''.format(output_table, new_column)
    
    with con:
        with con.cursor() as cur:
            cur.execute(sql) 
            
    return insert_column        

In [911]:
# Geometry Switcher 
def line(geom):
    return 'SRID=4326;LineString('+','.join(' '.join(str(x) for x in tup) for tup in geom['paths'][0]) +')'
def polygon(geom):
    return 'SRID=4326;MultiPolygon((('+','.join(' '.join(str(x) for x in tup) for tup in geom['rings'][0]) +')))'
def point(geom):
    return 'SRID=4326;Point('+(str(geom['x']))+' '+ (str(geom['y']))+')'  
def get_geometry(geometry_type, geom):
    switcher = {
        'esriGeometryLine':line,
        'esriGeometryPolyline': line, 
        'esriGeometryPoint': point, 
        'esriGeometryMultiPolygon': polygon,
        'esriGeometryPolygon': polygon
    }
    func = switcher.get(geometry_type)
    geometry = (func(geom)) 
    return geometry

In [925]:
def to_time(input):
    '''Convert epoch time to postgresql timestamp without time zone'''    
    time = datetime.datetime.fromtimestamp(input/1000).strftime('%Y-%m-%d %H:%M:%S')
    return time

In [926]:
def get_data(mapserver, id, max_number = None, record_max = None):
    '''Get data from gcc view rest api'''        
    base_url = "https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/{}/query".format(mapserver, id)
    query = {"where":"1=1",
             "outFields": "*",
             "outSR": '4326',         
             "returnGeometry": "true",
             "returnTrueCurves": "false",
             "returnIdsOnly": "false",
             "returnCountOnly": "false",
             "returnZ": "false",
             "returnM": "false",
             "orderByFields": "OBJECTID", 
             "returnDistinctValues": "false",
             "returnExtentsOnly": "false",
             "resultOffset": "{}".format(max_number),
             "resultRecordCount": "{}".format(record_max),
             "f":"json"}
    while True:
        try :
            r = requests.get(base_url, params = query, verify = False)
        except requests.exceptions.SSLErrors:
            sleep(10)
            continue
        else:
            return_json = r.json() 
            break
    return return_json

In [928]:
def find_limit(return_json):
    '''Check if last query return all rows'''   
    if return_json.get('exceededTransferLimit', False) == True:
        rule = 'add'
    else:
        rule = 'dont add'  
    return rule   

In [938]:
def send_tempdata(output_table, insert_column, return_json):
    '''Send data to postgresql'''   
    rows = []
    features = return_json['features'] 
    fields = return_json['fields']
    trials = [[field['name'], field['type']] for field in fields]
    for feature in features:
        geom = feature['geometry']
        geometry_type = return_json['geometryType']
        geometry = get_geometry(geometry_type, geom)
        row = [feature['attributes'][trial[0]] if trial[1] != 'esriFieldTypeDate' or feature['attributes'][trial[0]] == None else to_time(feature['attributes'][trial[0]]) for trial in trials]
        row.append(geometry)
        rows.append(row)
    
    sql='INSERT INTO _{} {} VALUES %s'.format(output_table, insert_column)
    with con:
        with con.cursor() as cur:
            execute_values(cur, sql, rows)    
    print('sent')

In [1020]:
def get_layer(mapserver, id):
    
    """
    This function calls to the GCCview rest API and inserts the outputs to the output table in the postgres database.

    Parameters
    ----------
    mapserver : string
        The name of the mapserver that host the desire layer

    id : int
        The id of desire layer
        
    """  
    output_table = get_tablename(mapserver, id)
    
    rule = "add"
    counter = 0
    
    while rule == "add":
           
        if counter == 0:
            return_json = get_data(mapserver, id)
            insert_column = create_table(output_table, return_json)
            features = return_json['features']
            record_max=(len(features))
            max_number = record_max
            send_tempdata(output_table, insert_column, return_json)
            counter += 1
            rule = find_limit(return_json)
            if rule != 'add':
                print('all rows inserted in _', output_table)
        else:
            return_json = get_data(mapserver, id, max_number = max_number, record_max = record_max)
            send_tempdata(output_table, insert_column, return_json)
            counter += 1
            rule = find_limit(return_json)
            if rule == 'add':
                max_number = max_number + record_max
            else:
                print('all rows inserted in _', output_table)
    
    print(find_diff(output_table))

In [1022]:
# compare tables and delete or add
def find_diff(output_table):
    now = datetime.datetime.now()
    date = (str(now.year)+str(now.month)+str(now.day))
    # Find changes in tables
    sql= '''select * from {} except select * from _{}'''.format(output_table, output_table)
    with con:
        with con.cursor() as cur:
            cur.execute(sql)
            # If there are no changes, delete the new table
            if cur.statusmessage == 'SELECT 0':
                sql = '''drop table _{}'''.format(output_table)
                with con:
                    with con.cursor() as cur:
                        cur.execute(sql)
                changes = 'no changes'
            # If there are changes, update the new table, and rename the old table with today's date
            else:
                sql = '''alter table {} rename to {}_{}; alter table _{} rename to {} '''.format(output_table, output_table, date, output_table, output_table)
                with con:
                    with con.cursor() as cur:
                        cur.execute(sql)
                changes = 'updated table, new table in {}, old table in {}_{}'.format(output_table, output_table, date)
            return changes

In [1021]:
get_layer('cot_geospatial2', 2)



sent




sent




sent




sent




sent




sent




sent




sent
all rows inserted in _ bikeway
updated table, new table in bikeway, old table in bikeway_2019627
