In [97]:
import configparser
import requests
import datetime
from pathlib import Path
from requests import Session
from psycopg2 import connect
from psycopg2.extras import execute_values
from time import sleep
# Read database settings from ~/db.cfg; every key in the [DBSETTINGS] section
# is passed as a keyword argument to psycopg2's connect().
CONFIG = configparser.ConfigParser()
CONFIG.read(str(Path.home().joinpath('db.cfg')))
dbset = CONFIG['DBSETTINGS']
# Module-level connection used by create_table, send_tempdata and find_diff.
con = connect(**dbset)

In [98]:
def mapserver_name(mapserver_n):
    '''Translate a map server number into its service name.

    Server 0 is plain 'cot_geospatial'; every other known server is
    'cot_geospatial' followed by its number. Returns None when the number
    is not one of the known servers.
    '''
    # Known server numbers: 0, 2, 3, 5-8 and 10-28 (1, 4 and 9 are not mapped).
    known_numbers = (2, 3, 5, 6, 7, 8) + tuple(range(10, 29))
    names = {0: 'cot_geospatial'}
    for number in known_numbers:
        names[number] = 'cot_geospatial{}'.format(number)
    return names.get(mapserver_n)


In [99]:
def get_tablename(mapserver, id):
    '''Retrieve the name of the layer, normalised for use as a table name.

    Parameters
    ----------
    mapserver : string
        Name of the map server hosting the layer, as used in the REST URL.

    id : int
        The id of the layer on that map server.

    Returns
    -------
    string
        The layer name in lower case with spaces replaced by underscores.

    Raises
    ------
    ValueError
        If the map server does not list a layer with the given id.
    '''
    url = 'https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/layers?f=json'.format(mapserver)
    # NOTE(review): verify = False turns off TLS certificate verification.
    r = requests.get(url, verify = False)
    ajson = r.json()
    for layer in ajson['layers']:
        if layer['id'] == id:
            # Stop at the first match instead of scanning the remaining layers.
            return (layer['name'].lower()).replace(' ', '_')
    # Previously this fell through to an unbound local variable.
    raise ValueError('No layer with id {!r} on map server {!r}'.format(id, mapserver))

In [100]:
def create_table(output_table, return_json, primary_key = None):
    '''Create a new staging table (named _<output_table>) in postgresql for the layer

    Parameters
    ----------
    output_table : string
        Table name without the leading underscore.

    return_json : dict
        A query response; its 'fields' list (each entry having 'name' and
        'type') defines the columns.

    primary_key : string, optional
        Column to declare as the primary key. When omitted no primary key
        is added.

    Returns
    -------
    string
        The parenthesised, comma separated column list (ending in geom)
        used for the INSERT statements, e.g. '(objectid,name,geom)'.
    '''
    # Field type reported by the API -> postgresql column type.
    column_types = {
        'esriFieldTypeInteger': 'integer',
        'esriFieldTypeSmallInteger': 'integer',
        'esriFieldTypeOID': 'integer',
        # NOTE(review): kept from the original condition; this spelling lacks
        # 'Type' and so probably never matches a real field type - confirm.
        'esriFieldGlobalID': 'integer',
        'esriFieldTypeString': 'text',
        'esriFieldTypeDouble': 'numeric',
        'esriFieldTypeSingle': 'numeric',
        'esriFieldTypeDate': 'timestamp without time zone',
    }

    definitions = []
    column_names = []
    for field in return_json['fields']:
        # Unmapped types fall back to text. Previously they reused the
        # previous field's type, or raised if the first field was unmapped.
        column_type = column_types.get(field['type'], 'text')
        column_name = (field['name'].lower()).replace('.', '_')
        definitions.append(column_name + ' ' + column_type)
        column_names.append(column_name)

    new_column = '(' + ', '.join(definitions + ['geom geometry']) + ')'
    insert_column = '(' + ','.join(column_names + ['geom']) + ')'

    # Create the table and its primary key in one transaction so a failure
    # does not leave a half-built staging table behind.
    with con:
        with con.cursor() as cur:
            cur.execute('''create table _{} {}'''.format(output_table, new_column))
            if primary_key is not None:
                cur.execute('''alter table _{} add primary key ({})'''.format(output_table, primary_key))
    return insert_column

In [78]:
# Geometry Switcher 
def line(geom):
    '''Build an EWKT (SRID 4326) line geometry from a polyline's 'paths'.

    geom['paths'] is a list of paths, each a list of coordinate tuples.
    A single path yields a LineString, exactly as before. Several paths
    yield a MultiLineString so that no part of the feature is dropped
    (previously only the first path was kept). No paths yields an empty
    LineString instead of raising IndexError.
    '''
    parts = [','.join(' '.join(str(x) for x in tup) for tup in path) for path in geom['paths']]
    if not parts:
        return 'SRID=4326;LineString EMPTY'
    if len(parts) == 1:
        return 'SRID=4326;LineString(' + parts[0] + ')'
    return 'SRID=4326;MultiLineString(' + ','.join('(' + part + ')' for part in parts) + ')'
def polygon(geom):
    '''Build an EWKT (SRID 4326) MultiPolygon from the first ring of a polygon.'''
    # Only geom['rings'][0] is used; any further rings are not included.
    first_ring = geom['rings'][0]
    vertices = []
    for vertex in first_ring:
        vertices.append(' '.join(map(str, vertex)))
    return 'SRID=4326;MultiPolygon(((' + ','.join(vertices) + ')))'
def point(geom):
    '''Build an EWKT (SRID 4326) Point from a geometry's 'x' and 'y' values.'''
    x_text = str(geom['x'])
    y_text = str(geom['y'])
    return ''.join(['SRID=4326;Point(', x_text, ' ', y_text, ')'])
def get_geometry(geometry_type, geom):
    '''Convert a feature geometry to EWKT using the converter for its type.

    Parameters
    ----------
    geometry_type : string
        The response's geometry type, e.g. 'esriGeometryPoint'.

    geom : dict
        The feature's geometry, passed unchanged to the converter.

    Raises
    ------
    ValueError
        If there is no converter for geometry_type. Previously this
        surfaced as "'NoneType' object is not callable".
    '''
    switcher = {
        'esriGeometryLine':line,
        'esriGeometryPolyline': line,
        'esriGeometryPoint': point,
        'esriGeometryMultiPolygon': polygon,
        'esriGeometryPolygon': polygon
    }
    func = switcher.get(geometry_type)
    if func is None:
        raise ValueError('Unsupported geometry type: {!r}'.format(geometry_type))
    return func(geom)

In [79]:
def to_time(input):
    '''Turn an epoch value in milliseconds into a 'YYYY-MM-DD HH:MM:SS' string
    suitable for a postgresql timestamp without time zone column.'''
    seconds = input/1000
    # fromtimestamp() is called without a tz argument, so the result is
    # expressed in the local time zone of the machine running this.
    moment = datetime.datetime.fromtimestamp(seconds)
    return format(moment, '%Y-%m-%d %H:%M:%S')

In [94]:
def get_data(mapserver, id, max_number = None, record_max = None):
    '''Get data from gcc view rest api

    Parameters
    ----------
    mapserver : string
        Name of the map server hosting the layer, as used in the REST URL.

    id : int
        The id of the layer.

    max_number : int, optional
        Sent as resultOffset. Left out of the request when None.

    record_max : int, optional
        Sent as resultRecordCount. Left out of the request when None.

    Returns
    -------
    dict
        The decoded JSON response.
    '''
    base_url = "https://insideto-gis.toronto.ca/arcgis/rest/services/{}/MapServer/{}/query".format(mapserver, id)
    query = {"where":"1=1",
             "outFields": "*",
             "outSR": '4326',
             "returnGeometry": "true",
             "returnTrueCurves": "false",
             "returnIdsOnly": "false",
             "returnCountOnly": "false",
             "returnZ": "false",
             "returnM": "false",
             "orderByFields": "OBJECTID",
             "returnDistinctValues": "false",
             "returnExtentsOnly": "false",
             "f":"json"}
    # Only send the paging parameters when they were supplied; formatting the
    # None defaults used to put the literal text "None" in the request.
    if max_number is not None:
        query["resultOffset"] = "{}".format(max_number)
    if record_max is not None:
        query["resultRecordCount"] = "{}".format(record_max)

    # NOTE(review): retries forever on SSL errors, waiting 10 seconds between
    # attempts; verify = False also turns off TLS certificate verification.
    while True:
        try :
            r = requests.get(base_url, params = query, verify = False)
        except requests.exceptions.SSLError:
            sleep(10)
            continue
        # Decoding stays outside the try so JSON errors are not retried.
        return r.json()

In [69]:
def find_limit(return_json):
    '''Tell the caller whether another page has to be requested.

    Returns 'add' when the response's 'exceededTransferLimit' entry equals
    True, and 'dont add' otherwise (including when the entry is absent).
    '''
    exceeded = return_json.get('exceededTransferLimit', False)
    return 'add' if exceeded == True else 'dont add'

In [70]:
def send_tempdata(output_table, insert_column, return_json):
    '''Send data to postgresql

    Builds one row per feature (attribute values in the order of the
    response's 'fields', followed by the geometry as EWKT) and inserts all
    rows into _<output_table>.

    Parameters
    ----------
    output_table : string
        Table name without the leading underscore.

    insert_column : string
        Parenthesised column list as returned by create_table.

    return_json : dict
        A query response with 'features', 'fields' and 'geometryType'.
    '''
    features = return_json['features']
    trials = [[field['name'], field['type']] for field in return_json['fields']]
    # The geometry type is the same for every feature, so read it once. It is
    # only needed (and only required to be present) when there are features.
    geometry_type = return_json['geometryType'] if features else None

    rows = []
    for feature in features:
        attributes = feature['attributes']
        row = []
        for name, field_type in trials:
            value = attributes[name]
            # Dates arrive as epoch milliseconds; leave missing dates as NULL.
            if field_type == 'esriFieldTypeDate' and value is not None:
                value = to_time(value)
            row.append(value)
        # A feature without a geometry is stored with a NULL geom instead of
        # aborting the whole batch with a KeyError.
        geom = feature.get('geometry')
        row.append(get_geometry(geometry_type, geom) if geom is not None else None)
        rows.append(row)

    sql='INSERT INTO _{} {} VALUES %s'.format(output_table, insert_column)
    with con:
        with con.cursor() as cur:
            execute_values(cur, sql, rows)
    print('sent')

In [101]:
# Per-layer key columns: each helper returns [primary key column, column compared to detect changed rows]

def centreline():
    '''Key columns for the centreline layer, as [primary key, comparison column].'''
    primary_key = 'geo_id'
    where_id = 'trans_id_create'
    return [primary_key, where_id]
def bike():
    '''Key columns for the bike layer, as [primary key, comparison column].'''
    primary_key = 'objectid'
    where_id = 'trans_id_create'
    return [primary_key, where_id]
def bylaw():
    '''Key columns for the bylaw layer, as [primary key, comparison column].'''
    primary_key = 'objectid'
    where_id = 'last_updated_date'
    return [primary_key, where_id]

def get_info(mapserver, id):
    '''Build the '<mapserver>_<id>' key that info() uses to look up a layer.'''
    return '_'.join([str(mapserver), str(id)])

def info(server_info):
    '''Return the key columns registered for a layer.

    Parameters
    ----------
    server_info : string
        Key of the form '<map server number>_<layer id>', e.g. '2_2'.

    Returns
    -------
    list
        [primary key column, column compared to detect changed rows].

    Raises
    ------
    ValueError
        If no layer is registered under server_info. Previously this
        surfaced as "'NoneType' object is not callable".
    '''
    switcher = {
            '0_2': centreline,
            '2_2': bike,
            '2_39': bylaw
    }
    func = switcher.get(server_info)
    if func is None:
        raise ValueError('No layer information registered for {!r}'.format(server_info))
    return func()

In [104]:
def get_layer(mapserver, id):
    
    """
    This function calls to the GCCview rest API and inserts the outputs to the output table in the postgres database.

    The layer is downloaded page by page into a staging table and then
    merged into the output table by find_diff.

    Parameters
    ----------
    mapserver : int or string
        The map server that hosts the desired layer, given either as its
        number (e.g. 2) or as its name (e.g. 'cot_geospatial2')

    id : int
        The id of the desired layer

    Raises
    ------
    ValueError
        If mapserver is neither a known server number nor a
        'cot_geospatial<number>' name.
    """
    # Both forms are needed: the name for the REST URLs and the number for
    # the '<number>_<id>' key used by info(). Previously the number was read
    # from an undefined variable.
    prefix = 'cot_geospatial'
    if isinstance(mapserver, int):
        mapserver_n = mapserver
        mapserver = mapserver_name(mapserver_n)
        if mapserver is None:
            raise ValueError('Unknown map server number: {!r}'.format(mapserver_n))
    else:
        suffix = mapserver[len(prefix):] if str(mapserver).startswith(prefix) else None
        if suffix is None or (suffix and not suffix.isdigit()):
            raise ValueError('Unknown map server name: {!r}'.format(mapserver))
        # The bare prefix is server 0.
        mapserver_n = int(suffix) if suffix else 0

    output_table = get_tablename(mapserver, id)
    info_detail = info(get_info(mapserver_n,id))
    primary_key = info_detail[0]
    where_id = info_detail[1]

    # First page: no paging parameters. Its size becomes the page size for
    # the remaining requests, and its fields define the staging table.
    return_json = get_data(mapserver, id)
    insert_column = create_table(output_table, return_json, primary_key)
    record_max = len(return_json['features'])
    send_tempdata(output_table, insert_column, return_json)

    max_number = record_max
    while find_limit(return_json) == 'add':
        return_json = get_data(mapserver, id, max_number = max_number, record_max = record_max)
        if not return_json['features']:
            # An empty page cannot advance the offset; stop rather than loop forever.
            break
        send_tempdata(output_table, insert_column, return_json)
        max_number = max_number + record_max
    print('all rows inserted in _', output_table)

    find_diff(output_table, insert_column, primary_key, where_id)

In [103]:
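# Compare the staging table (_<name>) with the existing table: delete rows that disappeared and upsert the rest, or rename the staging table when no table exists yet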
def find_diff(output_table,insert_column, primary_key, where_id):
    '''Merge the staging table _<output_table> into <output_table>.

    If <output_table> already exists in the 'natalie' schema: rows whose
    primary key is no longer in the staging table are deleted, the staging
    rows are upserted, the table comment is stamped with today's date and
    the staging table is dropped. Otherwise the staging table is renamed
    to <output_table> and stamped.

    Parameters
    ----------
    output_table : string
        Table name without the leading underscore.

    insert_column : string
        Parenthesised column list as returned by create_table.

    primary_key : string
        Primary key column, used for the delete and as the conflict target.

    where_id : string
        Column compared between the old and new row; a row is only
        updated when this value differs.
    '''
    # Zero-padded so that e.g. 1 November and 11 January cannot collide.
    date = datetime.datetime.now().strftime('%Y%m%d')

    columns = (insert_column.replace('(', '')).replace(')', '')
    excluded_column = '(' + ','.join('EXCLUDED.{}'.format(i) for i in columns.split(',')) + ')'
    comment_sql = '''COMMENT ON TABLE {} IS 'last updated: {}' '''.format(output_table, date)

    # Find if old table exists
    sql = '''select count(1) from information_schema.tables where table_schema = 'natalie' and table_name = %s'''
    with con:
        with con.cursor() as cur:
            cur.execute(sql, (output_table,))
            result = cur.fetchone()

    # If table exists
    if result[0] == 1:
        # IN rather than =, because the subquery may return any number of rows.
        delete_sql = '''DELETE FROM {} WHERE {} IN (select {} FROM {} except select {} from _{})'''.format(
            output_table, primary_key, primary_key, output_table, primary_key, output_table)
        # IS DISTINCT FROM also detects a change when one side is NULL.
        upsert_sql = '''INSERT into {} as a {} select {} from _{} on conflict({}) do update set {} = {} where a.{} IS DISTINCT FROM EXCLUDED.{}'''.format(
            output_table, insert_column, columns, output_table, primary_key,
            insert_column, excluded_column, where_id, where_id)
        drop_sql = '''DROP TABLE _{}'''.format(output_table)

        # One transaction: either the whole merge is applied or none of it.
        with con:
            with con.cursor() as cur:
                # Delete rows that no longer exists in the new table
                cur.execute(delete_sql)
                # And then upsert stuff
                cur.execute(upsert_sql)
                cur.execute(comment_sql)
                # And then drop the temp table
                cur.execute(drop_sql)

    # if table does not exists -> create a new one
    else:
        rename_sql = '''ALTER TABLE _{} RENAME to {}'''.format(output_table, output_table)
        with con:
            with con.cursor() as cur:
                cur.execute(rename_sql)
                cur.execute(comment_sql)
        print('created new table')
        

In [None]:
# Load layer id 2 from map server 2 (the '2_2' entry in info()).
get_layer(2, 2)