In [2]:
import os
import pandas as pd
import logging
import geopandas as gpd
from shapely.geometry import Point
import requests, json

pd.set_option('display.height', 1000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


def geocode_address_google(address_line='',
                           locality='San Diego',
                           state='CA',
                           **kwargs):
    """Geocoding function using Google geocoding API."""
    address_line = str(address_line)
    locality = str(locality)
    state = str(state)
    google_token = 'AIzaSyCANr1U_qnHwX2y2WaU0YK5Wn-BCept3Ys'
    url = 'https://maps.googleapis.com/maps/api/geocode/json?'\
          + 'address={address}&'\
          + 'components=country:US|'\
          + 'administrative_area:{state}|'\
          + 'locality:{locality}&'\
          + 'key={google_token}'

    url = url.format(address=address_line,
                     state=state,
                     locality=locality,
                     google_token=google_token)

    print('Google Geocoding for: ' + address_line)
    if address_line in ['None', '', 'NaN', 'nan']:
        print('No geocode for: ' + address_line)
        return None, None
    else:
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
            body = json.loads(r.content)
            candidates = body['results']
            if candidates == []:
                print('No geocode for: ' + address_line)
                return None, None
            else:
                coords = body['results'][0]['geometry']['location']
                lat = coords['lat']
                lon = coords['lng']
                print('Geocode success for: ' + address_line)
                return lat, lon
        except Exception, e:
            logging.error(e)
            return None, None
        
        

        
def get_address_for_apn(apn):

    import requests

    url = "https://gissd.sandag.org/rdw/rest/services/Parcel/Parcels/MapServer/1/query"

    querystring = {
        "where":"APN IN ('{}')".format(apn),
        #"where": where_stmt,
        "objectIds":"",
        "time":"",
        "geometry":"",
        "geometryType":"esriGeometryEnvelope",
        "inSR":"",
        "spatialRel":"esriSpatialRelIntersects",
        "distance":"",
        "units":"esriSRUnit_Foot",
        "relationParam":"",
        "outFields": "*",
        #"outFields":"*",
        "returnGeometry":"true",
        "maxAllowableOffset":"",
        "geometryPrecision":"",
        "outSR":"4326",
        "gdbVersion":"",
        "returnDistinctValues":"false",
        "returnIdsOnly":"false",
        "returnCountOnly":"false",
        "returnExtentOnly":"false",
        "orderByFields":"",
        "groupByFieldsForStatistics":"",
        "outStatistics":"",
        "returnZ":"false",
        "returnM":"false",
        "multipatchOption":"",
        "resultOffset":"",
        "resultRecordCount":"",
        "f":"json"}

    headers = {
        'Cache-Control': "no-cache",
        'Postman-Token': "45d06817-feae-4fec-8dff-71c088d518d7"
        }

    print ("Get address for APN {}".format(apn))
    response = requests.request("POST", url, headers=headers, params=querystring)
    data = response.json()
    #gdf = gpd.GeoDataFrame.from_features(data['features'])

    #gdf.head()

    address = ""
    if data:
        apn_info = data['features'][0]['attributes']
        address = "{} {} {}".format(apn_info['SITUS_ADDRESS'], apn_info['SITUS_STREET'], apn_info['SITUS_SUFFIX'])

   
    return address

height has been deprecated.



In [3]:
sfv = pd.read_csv('/usr/local/airflow/poseidon/data/temp/tsw_violations_sf_temp.csv', dtype={
    'Site: Street Number': str,
    'Violation Date Formatted': str,
    'BMP Compliance Deadline': str
})
#sfv.head()

sfv.columns = ['V_NUM', 
               'STATUS', 
               'STATUS_1', 
               'V_TYPE', 
               'TYPE', 
               'V_DATE', 
               'BMP_COMP_DEADLINE',
               'SITE_ID',
               'PARCEL_APN',
               'ADDRESS_NUM',
               'ADDRESS_STREET',
               'ADDRESS_CITY',
               'ADDRESS_STATE',
               'VIOLATOR']

sfv.head()

Unnamed: 0,V_NUM,STATUS,STATUS_1,V_TYPE,TYPE,V_DATE,BMP_COMP_DEADLINE,SITE_ID,PARCEL_APN,ADDRESS_NUM,ADDRESS_STREET,ADDRESS_CITY,ADDRESS_STATE,VIOLATOR
0,VI-0007250,Unresolved,Initial Review Complete,BMP Violation,Notice of Deficient Maintenance,9/12/2018,9/26/2018,Site-138069,6461104000,7847.0,Airway Rd,San Diego,CA,Copart Auto Storage
1,VI-0007930,Unresolved,Initial Review Complete,BMP Violation,Notice of Deficient Maintenance,9/18/2018,10/2/2018,Site-138723,6461001400,2241.0,Cactus Rd,San Diego,CA,ADESA San Diego
2,VI-0004596,Unresolved,Review Complete,BMP Violation,Structural Maintenance Citation,,,Site-138747,2673607700,,End of Lower Scarborough Ct.,San Diego,CA,"Black Mountain Ranch, LLC"
3,VI-0007259,Unresolved,Initial Review Complete,BMP Violation,Notice of Deficient Maintenance,9/27/2018,10/11/2018,Site-138769,4416501300,3245.0,Sports Arena Blvd,San Diego,CA,SunBelt Investments
4,VI-0007260,Unresolved,Initial Review Complete,BMP Violation,Structural Maintenance Citation,9/27/2018,10/11/2018,Site-138769,4416501300,3245.0,Sports Arena Blvd,San Diego,CA,SunBelt Investments


In [4]:
sfv.loc[sfv.PARCEL_APN.isnull(), "PARCEL_APN"] = 0
sfv['PARCEL_APN'] = sfv.PARCEL_APN.astype(int)
sfv['SRC'] = 'TSW_SF'
sfv['LON'] = ''
sfv['LAT'] = ''
sfv['STATUS'] = sfv['STATUS']
sfv['TYPE'] = sfv['V_TYPE'] + ': ' + sfv['TYPE']
sfv['UUID'] = (sfv['SRC'] + '_' + sfv['V_NUM'].str.replace('-', '_')).str.lower()
sfv['ADDRESS'] = (sfv['ADDRESS_NUM'] + ' ' + sfv['ADDRESS_STREET']).astype(str)
sfv['ISSUE_DATE'] = sfv.V_DATE
sfv['COMPLY_BY'] = sfv.BMP_COMP_DEADLINE
sfv['ADDITIONAL_1'] = sfv['STATUS_1']
sfv['ADDITIONAL_2'] = ''

sfv = sfv[['UUID',
           'SRC',
           'TYPE',
           'STATUS',
           'ISSUE_DATE',
           'COMPLY_BY',
           'PARCEL_APN',
           'LON',
           'LAT',
           'ADDRESS',
           'VIOLATOR',
           'ADDITIONAL_1',
           'ADDITIONAL_2']]

sfv.head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,tsw_sf_vi_0007250,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/12/2018,9/26/2018,6461104000,,,7847 Airway Rd,Copart Auto Storage,Initial Review Complete,
1,tsw_sf_vi_0007930,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/18/2018,10/2/2018,6461001400,,,2241 Cactus Rd,ADESA San Diego,Initial Review Complete,
2,tsw_sf_vi_0004596,TSW_SF,BMP Violation: Structural Maintenance Citation,Unresolved,,,2673607700,,,,"Black Mountain Ranch, LLC",Review Complete,
3,tsw_sf_vi_0007259,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/27/2018,10/11/2018,4416501300,,,3245 Sports Arena Blvd,SunBelt Investments,Initial Review Complete,
4,tsw_sf_vi_0007260,TSW_SF,BMP Violation: Structural Maintenance Citation,Unresolved,9/27/2018,10/11/2018,4416501300,,,3245 Sports Arena Blvd,SunBelt Investments,Initial Review Complete,


# VPM

In [5]:
vpm = pd.read_csv('/usr/local/airflow/poseidon/data/temp/tsw_violations_vpm_temp.csv')
vpm.head()

Unnamed: 0,sw_bmp_report_id,project_id,project_name,bmpr_state,is_nov,report_date,reinspection_date,permit_number,sw_inspection_type_id,title,section_id,section_title,location_street,location_city,location_state,location_zip,comments
0,3266,523,Task SS16002 Tierrasanta Rec Pool Center Repair,submitted,1,2018-06-18,2018-06-21,,1,Routine,1,Major Buildings,11238 Claremont Mesa Blvd,San Diego,CA,92124,Inspected by Construction Storm Water 6/15/201...
1,2194,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-29,2018-04-02,1837674.0,5,Re-Inspection,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,City Tracking Permit # 523718\r\nâ€¢Miscellane...
2,1974,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-21,2018-03-26,1837674.0,1,Routine,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,For your information: \r\n\r\nWe&#39;re expect...
3,1804,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-15,2018-04-16,1837674.0,1,Routine,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...
4,1805,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-15,2018-04-16,1837674.0,1,Routine,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...


In [6]:
vpm['SRC'] = 'PW_VPM'
vpm['sw_bmp_report_id'] = vpm['sw_bmp_report_id'].astype(str)
vpm['project_id'] = vpm['project_id'].astype(str)
vpm['permit_number'] = vpm.permit_number.astype(str)
vpm['UUID'] = (vpm['SRC'] + '_' + vpm['sw_bmp_report_id'] + '_' + vpm['project_id']).str.lower()
vpm['LON'] = ''
vpm['LAT'] = ''
vpm['STATUS'] = vpm['bmpr_state']
vpm['TYPE'] = vpm.title
vpm['PARCEL_APN'] = ''
vpm['ISSUE_DATE'] = vpm['report_date']
vpm['COMPLY_BY'] = vpm['reinspection_date']
vpm['ADDRESS'] = vpm.location_street.astype(str)
vpm['CITY'] = vpm.location_city
vpm['STATE'] = vpm.location_state
vpm['ZIP'] = vpm.location_zip
vpm['VIOLATOR'] = vpm.project_name
vpm['ADDITIONAL_1'] = vpm.comments
vpm['ADDITIONAL_2'] = ''
vpm.head()

Unnamed: 0,sw_bmp_report_id,project_id,project_name,bmpr_state,is_nov,report_date,reinspection_date,permit_number,sw_inspection_type_id,title,section_id,section_title,location_street,location_city,location_state,location_zip,comments,SRC,UUID,LON,LAT,STATUS,TYPE,PARCEL_APN,ISSUE_DATE,COMPLY_BY,ADDRESS,CITY,STATE,ZIP,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,3266,523,Task SS16002 Tierrasanta Rec Pool Center Repair,submitted,1,2018-06-18,2018-06-21,,1,Routine,1,Major Buildings,11238 Claremont Mesa Blvd,San Diego,CA,92124,Inspected by Construction Storm Water 6/15/201...,PW_VPM,pw_vpm_3266_523,,,submitted,Routine,,2018-06-18,2018-06-21,11238 Claremont Mesa Blvd,San Diego,CA,92124,Task SS16002 Tierrasanta Rec Pool Center Repair,Inspected by Construction Storm Water 6/15/201...,
1,2194,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-29,2018-04-02,1837674.0,5,Re-Inspection,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,City Tracking Permit # 523718\r\nâ€¢Miscellane...,PW_VPM,pw_vpm_2194_435,,,submitted,Re-Inspection,,2018-03-29,2018-04-02,1551 Shelter Island Dr,San Diego,CA,92106,OTC - 11000284 - Kona Kai Expansion,City Tracking Permit # 523718\r\nâ€¢Miscellane...,
2,1974,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-21,2018-03-26,1837674.0,1,Routine,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,For your information: \r\n\r\nWe&#39;re expect...,PW_VPM,pw_vpm_1974_435,,,submitted,Routine,,2018-03-21,2018-03-26,1551 Shelter Island Dr,San Diego,CA,92106,OTC - 11000284 - Kona Kai Expansion,For your information: \r\n\r\nWe&#39;re expect...,
3,1804,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-15,2018-04-16,1837674.0,1,Routine,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...,PW_VPM,pw_vpm_1804_435,,,submitted,Routine,,2018-03-15,2018-04-16,1551 Shelter Island Dr,San Diego,CA,92106,OTC - 11000284 - Kona Kai Expansion,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...,
4,1805,435,OTC - 11000284 - Kona Kai Expansion,submitted,1,2018-03-15,2018-04-16,1837674.0,1,Routine,10,District 2,1551 Shelter Island Dr,San Diego,CA,92106,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...,PW_VPM,pw_vpm_1805_435,,,submitted,Routine,,2018-03-15,2018-04-16,1551 Shelter Island Dr,San Diego,CA,92106,OTC - 11000284 - Kona Kai Expansion,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...,


In [7]:
vpm = vpm[['UUID',
   'SRC',
   'TYPE',
   'STATUS',
   'ISSUE_DATE',
   'COMPLY_BY',
   'PARCEL_APN',
   'LON',
   'LAT',
   'ADDRESS',
   'VIOLATOR',
   'ADDITIONAL_1',
   'ADDITIONAL_2']]

vpm.head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,pw_vpm_3266_523,PW_VPM,Routine,submitted,2018-06-18,2018-06-21,,,,11238 Claremont Mesa Blvd,Task SS16002 Tierrasanta Rec Pool Center Repair,Inspected by Construction Storm Water 6/15/201...,
1,pw_vpm_2194_435,PW_VPM,Re-Inspection,submitted,2018-03-29,2018-04-02,,,,1551 Shelter Island Dr,OTC - 11000284 - Kona Kai Expansion,City Tracking Permit # 523718\r\nâ€¢Miscellane...,
2,pw_vpm_1974_435,PW_VPM,Routine,submitted,2018-03-21,2018-03-26,,,,1551 Shelter Island Dr,OTC - 11000284 - Kona Kai Expansion,For your information: \r\n\r\nWe&#39;re expect...,
3,pw_vpm_1804_435,PW_VPM,Routine,submitted,2018-03-15,2018-04-16,,,,1551 Shelter Island Dr,OTC - 11000284 - Kona Kai Expansion,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...,
4,pw_vpm_1805_435,PW_VPM,Routine,submitted,2018-03-15,2018-04-16,,,,1551 Shelter Island Dr,OTC - 11000284 - Kona Kai Expansion,Issues observed on site:\r\n\r\nâ€¢\tAll sedim...,


# PTS

In [8]:
ptsv = pd.read_csv('/usr/local/airflow/poseidon/data/temp/tsw_violations_pts_temp.csv')
ptsv.head()

Unnamed: 0,INSP_ID,ASSESSOR_PARCEL_10,LATITUDE,LONGITUDE,STREET_ADDRESS,INSP_TYPE_ID,INSP_TYPE_NM,INSP_RESULT_ID,INSP_RESULT_NM,PERFORMED_END_DT,PROJ_TITLE,SCOPE,LOCATION_NOTE,CONSTRUCTION_NOTE
0,3677918,4250131000.0,32.805272,-117.211903,3655 PAUL JONES AV,133,Storm Water Const BMPs,4,Fail,2018-11-13T07:14:24,Jones Residence Addition,,,
1,3677909,4163812000.0,32.802811,-117.246608,1344 MISSOURI ST,133,Storm Water Const BMPs,4,Fail,2018-11-13T07:07:54,Callahan Addition and Remodel,,,
2,3677833,3502310000.0,32.847275,-117.264779,7778 LUDINGTON PL,206,Storm Water Const BMPs-Status,4,Fail,2018-11-13T16:00:00,Conboy Studio,,ASBS,
3,3677828,3442303000.0,32.864544,-117.241784,8680 ROBIN HOOD LN,133,Storm Water Const BMPs,4,Fail,2018-11-13T16:00:00,Verna Pool and Spa,,,
4,3677813,4440601000.0,32.758948,-117.171225,875 HOTEL CIRCLE SOUTH,205,Storm Water Const BMPs-Status,4,Fail,2018-11-13T16:00:00,Legacy Internatn'l (build out),,,


In [9]:
ptsv.loc[ptsv.ASSESSOR_PARCEL_10.isnull(), "ASSESSOR_PARCEL_10"] = 0
ptsv['PARCEL_APN'] = ptsv.ASSESSOR_PARCEL_10.astype(int)
ptsv['LON'] = ptsv.LONGITUDE.astype(str)
ptsv['LAT'] = ptsv.LATITUDE.astype(str)
ptsv['SRC'] = 'DSD_PTS'
ptsv['TYPE'] = ptsv.INSP_TYPE_NM
ptsv['STATUS'] = ptsv.INSP_RESULT_NM
ptsv['UUID'] = (ptsv['SRC'] + '_' + ptsv['INSP_ID'].astype(str).str.replace('-', '_')).str.lower()
ptsv['ADDRESS'] = ptsv['STREET_ADDRESS'].astype(str)
ptsv['ISSUE_DATE'] = ptsv['PERFORMED_END_DT']
ptsv['VIOLATOR'] = ptsv['PROJ_TITLE']
ptsv['ADDITIONAL_1'] = ptsv['SCOPE']
ptsv['ADDITIONAL_2'] = ptsv['CONSTRUCTION_NOTE']
ptsv['COMPLY_BY'] = ''


ptsv = ptsv[['UUID',
           'SRC',
           'TYPE',
           'STATUS',
           'ISSUE_DATE',
           'COMPLY_BY',
           'PARCEL_APN',
           'LON',
           'LAT',
           'ADDRESS',
           'VIOLATOR',
           'ADDITIONAL_1',
           'ADDITIONAL_2']]
ptsv.head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,dsd_pts_3677918,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:14:24,,4250130600,-117.211903,32.805272,3655 PAUL JONES AV,Jones Residence Addition,,
1,dsd_pts_3677909,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:07:54,,4163811500,-117.246608,32.802811,1344 MISSOURI ST,Callahan Addition and Remodel,,
2,dsd_pts_3677833,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,3502310200,-117.264779,32.847275,7778 LUDINGTON PL,Conboy Studio,,
3,dsd_pts_3677828,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T16:00:00,,3442303200,-117.241784,32.864544,8680 ROBIN HOOD LN,Verna Pool and Spa,,
4,dsd_pts_3677813,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,4440601000,-117.171225,32.758948,875 HOTEL CIRCLE SOUTH,Legacy Internatn'l (build out),,


In [10]:
vs = pd.concat([ptsv, sfv, vpm])
print(vs.shape)
vs.head()

(3943, 13)


Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,dsd_pts_3677918,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:14:24,,4250130600,-117.211903,32.805272,3655 PAUL JONES AV,Jones Residence Addition,,
1,dsd_pts_3677909,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:07:54,,4163811500,-117.246608,32.802811,1344 MISSOURI ST,Callahan Addition and Remodel,,
2,dsd_pts_3677833,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,3502310200,-117.264779,32.847275,7778 LUDINGTON PL,Conboy Studio,,
3,dsd_pts_3677828,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T16:00:00,,3442303200,-117.241784,32.864544,8680 ROBIN HOOD LN,Verna Pool and Spa,,
4,dsd_pts_3677813,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,4440601000,-117.171225,32.758948,875 HOTEL CIRCLE SOUTH,Legacy Internatn'l (build out),,


In [11]:
# Try to geocode missing Addresses:

def get_geocode(row):
    
    
    
    if (row['LON'] == '') or (row['LAT'] == ''):
        address = row['ADDRESS']
        if (address == '') or (address == 'nan'):
            address = get_address_for_apn(row['PARCEL_APN'])
            
        print(address)
        
        gres = geocode_address_google(address, 'San Diego', 'CA')
        print(gres)
        if gres[0] == None:
            address = get_address_for_apn(row['PARCEL_APN'])
            print("Attempt geocode at new address {}".format(address))
            if len(address) > 0:
                gres = geocode_address_google(address, 'San Diego', 'CA')
                print(gres)
        
        row['LON'] = gres[1]
        row['LAT'] = gres[0]
        row['ADDRESS'] = address
        
    return row
    

In [51]:
vs = vs.apply(get_geocode, axis=1)


7847 Airway Rd
Google Geocoding for: 7847 Airway Rd
Geocode success for: 7847 Airway Rd
(32.559634, -116.9725946)
2241 Cactus Rd
Google Geocoding for: 2241 Cactus Rd
No geocode for: 2241 Cactus Rd
(None, None)
Get address for APN 6461001400
Attempt geocode at new address 2241 CACTUS RD
Google Geocoding for: 2241 CACTUS RD
No geocode for: 2241 CACTUS RD
(None, None)
Get address for APN 2673607700
0 PASEO MONTENERO None
Google Geocoding for: 0 PASEO MONTENERO None
Geocode success for: 0 PASEO MONTENERO None
(33.0168043, -117.1402095)
3245 Sports Arena Blvd
Google Geocoding for: 3245 Sports Arena Blvd
Geocode success for: 3245 Sports Arena Blvd
(32.7514122, -117.2102048)
3245 Sports Arena Blvd
Google Geocoding for: 3245 Sports Arena Blvd
Geocode success for: 3245 Sports Arena Blvd
(32.7514122, -117.2102048)
8583 Aero Dr
Google Geocoding for: 8583 Aero Dr
Geocode success for: 8583 Aero Dr
(32.808881, -117.141733)
2930 Barnard St
Google Geocoding for: 2930 Barnard St
Geocode success for: 29

IndexError: ('list index out of range', u'occurred at index 9')

In [12]:
# REmove non geocodes
print(vs[((vs.LON.astype(str) == '') | (vs.LAT.astype(str) == ''))].shape)
#vs = vs[(vs.LON.astype(str) != '') & (vs.LAT.astype(str) != '')]
vs[((vs.LON.astype(str) == '') | (vs.LAT.astype(str) == ''))].head(1000)

(39, 13)


Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,tsw_sf_vi_0007250,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/12/2018,9/26/2018,6461104000.0,,,7847 Airway Rd,Copart Auto Storage,Initial Review Complete,
1,tsw_sf_vi_0007930,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/18/2018,10/2/2018,6461001400.0,,,2241 Cactus Rd,ADESA San Diego,Initial Review Complete,
2,tsw_sf_vi_0004596,TSW_SF,BMP Violation: Structural Maintenance Citation,Unresolved,,,2673607700.0,,,,"Black Mountain Ranch, LLC",Review Complete,
3,tsw_sf_vi_0007259,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/27/2018,10/11/2018,4416501300.0,,,3245 Sports Arena Blvd,SunBelt Investments,Initial Review Complete,
4,tsw_sf_vi_0007260,TSW_SF,BMP Violation: Structural Maintenance Citation,Unresolved,9/27/2018,10/11/2018,4416501300.0,,,3245 Sports Arena Blvd,SunBelt Investments,Initial Review Complete,
5,tsw_sf_vi_0007138,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/19/2018,10/3/2018,4210501500.0,,,8583 Aero Dr,Olympus Property,Initial Review Complete,
6,tsw_sf_vi_0007297,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/17/2018,10/1/2018,4497200200.0,,,2930 Barnard St,"Dylan Point Loma, attn: Community Manager",Initial Review Complete,
7,tsw_sf_vi_0006939,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/13/2018,9/27/2018,3413923300.0,,,10155 Pacific Heights Blvd,,Initial Review Complete,
8,tsw_sf_vi_0004247,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Resolved,5/28/2018,6/12/2018,3040706000.0,,,12770 El Camino Real,Kilroy Realty,Review Complete,
9,tsw_sf_vi_0006454,TSW_SF,BMP Violation: Structural Maintenance NOV,Resolved,5/28/2018,6/12/2018,3040706000.0,,,12770 El Camino Real,Kilroy Realty,Review Complete,


In [13]:
vs.ADDITIONAL_1 = vs.ADDITIONAL_1.astype(str)
vs.ADDITIONAL_2 = vs.ADDITIONAL_2.astype(str)
vs.ADDITIONAL_1 = vs.ADDITIONAL_1.apply(lambda x: ''.join(e for e in x if e.isalnum()))
vs.ADDITIONAL_2 = vs.ADDITIONAL_2.apply(lambda x: ''.join(e for e in x if e.isalnum()))

In [14]:

vs['LON'] = pd.to_numeric(vs.LON, downcast='float')
vs['LAT'] = pd.to_numeric(vs.LAT, downcast='float')
#vs['COORD'] = vs.COORD.apply(Point)
vs['COORD'] = list(zip(vs.LON, vs.LAT))
vs['COORD'] = vs.COORD.apply(Point)
vs = gpd.GeoDataFrame(vs, geometry='COORD')
vs.head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2,COORD
0,dsd_pts_3677918,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:14:24,,4250130600,-117.211906,32.805271,3655 PAUL JONES AV,Jones Residence Addition,,,POINT (-117.2119064331055 32.80527114868164)
1,dsd_pts_3677909,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:07:54,,4163811500,-117.246605,32.802811,1344 MISSOURI ST,Callahan Addition and Remodel,,,POINT (-117.2466049194336 32.80281066894531)
2,dsd_pts_3677833,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,3502310200,-117.264778,32.847275,7778 LUDINGTON PL,Conboy Studio,,,POINT (-117.264778137207 32.84727478027344)
3,dsd_pts_3677828,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T16:00:00,,3442303200,-117.241783,32.864544,8680 ROBIN HOOD LN,Verna Pool and Spa,,,POINT (-117.2417831420898 32.86454391479492)
4,dsd_pts_3677813,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,4440601000,-117.171227,32.758949,875 HOTEL CIRCLE SOUTH,Legacy Internatn'l (build out),,,POINT (-117.1712265014648 32.75894927978516)


In [None]:
vs.groupby('SRC').count()["UUID"]

In [46]:
t = vs.loc[vs.STATUS.astype(str) == 'Fail']
t.loc[0,]


UUID                                         dsd_pts_3677918
SRC                                                  DSD_PTS
TYPE                                  Storm Water Const BMPs
STATUS                                                  Fail
ISSUE_DATE                               2018-11-13T07:14:24
COMPLY_BY                                                   
PARCEL_APN                                        4250130600
LON                                                 -117.212
LAT                                                  32.8053
ADDRESS                                  3655 PAUL JONES AV 
VIOLATOR                            Jones Residence Addition
ADDITIONAL_1                                             nan
ADDITIONAL_2                                             nan
COORD           POINT (-117.2119064331055 32.80527114868164)
Name: 0, dtype: object

In [29]:
print(2+3)

5


In [None]:
os.remove('/usr/local/airflow/poseidon/data/temp/tsw_violations_merged.geojson')
vs.to_file('/usr/local/airflow/poseidon/data/temp/tsw_violations_merged.geojson', driver='GeoJSON')

In [40]:
# Reconstruct and Write

vs.to_csv('/usr/local/airflow/poseidon/data/temp/tsw_violations_merged.csv')
vs.head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,dsd_pts_3677918,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:14:24,,4250130600,-117.211903,32.805272,3655 PAUL JONES AV,Jones Residence Addition,,
1,dsd_pts_3677909,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T07:07:54,,4163811500,-117.246608,32.802811,1344 MISSOURI ST,Callahan Addition and Remodel,,
2,dsd_pts_3677833,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,3502310200,-117.264779,32.847275,7778 LUDINGTON PL,Conboy Studio,,
3,dsd_pts_3677828,DSD_PTS,Storm Water Const BMPs,Fail,2018-11-13T16:00:00,,3442303200,-117.241784,32.864544,8680 ROBIN HOOD LN,Verna Pool and Spa,,
4,dsd_pts_3677813,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-11-13T16:00:00,,4440601000,-117.171225,32.758948,875 HOTEL CIRCLE SOUTH,Legacy Internatn'l (build out),,


In [18]:
df = pd.read_csv('/usr/local/airflow/poseidon/data/prod/stormwater_violations_merged.csv')
df[((df['LON'].astype(str) == '') | (df['LAT'].astype(str) == ''))]

Unnamed: 0.1,Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2,COORD


In [16]:
print(vs[((vs.LON.astype(str) == '') | (vs.LAT.astype(str) == ''))].shape)
#vs = vs[(vs.LON.astype(str) != '') & (vs.LAT.astype(str) != '')]
vs[((vs.LON.astype(str) == '') | (vs.LAT.astype(str) == ''))].head(1000)

(0, 14)


Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2,COORD


In [13]:
where_stmt = "APN IN ("
for i in parcel_prim.PARCEL_APN:
    where_stmt = where_stmt + "'" + str(i) + "', "

where_stmt = where_stmt + ")"
where_stmt = where_stmt.replace(', )', ")")
where_stmt

"APN IN ('6461104000', '2673607700', '4416501300', '4416501300', '4210501500', '4497200200', '3413923300', '3040706000', '3040706000', '3431603300', '3431601600')"

In [72]:
def get_address_for_apn(apn):

    apn = '6461104000'

    import requests

    url = "https://gissd.sandag.org/rdw/rest/services/Parcel/Parcels/MapServer/1/query"

    querystring = {
        "where":"APN IN ('{}')".format(apn),
        #"where": where_stmt,
        "objectIds":"",
        "time":"",
        "geometry":"",
        "geometryType":"esriGeometryEnvelope",
        "inSR":"",
        "spatialRel":"esriSpatialRelIntersects",
        "distance":"",
        "units":"esriSRUnit_Foot",
        "relationParam":"",
        "outFields": "*",
        #"outFields":"*",
        "returnGeometry":"true",
        "maxAllowableOffset":"",
        "geometryPrecision":"",
        "outSR":"4326",
        "gdbVersion":"",
        "returnDistinctValues":"false",
        "returnIdsOnly":"false",
        "returnCountOnly":"false",
        "returnExtentOnly":"false",
        "orderByFields":"",
        "groupByFieldsForStatistics":"",
        "outStatistics":"",
        "returnZ":"false",
        "returnM":"false",
        "multipatchOption":"",
        "resultOffset":"",
        "resultRecordCount":"",
        "f":"json"}

    headers = {
        'Cache-Control': "no-cache",
        'Postman-Token': "45d06817-feae-4fec-8dff-71c088d518d7"
        }

    response = requests.request("POST", url, headers=headers, params=querystring)
    data = response.json()
    #gdf = gpd.GeoDataFrame.from_features(data['features'])

    #gdf.head()

    apn_info = data['features'][0]['attributes']

    address = "{} {} {} {}".format(apn_info['SITUS_ADDRESS'], apn_info['SITUS_STREET'], apn_info['SITUS_SUFFIX'], apn_info['SITUS_ZIP'])

    print(address)
    return address


'7847 AIRWAY RD 92154-8305'

In [156]:
def remoteGeoJSONToGDF(url, display = False):
    """Import remote GeoJSON to a GeoDataFrame
    Keyword arguments:
    url -- URL to GeoJSON resource on web
    display -- Displays geometries upon loading (default: False)
    """
    r = requests.get(url)
    data = r.json()
    gdf = gpd.GeoDataFrame.from_features(data['features'])
    if display:
        gdf.plot()
return gdf



{u'geometry': {u'rings': [[[-117.22532022565962, 32.883570890024], [-117.22590078922704, 32.88340652253302], [-117.22631008722587, 32.884435749534106], [-117.22572951844542, 32.8846001189375], [-117.22532022565962, 32.883570890024]]]}, u'attributes': {u'APN': u'3431601600'}}
{u'geometry': {u'rings': [[[-116.97112397409542, 32.56047461978179], [-116.971163317663, 32.55698925628499], [-116.97118370594045, 32.55517535448164], [-116.97119247164807, 32.55439548055537], [-116.97119812378011, 32.55389260295056], [-116.97120205232673, 32.553543161628234], [-116.971202114527, 32.553537665046285], [-116.9733726679221, 32.55354184228616], [-116.97329225871542, 32.56048767305919], [-116.97112397409542, 32.56047461978179]]]}, u'attributes': {u'APN': u'6461104000'}}
{u'geometry': {u'rings': [[[-117.14174411011962, 33.01505306740294], [-117.1416919219961, 33.015013114778384], [-117.14163561373451, 33.01497735029007], [-117.14164605075725, 33.01494265876375], [-117.14165224253945, 33.01490696429254], 

In [172]:
## Create A DF where only PARCEL_APN exists, but not lat/lon

parcel_prim = vs[vs.PARCEL_APN.notnull() & (vs.PARCEL_APN != 0) & (
    (vs.LON == '') | (vs.LAT == '') | (vs.LON.isnull()) | (vs.LAT.isnull()))]

parcel_prim.sort_values(by='LON', na_position='first').head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
0,tsw_sf_vi_0007250,TSW_SF,BMP Violation: Notice of Deficient Maintenance,Unresolved,9/12/2018,9/26/2018,6461104000.0,,,7847 Airway Rd,Copart Auto Storage,Initial Review Complete,
7,pw_vpm_1363_206,PW_VPM,Pre-Rain,submitted,2018-02-23,2018-02-26,,,,5260 Roswell Street,24006724 - 5260 Roswell Street,graded slope is missing erosion control blanke...,
8,pw_vpm_1244_206,PW_VPM,Routine,submitted,2018-02-13,2018-02-16,,,,5260 Roswell Street,24006724 - 5260 Roswell Street,"provide bmp at end of the day, slope need BMP ...",
9,pw_vpm_1145_89,PW_VPM,Routine,submitted,2018-02-06,2018-02-09,,,,8025 Birmingham Dr,24007242 - Sharp Birmingham Garage Expansion,â€¢\tInlet protection was missing from constru...,
10,pw_vpm_812_166,PW_VPM,Rain,submitted,2018-01-10,2018-01-16,,,,901-957 Block Pacific Highway,24004099 - Pacific Highway and West Broadway (PI),Inspection 1-9-18\r\nAll concrete & other mate...,


In [173]:
## Create A DF where only LAT_LN EXISTS but NOT PARCEL_APN

ll_prim = vs[vs.PARCEL_APN.isnull() | (vs.PARCEL_APN == 0) & (
    (vs.LON != '') & (vs.LAT != '') & (vs.LON.notnull()) & (vs.LAT.notnull()))]

ll_prim.sort_values(by='PARCEL_APN', na_position='first').head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
3,dsd_pts_3667942,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-26T14:02:51,,0,-117.012265,32.859868,8996 HIGHTAIL DR,TALUS Phased Dev 2016 (63),,
5,dsd_pts_3667932,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-26T14:03:19,,0,-117.012282,32.859989,9002 HIGHTAIL DR,TALUS Phased Dev 2016 (63),,
6,dsd_pts_3667931,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-26T14:03:06,,0,-117.012277,32.859712,8990 HIGHTAIL DR,TALUS Phased Dev 2016 (63),,
15,dsd_pts_3667007,DSD_PTS,Storm Water Const BMPs,Fail,2018-10-26T16:00:00,,0,-117.012335,32.86042,9020 HIGHTAIL DR,TALUS Phased Dev 2016 (63),,
16,dsd_pts_3667006,DSD_PTS,Storm Water Const BMPs,Fail,2018-10-26T16:00:00,,0,-117.012284,32.860143,9008 HIGHTAIL DR,TALUS Phased Dev 2016 (63),,


In [174]:
## Create A DF where everything is legit

legit = vs[vs.PARCEL_APN.notnull() & (vs.PARCEL_APN != 0) & (
    (vs.LON != '') & (vs.LAT != '') & (vs.LON.notnull()) & (vs.LAT.notnull()))]

legit.sort_values(by=['PARCEL_APN', 'LON', 'LAT'], na_position='first').head()

Unnamed: 0,UUID,SRC,TYPE,STATUS,ISSUE_DATE,COMPLY_BY,PARCEL_APN,LON,LAT,ADDRESS,VIOLATOR,ADDITIONAL_1,ADDITIONAL_2
90,dsd_pts_3662311,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-22T14:38:04,,2673812700,-117.150941,33.011859,15441 ARTESIAN RIDGE RD,Artesian Phased Dev. 2016 (36),,
4,dsd_pts_3667936,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-29T16:00:00,,3050103800,-117.193119,32.968231,6124 ARTISAN WY,PHR - ALMERIA 17A 2013 (80),,
2,dsd_pts_3667943,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-29T16:00:00,,3050103800,-117.193495,32.968063,6116 ARTISAN WY,PHR - ALMERIA 17A 2013 (80),,
37,dsd_pts_3664888,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-25T16:00:00,,3050103800,-117.193943,32.967829,6090 SEAFARING WY,PHR - ALMERIA 17A 2013 (80),,
14,dsd_pts_3667020,DSD_PTS,Storm Water Const BMPs-Status,Fail,2018-10-25T16:00:00,,3050103800,-117.194028,32.968202,6098 SEAFARING WY,PHR - ALMERIA 17A 2013 (80),,


In [175]:
# Run some stats
all_row_count = parcel_prim.shape[0] + ll_prim.shape[0] + legit.shape[0]

print("""
Parcels Only Count: {} \n
LonLat Only Count: {} \n
Legit Count: {} \n

Total: {}
""").format(parcel_prim.shape[0], ll_prim.shape[0], legit.shape[0], all_row_count)

assert(all_row_count == vs.shape[0])


Parcels Only Count: 39 

LonLat Only Count: 14 

Legit Count: 86 


Total: 139

