In [40]:
import sys
sys.path.append("../../")

import django
import pylab as pl

from django.db import connections
from firecares.firestation.models import FireDepartment, FireStation
from firecares.utils import dictfetchall
from pandas import DataFrame

%matplotlib inline

django.setup()

cursor = connections['default'].cursor()

queryUnmatchedStations = """
select a.name as firestation_name, 
b.usgsstructuredata_ptr_id as firestation_id, 
b.department_id as dep_id, 
ST_ASTEXT(ST_TRANSFORM(d.geom,900913)) as firestation_location
from firestation_usgsstructuredata a
inner join firestation_firestation b
  on a.id=b.usgsstructuredata_ptr_id
join firecares_core_address d 
  on b.station_address_id=d.id
where b.department_id is not null and d.geom is not null
"""
#Query fireDepartments within 50 miles of station
queryNearbyDepartments = """
select c.name as department_name, c.id as dep_id,
ST_DISTANCE(ST_TRANSFORM(e.geom,900913),ST_GEOMFROMTEXT(%s,900913)) * 0.000621371 as dis_miles,
levenshtein(c.name,%s) as dis_name,
ST_INTERSECTS(ST_TRANSFORM(c.geom,900913),ST_GEOMFROMTEXT(%s,900913)) as dep_intersects
from firestation_firedepartment c 
inner join firecares_core_address e 
  on c.headquarters_address_id=e.id
where e.geom is not null and
(ST_DISTANCE(ST_TRANSFORM(e.geom,900913),ST_GEOMFROMTEXT(%s,900913)) * 0.000621371 <= 100 and
levenshtein(c.name,%s) <= 30); 
"""

queryUpdateFireStation = """
update firestation_firestation 
set department_id = %s
where firestation_firestation.usgsstructuredata_ptr_id = %s
"""
print "Before Execute"
cursor.execute(queryUnmatchedStations)
fireStations = dictfetchall(cursor)
print "After Execute"
print "Number of Stations:", cursor.rowcount

totalMatched = 0
totalFalseMatches = 0
matchedDict = dict()
for fireStation in fireStations:
    params = [fireStation['firestation_location'],
              fireStation['firestation_name'],
              fireStation['firestation_location'],
              fireStation['firestation_location'],
              fireStation['firestation_name']]
    cursor.execute(queryNearbyDepartments,params)
    nearbyDepartments = dictfetchall(cursor)
    distanceRatio = 0
    closestDepID = 0
    closestDepName = ''
    if cursor.rowcount == 0:
        totalFalseMatches += 1
        continue
    for fireDepartment in nearbyDepartments:
        departmentRatio = 1 + (1 - fireDepartment['dis_miles'] / 100) * 50  + (1 - fireDepartment['dis_name'] / 30) * 50
        if fireDepartment['dep_intersects'] == True:
            departmentRatio = departmentRatio * 125            
        if departmentRatio > distanceRatio:
            distanceRatio = departmentRatio
            closestDepID = fireDepartment['dep_id']
            closestDepName = fireDepartment['department_name']
    #fireStation['dep_id'] = closestDepID
    if fireStation['dep_id'] == closestDepID:
        totalMatched += 1
    else:
        totalFalseMatches += 1
    #if matchedDict.get(closestDepName) == None:
    #    matchedDict[closestDepName] = 1
    #else:
    #    matchedDict[closestDepName] = matchedDict[closestDepName] + 1

print "Done Matching"
print "Correctly Matched:", totalMatched
print "Incorrectly Matched:", totalFalseMatches
#for departmentName,numStations in matchedDict.iteritems():
#    print departmentName, numStations

Before Execute
After Execute
Number of Stations: 1847
Done Matching
Correctly Matched: 1645
Incorrectly Matched: 202


In [None]:
import sys
sys.path.append("../../")

import django
import pylab as pl

from django.db import connections
from firecares.firestation.models import FireDepartment, FireStation
from firecares.utils import dictfetchall
from pandas import DataFrame

%matplotlib inline

django.setup()

print "Number of unmatched fire stations:", FireStation.objects.filter(department__isnull=True).count()

cursor = connections['default'].cursor()

queryUnmatchedStations = """
select a.name as firestation_name, 
b.usgsstructuredata_ptr_id as firestation_id, 
b.department_id as dep_id, 
ST_ASTEXT(ST_TRANSFORM(d.geom,900913)) as firestation_location
from firestation_usgsstructuredata a
inner join firestation_firestation b
  on a.id=b.usgsstructuredata_ptr_id
join firecares_core_address d 
  on b.station_address_id=d.id
where b.department_id is null and d.geom is not null
"""
#Query fireDepartments within 50 miles of station
queryNearbyDepartments = """
select c.name as department_name, c.id as dep_id,
ST_DISTANCE(ST_TRANSFORM(e.geom,900913),ST_GEOMFROMTEXT(%s,900913)) * 0.000621371 as dis_miles,
levenshtein(c.name,%s) as dis_name,
ST_INTERSECTS(ST_TRANSFORM(c.geom,900913),ST_GEOMFROMTEXT(%s,900913)) as dep_intersects
from firestation_firedepartment c 
inner join firecares_core_address e 
  on c.headquarters_address_id=e.id
where e.geom is not null and
ST_DISTANCE(ST_TRANSFORM(e.geom,900913),ST_GEOMFROMTEXT(%s,900913)) * 0.000621371 <= 100 and
levenshtein(c.name,%s) <= 30; 
"""

queryUpdateFireStation = """
update firestation_firestation 
set department_id = %s
where firestation_firestation.usgsstructuredata_ptr_id = %s
"""

queryQualityControl = """
select fd.name as fd_name, usfa.number_of_stations as num_stations 
from firestation_firedepartment fd 
inner join usfa_census_national usfa 
 on fd.state=usfa."hq state" and fd.fdid=usfa.fdid;
"""

cursor.execute(queryUnmatchedStations)
fireStations = dictfetchall(cursor)
print "Number of Stations:", cursor.rowcount

matchedDict = dict()
for fireStation in fireStations:
    params = [fireStation['firestation_location'],
              fireStation['firestation_name'],
              fireStation['firestation_location'],
              fireStation['firestation_location'],
              fireStation['firestation_name']]
    cursor.execute(queryNearbyDepartments,params)
    nearbyDepartments = dictfetchall(cursor)
    distanceRatio = 0
    closestDepID = 0
    closestDepName = ''
    if cursor.rowcount == 0:
        continue
    for fireDepartment in nearbyDepartments:
        departmentRatio = 1 + (1 - fireDepartment['dis_miles'] / 100) * 50  + (1 - fireDepartment['dis_name'] / 30) * 50
        if fireDepartment['dep_intersects'] == True:
            departmentRatio = departmentRatio * 125            
        if departmentRatio > distanceRatio:
            distanceRatio = departmentRatio
            closestDepID = fireDepartment['dep_id']
            closestDepName = fireDepartment['department_name']
    fireStation['dep_id'] = closestDepID
    if matchedDict.get(closestDepName) == None:
        matchedDict[closestDepName] = 1
    else:
        matchedDict[closestDepName] = matchedDict[closestDepName] + 1

cursor.execute(queryQualityControl)
qualityControl = dictfetchall(cursor)

totalStationMatches = 0
totalIncorrectStations = 0
for fireDepartment in qualityControl:
    if matchedDict.get(fireDepartment['fd_name']) != None:
        if matchedDict[fireDepartment['fd_name']] == fireDepartment['num_stations']:
            totalStationMatches += 1
        else:
            totalIncorrectStations += 1
            
print "Station Matches:", totalStationMatches
print "Incorrect Station Counts:", totalIncorrectStations
     