Skip to content

Commit

Permalink
Merge pull request #326 from DemocracyClub/import_southwark
Browse files Browse the repository at this point in the history
Import Southwark
  • Loading branch information
symroe committed Jun 20, 2016
2 parents 6b69d8b + 8c39b75 commit 13ff905
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
from django.contrib.gis import geos
from django.contrib.gis.gdal import DataSource, GDALException
from django.contrib.gis.geos import Point, GEOSGeometry
from django.db import connection
from django.db import transaction
from django.utils.encoding import force_text
from django.utils.safestring import mark_safe

Expand Down Expand Up @@ -170,6 +172,23 @@ def clean_postcodes_overlapping_districts(self):
self.postcodes_contained_by_district = data['no_attention_needed']
self.postcodes_with_addresses_generated = data['addresses_created']

@transaction.atomic
def clean_ambiguous_addresses(self, council_id='E09000028'):
    """
    Delete residential addresses that are ambiguous within a council.

    Rows of the ResidentialAddress table belonging to ``council_id`` are
    grouped by CONCAT(address, postcode); every row whose concatenated
    address + postcode occurs more than once in that grouping is deleted.
    The statement runs inside a single database transaction.

    council_id: council whose duplicates are searched for. Defaults to
    'E09000028', the value that used to be hard-coded in the query, so
    existing callers that pass no argument behave as before.
    """
    # The table name comes from model metadata, not from user input, and
    # identifiers cannot be bound as query parameters, so it is formatted
    # into the statement. The council id is bound as a real parameter.
    table_name = ResidentialAddress()._meta.db_table
    sql = """
        DELETE FROM {0} WHERE CONCAT(address, postcode) IN (
            SELECT concat_address FROM (
                SELECT CONCAT(address, postcode) AS concat_address, COUNT(*) AS c
                FROM {0}
                WHERE council_id=%s
                GROUP BY CONCAT(address, postcode)
            ) as dupes
            WHERE dupes.c > 1
        )
    """.format(table_name)

    # NOTE(review): the outer DELETE is not itself filtered by council_id,
    # so a row from any council is removed if its address + postcode
    # matches a duplicate found for this council - confirm this is intended.
    with connection.cursor() as cursor:
        # The context manager closes the cursor even if execute() raises.
        cursor.execute(sql, [council_id])



def report(self):
# build report
Expand Down Expand Up @@ -217,6 +236,8 @@ def handle(self, *args, **kwargs):
except NotImplementedError:
pass

self.clean_ambiguous_addresses()

# For areas with shape data, use AddressBase to clean up overlapping
# postcode
self.clean_postcodes_overlapping_districts()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
Import Southwark
"""
from time import sleep

from django.contrib.gis.geos import Point

from data_collection.management.commands import BaseAddressCsvImporter
from data_finder.helpers import geocode, geocode_point_only, PostcodeError
from addressbase.models import Address


class Command(BaseAddressCsvImporter):
    """
    Imports the Polling Station data from Southwark Council

    Stations and addresses are both read from the same tab-separated file.
    """
    council_id = 'E09000028'
    addresses_name = 'PropertyPostCodePollingStationWebLookup-2016-06-15.TSV'
    stations_name = 'PropertyPostCodePollingStationWebLookup-2016-06-15.TSV'
    csv_delimiter = '\t'
    elections = [
        'ref.2016-06-23'
    ]

    def get_station_hash(self, record):
        """
        Return a string key for the station described by ``record``.

        The key joins the station postcode field (pollingplaceaddress7),
        the polling place id and the polling district reference with "-".
        """
        return "-".join([
            record.pollingplaceaddress7,
            record.pollingplaceid,
            record.pollingdistrictreference,
        ])

    def station_record_to_dict(self, record):
        """
        Convert one row of the stations file into a polling station dict.

        Returns a dict with the keys 'internal_council_id',
        'polling_district_id', 'postcode', 'address' and 'location'.
        'location' is a Point, or None when the row carries no grid
        reference and geocoding the postcode yields nothing.
        """
        # format address: one line per non-empty address field
        address_lines = [
            record.pollingplaceaddress1,
            record.pollingplaceaddress2,
            record.pollingplaceaddress3,
            record.pollingplaceaddress4,
            record.pollingplaceaddress5,
            record.pollingplaceaddress6,
            record.pollingplaceaddress7,
        ]
        # Dropping empty fields before joining avoids blank lines and also
        # avoids a stray leading/trailing newline when only the first or
        # last field is empty.
        address = "\n".join(line for line in address_lines if line).strip()

        location = None
        if int(record.pollingplaceeasting) and int(record.pollingplacenorthing):
            # srid 27700: the easting/northing pair is passed as-is
            location = Point(
                float(record.pollingplaceeasting),
                float(record.pollingplacenorthing),
                srid=27700)
        else:
            # no points supplied, so attempt to attach them by geocoding
            # Initialise first: if the lookup raises PostcodeError the name
            # would otherwise be unbound when tested below.
            location_data = None
            try:
                location_data = geocode_point_only(record.pollingplaceaddress7)
            except PostcodeError:
                # Best effort only: the station is kept without a location.
                pass

            if location_data:
                location = Point(
                    location_data['wgs84_lon'],
                    location_data['wgs84_lat'],
                    srid=4326)

        return {
            'internal_council_id': record.pollingplaceid,
            'polling_district_id': record.pollingdistrictreference,
            'postcode': record.pollingplaceaddress7,
            'address': address,
            'location': location,
        }

    def address_record_to_dict(self, record):
        """
        Convert one row of the addresses file into a residential address dict.

        A property number of '0' is omitted, so the address is then just
        the street name; otherwise it is "<number> <street name>".
        """
        street_name = record.streetname.strip()
        property_number = record.propertynumber.strip()

        if property_number == '0':
            address = street_name
        else:
            address = '%s %s' % (property_number, street_name)

        return {
            'address': address,
            'postcode': record.postcode.strip(),
            'polling_station_id': record.pollingplaceid,
        }

0 comments on commit 13ff905

Please sign in to comment.