Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import Bexley #336

Merged
merged 28 commits into from Jun 20, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
01732c1
Clean postcodes when ResidentialAddress model is saved
symroe Jun 12, 2016
4c445af
Add Newport Council in as fixture
symroe Jun 12, 2016
fff8eaa
Add addressbase app
symroe Jun 12, 2016
e748568
debug-toolbar and django-extensions are useful!
symroe Jun 12, 2016
45726f8
optionally pass district to get_polling_station
symroe Jun 13, 2016
116d0d7
Use get_polling_station when creating addresses
symroe Jun 13, 2016
c72417d
pep8 fixes for base importers
symroe Jun 13, 2016
707bc23
DRY up duplicated handle method
symroe Jun 13, 2016
70865e4
Change interface of postcodes_not_contained_by_district
symroe Jun 13, 2016
0a65760
Create addresses for districts on import
symroe Jun 13, 2016
e04c7d4
fixup! pep8 fixes for base importers
symroe Jun 13, 2016
9b70ca8
fixup! Add addressbase app
symroe Jun 16, 2016
6677aa2
fixup! Add addressbase app
symroe Jun 16, 2016
2caee24
fixup! Create addresses for districts on import
symroe Jun 17, 2016
1a8e737
fixup! Create addresses for districts on import
symroe Jun 17, 2016
c77704f
Return a polling station if all have the same address
symroe Jun 17, 2016
bdd3233
Add centre_from_points_qs
symroe Jun 17, 2016
9688a91
Only update_or_create on slug
symroe Jun 17, 2016
39ffbca
Don't re-add seen polling stations
symroe Jun 17, 2016
4e1af79
Try to geocode from addressbase, if we have it
symroe Jun 17, 2016
e9c4c5f
fixup! Try to geocode from addressbase, if we have it
symroe Jun 17, 2016
6029f32
fixup! Create addresses for districts on import
symroe Jun 19, 2016
c0905a0
Catch case where districts overlap
symroe Jun 19, 2016
e78d9fd
Fix polygons for Calderdale
symroe Jun 19, 2016
96da588
Import Southwark
symroe Jun 17, 2016
16296f5
clean_ambiguous_addresses after import
symroe Jun 19, 2016
a490adb
Replace `-` with `_` in CsvHelper headers
symroe Jun 19, 2016
0ae8589
Import Bexley
symroe Jun 19, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
88,514 changes: 88,514 additions & 0 deletions data/E09000004-Bexley/Copy of GLA POLLING STATION FINDER.csv

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions polling_stations/api.py
Expand Up @@ -108,8 +108,8 @@ def get_queryset(self, **kwargs):
assert 'location' in kwargs
assert 'council' in kwargs
return PollingStation.objects.get_polling_station(
kwargs['location'],
kwargs['council']['council_id']
kwargs['council']['council_id'],
location=kwargs['location']
)

def retrieve(self, requst, pk=None, format=None):
Expand Down
Empty file.
3 changes: 3 additions & 0 deletions polling_stations/apps/addressbase/admin.py
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
120 changes: 120 additions & 0 deletions polling_stations/apps/addressbase/fixtures/test_kentwell_data.json
@@ -0,0 +1,120 @@
[
{
"pk": "X01000001",
"model": "councils.council",
"fields": {
"location": null,
"area": null,
"website": "",
"mapit_id": "",
"address": "",
"council_type": "",
"postcode": "",
"name": "Kentwell Hall",
"email": "",
"phone": ""
}
},
{
"model": "pollingstations.pollingdistrict",
"pk": 117,
"fields": {
"polling_station_id": "1",
"area": "SRID=4326;MULTIPOLYGON (((0.7159137724833404 52.0963887902320408, 0.7158064841227768 52.0966656155218288, 0.7150447367626540 52.0965997048947216, 0.7150232790905341 52.0964283368071861, 0.7148623465496525 52.0960855986576377, 0.7145833968121562 52.0957428578754786, 0.7144439219433946 52.0956110337960325, 0.7143044470746330 52.0954001144594159, 0.7143044470746330 52.0952089679492687, 0.7142508028943422 52.0950178206195460, 0.7141757010419404 52.0949321336202118, 0.7140576838452987 52.0948794030766749, 0.7142400740583001 52.0946487062176971, 0.7143795489270348 52.0945366530267151, 0.7147979735333286 52.0944905133962024, 0.7153666018443900 52.0946552975732686, 0.7156991957622041 52.0946157494263247, 0.7159352301554605 52.0950639597052572, 0.7162570952371966 52.0955714864963397, 0.7159137724833404 52.0963887902320408)))",
"council": "X01000001",
"internal_council_id": "1",
"name": "Little Melford",
"extra_id": "1"
}
},
{
"model": "pollingstations.pollingdistrict",
"pk": 118,
"fields": {
"polling_station_id": "2",
"area": "SRID=4326;MULTIPOLYGON (((0.7216429709382847 52.0976938087004839, 0.7218146323152173 52.0978322175108843, 0.7222545145935950 52.0979047171923710, 0.7229304312652651 52.0980101710646437, 0.7232522963470014 52.0980628979070133, 0.7232952116912411 52.0971533511376492, 0.7229089735931452 52.0970808502349669, 0.7222008704133132 52.0970017582065452, 0.7217609881349354 52.0970874412311957, 0.7218790053315770 52.0972456248439713, 0.7217073439546445 52.0972983525900517, 0.7215893267580028 52.0973774440925936, 0.7216429709382847 52.0976938087004839)))",
"council": "X01000001",
"internal_council_id": "2",
"name": "Barns Sward",
"extra_id": "2"
}
},
{
"model": "pollingstations.pollingdistrict",
"pk": 119,
"fields": {
"polling_station_id": "3",
"area": "SRID=4326;MULTIPOLYGON (((0.7199800013493582 52.0969094839963276, 0.7198405264805966 52.0967051618510126, 0.7201516627262907 52.0960790075134526, 0.7220184802004105 52.0962569680613257, 0.7216000555941527 52.0970874412311957, 0.7199800013493582 52.0969094839963276)))",
"council": "X01000001",
"internal_council_id": "3",
"name": "Tourney Field",
"extra_id": "3"
}
},
{
"pk": 119,
"model": "pollingstations.pollingstation",
"fields": {
"postcode": "KW15 88TF",
"location": "SRID=4326;POINT (0.7208994610313341 52.0965737442737549)",
"polling_district_id": "TF1",
"council": "X01000001",
"internal_council_id": "3",
"polling_district_id": "3",
"address": "By the gate"
}
},
{
"pk": 118,
"model": "pollingstations.pollingstation",
"fields": {
"postcode": "KW15 88TF",
"location": "SRID=4326;POINT (0.7222083790304389 52.0978388001128465)",
"polling_district_id": "BS1",
"council": "X01000001",
"internal_council_id": "2",
"polling_district_id": "2",
"address": "In the woolshed"
}
},
{
"pk": 117,
"model": "pollingstations.pollingstation",
"fields": {
"postcode": "KW15 88LM",
"location": "SRID=4326;POINT (0.7151917202484516 52.0963096886925001)",
"polling_district_id": "LM1",
"council": "X01000001",
"internal_council_id": "1",
"polling_district_id": "1",
"address": "Tissy's caravan"
}
},
{
"pk": "9999991",
"fields": {
"postcode": "KW15 88TF",
"address": "In the middle of the field",
"location": "SRID=4326;POINT (0.7208994610313337 52.0965737442737833)"
},
"model": "addressbase.address"
},
{
"pk": "9999992",
"fields": {
"postcode": "KW15 88TF",
"address": "The Cott",
"location": "SRID=4326;POINT (0.7225083308451804 52.0975051994085803)"
},
"model": "addressbase.address"
},
{
"pk": "9999993",
"fields": {
"postcode": "KW15 88LM",
"address": "Sym's Caravan",
"location": "SRID=4326;POINT (0.7152352684829660 52.0954911675566947)"
},
"model": "addressbase.address"
}
]
90 changes: 90 additions & 0 deletions polling_stations/apps/addressbase/helpers.py
@@ -0,0 +1,90 @@
from pollingstations.models import (PollingDistrict, ResidentialAddress,
PollingStation)
from addressbase.models import Address


def centre_from_points_qs(qs):
    """Return one representative point for a collection of located objects.

    Args:
        qs: A queryset (or other iterable with a truth value) of objects
            exposing a ``location`` geometry that supports ``union()``.

    Returns:
        ``None`` when ``qs`` is empty, the single object's ``location`` when
        there is exactly one, otherwise the ``centroid`` of the union of
        every ``location``.
    """
    if not qs:
        return None

    # Extract the geometries once so the collection is iterated a single time.
    locations = [m.location for m in qs]
    if len(locations) == 1:
        return locations[0]

    # Fold each remaining location into the first; every geometry is
    # unioned exactly once.
    merged = locations[0]
    for location in locations[1:]:
        merged = merged.union(location)

    return merged.centroid


def district_contains_all_points(district, points):
    """Return True when ``district.area`` contains every point in ``points``.

    An empty ``points`` iterable yields True. Checking stops at the first
    point that is not contained, so later points are never tested.
    """
    return all(district.area.contains(p) for p in points)


def postcodes_not_contained_by_district(district):
    """Report which of a district's postcodes spill outside its area.

    Every postcode returned by ``Address.objects.postcodes_for_district``
    is counted; a postcode is flagged when not all of its address points
    are contained by the district.

    Returns:
        A dict with ``'not_contained'`` (list of flagged postcodes, in
        iteration order) and ``'total'`` (number of postcodes examined).
    """
    flagged = []
    examined = 0

    for postcode in Address.objects.postcodes_for_district(district):
        postcode_points = Address.objects.points_for_postcode(postcode)
        examined += 1
        if district_contains_all_points(district, postcode_points):
            continue
        flagged.append(postcode)

    return {
        'not_contained': flagged,
        'total': examined,
    }


def make_addresses_for_postcode(postcode, council_id):
    """Create ResidentialAddress rows for the addresses in one postcode.

    Each ``Address`` with the given postcode is matched to the single
    ``PollingDistrict`` of ``council_id`` whose area covers its location.
    Addresses matching no district, or more than one, are skipped.

    Args:
        postcode: The postcode whose addresses should be processed.
        council_id: Primary key of the council whose districts are searched.

    Returns:
        The number of ResidentialAddress rows newly created by this call.
        Rows that already existed for a UPRN are left untouched and are
        not counted.
    """
    created = 0
    for address in Address.objects.filter(postcode=postcode):
        try:
            district = PollingDistrict.objects.get(
                area__covers=address.location, council_id=council_id)
        except PollingDistrict.DoesNotExist:
            # Chances are this is on the edge of the council area, and
            # we don't have data for the area the property is in
            continue
        except PollingDistrict.MultipleObjectsReturned:
            # This is normally caused by districts that overlap.
            # Because we have no way of knowing what the correct district is,
            # we need to ignore this address
            continue

        polling_station = PollingStation.objects.get_polling_station(
            district.council.pk,
            polling_district=district)

        # NOTE(review): if get_polling_station can return None, the attribute
        # access below raises AttributeError - confirm against the manager.
        _, was_created = ResidentialAddress.objects.get_or_create(
            slug=address.uprn,
            defaults={
                'address': address.address,
                'postcode': postcode,
                'council': district.council,
                'polling_station_id': polling_station.internal_council_id,
            }
        )
        # get_or_create also returns pre-existing rows; only count new ones.
        if was_created:
            created += 1
    return created

def create_address_records_for_council(council):
    """Create address records for a council's postcodes that need them.

    For every PollingDistrict of ``council``, postcodes whose address points
    are not all contained by the district are passed to
    ``make_addresses_for_postcode``.

    Returns:
        A dict with:
        ``'no_attention_needed'``: count of postcodes fully contained by
        their district;
        ``'addresses_created'``: running total of the values returned by
        ``make_addresses_for_postcode`` across all flagged postcodes;
        ``'postcodes_needing_address_lookup'``: set of flagged postcodes.
    """
    postcode_report = {
        'no_attention_needed': 0,
        'addresses_created': 0,
        'postcodes_needing_address_lookup': set(),
    }

    for district in PollingDistrict.objects.filter(council=council):
        data = postcodes_not_contained_by_district(district)

        postcode_report['no_attention_needed'] += (
            data['total'] - len(data['not_contained']))

        for postcode in data['not_contained']:
            postcode_report['postcodes_needing_address_lookup'].add(postcode)
            # Accumulate rather than overwrite, so the report reflects every
            # postcode and not just the last one processed.
            postcode_report['addresses_created'] += \
                make_addresses_for_postcode(postcode, council.pk)

    return postcode_report
@@ -0,0 +1 @@

@@ -0,0 +1,100 @@
import csv
import os
import glob

from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Rewrite raw AddressBase CSVs as slim ``<name>_cleaned.csv`` files.

    Every ``*.csv`` in the given folder (except files already ending in
    ``cleaned.csv``) is read with the column names in ``fieldnames`` and
    written back out with four columns: UPRN, address, postcode, location.
    No header row is written to the output.
    """

    # Column names applied, in order, to every row of the input CSVs. They
    # are passed explicitly to DictReader, so the first row is read as data.
    # Defined on the class so the helper methods work without handle().
    fieldnames = [
        'UPRN',
        'OS_ADDRESS_TOID',
        'UDPRN',
        'ORGANISATION_NAME',
        'DEPARTMENT_NAME',
        'PO_BOX_NUMBER',
        'SUB_BUILDING_NAME',
        'BUILDING_NAME',
        'BUILDING_NUMBER',
        'DEPENDENT_THOROUGHFARE',
        'THOROUGHFARE',
        'POST_TOWN',
        'DOUBLE_DEPENDENT_LOCALITY',
        'DEPENDENT_LOCALITY',
        'POSTCODE',
        'POSTCODE_TYPE',
        'X_COORDINATE',
        'Y_COORDINATE',
        'LATITUDE',
        'LONGITUDE',
        'RPC',
        'COUNTRY',
        'CHANGE_TYPE',
        'LA_START_DATE',
        'RM_START_DATE',
        'LAST_UPDATE_DATE',
        'CLASS',
    ]

    # Columns of the cleaned output files, in order.
    out_fieldnames = [
        'UPRN',
        'address',
        'postcode',
        'location',
    ]

    def add_arguments(self, parser):
        """Register the positional ``ab_path`` folder argument."""
        parser.add_argument(
            'ab_path',
            help='The path to the folder containing the AddressBase CSVs'
        )

    def handle(self, *args, **kwargs):
        """Clean every raw CSV found in ``ab_path``."""
        self.base_path = os.path.abspath(kwargs['ab_path'])
        for csv_path in glob.glob(os.path.join(self.base_path, '*.csv')):
            # Skip the output of a previous run.
            if csv_path.endswith('cleaned.csv'):
                continue
            # basename() honours the platform separator; the name is then
            # cut at its first dot.
            file_name = os.path.basename(csv_path).split('.')[0]
            out_path = os.path.join(self.base_path, '{}_cleaned.csv'.format(
                file_name
            ))
            # The with-block flushes and closes the file on exit.
            with open(out_path, 'w') as out_file:
                self.out_csv = csv.DictWriter(
                    out_file, fieldnames=self.out_fieldnames)
                print(csv_path)
                self.clean_csv(csv_path)

    def line_filer(self, csv_path):
        """Yield each row of ``csv_path`` as a dict keyed by ``fieldnames``."""
        with open(csv_path) as csv_file:
            for line in csv.DictReader(csv_file, fieldnames=self.fieldnames):
                # Do any filtering we might need to do here
                yield line

    def clean_csv(self, csv_path):
        """Write the cleaned form of every row of ``csv_path`` to ``out_csv``."""
        for line in self.line_filer(csv_path):
            self.out_csv.writerow(self.clean_output_line(line))

    def clean_address(self, line):
        """Join the non-empty address parts of ``line`` with ``", "``."""
        address_fields = [
            line['ORGANISATION_NAME'],
            line['DEPARTMENT_NAME'],
            line['PO_BOX_NUMBER'],
            line['SUB_BUILDING_NAME'],
            line['BUILDING_NAME'],
            line['BUILDING_NUMBER'],
            line['DEPENDENT_THOROUGHFARE'],
            line['THOROUGHFARE'],
            line['DOUBLE_DEPENDENT_LOCALITY'],
            line['DEPENDENT_LOCALITY'],
            line['POST_TOWN'],
        ]
        return ", ".join([f for f in address_fields if f])

    def clean_output_line(self, line):
        """Build the four-column output row for one input row.

        ``location`` is formatted as ``SRID=4326;POINT(<longitude> <latitude>)``.
        """
        return {
            'UPRN': line['UPRN'],
            'address': self.clean_address(line),
            'postcode': line['POSTCODE'],
            'location': "SRID=4326;POINT({} {})".format(
                line['LONGITUDE'],
                line['LATITUDE'],
            ),
        }
@@ -0,0 +1,28 @@
import os
import glob

from django.db import connection
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Bulk-load ``*_cleaned.csv`` files into ``addressbase_address``.

    Each matching file in the given folder is loaded with a PostgreSQL
    ``COPY ... FROM`` statement executed through the Django connection.
    """

    def add_arguments(self, parser):
        """Register the positional ``cleaned_ab_path`` folder argument."""
        parser.add_argument(
            'cleaned_ab_path',
            help='The path to the folder containing the cleaned AddressBase CSVs'
        )

    def handle(self, *args, **kwargs):
        """Run one COPY per cleaned CSV found in ``cleaned_ab_path``."""
        # COPY resolves the file name on the database server, so a relative
        # path would be read against a different directory than the one
        # globbed here. Make it absolute first.
        glob_str = os.path.join(
            os.path.abspath(kwargs['cleaned_ab_path']),
            "*_cleaned.csv"
        )
        with connection.cursor() as cursor:
            for cleaned_file_path in glob.glob(glob_str):
                print(cleaned_file_path)
                # Pass the path as a query parameter so quotes or
                # backslashes in it are escaped by the database driver.
                cursor.execute("""
                    COPY addressbase_address (UPRN,address,postcode,location)
                    FROM %s (FORMAT CSV, DELIMITER ',', quote '"');
                """, [cleaned_file_path])
25 changes: 25 additions & 0 deletions polling_stations/apps/addressbase/migrations/0001_initial.py
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations
import django.contrib.gis.db.models.fields


class Migration(migrations.Migration):
    """Initial schema for the addressbase app: creates the ``Address`` model."""

    # Earlier migrations that this one stands in for.
    replaces = [
        ('addressbase', '0001_initial'),
        ('addressbase', '0002_auto_20160611_1700'),
        ('addressbase', '0003_auto_20160611_2130'),
        ('addressbase', '0004_auto_20160611_2304'),
        ('addressbase', '0005_auto_20160612_0904'),
    ]

    dependencies = []

    operations = [
        migrations.CreateModel(
            name='Address',
            fields=[
                # UPRN is the primary key.
                (
                    'uprn',
                    models.CharField(
                        primary_key=True, max_length=100, serialize=False),
                ),
                (
                    'address',
                    models.TextField(blank=True),
                ),
                (
                    'postcode',
                    models.CharField(
                        db_index=True, max_length=15, blank=True),
                ),
                (
                    'location',
                    django.contrib.gis.db.models.fields.PointField(
                        null=True, srid=4326, blank=True),
                ),
            ],
        ),
    ]
Empty file.