Permalink
Browse files

Replace 3-step zips/streets/count import with 1-step import of Columbus County.

Also enhance the import to use township data to fully cover the county with "cities".
  • Loading branch information...
kmtracey committed Apr 17, 2012
1 parent 6720b78 commit 5d96a640aded07bd5828b55c13c4b2b2d9de139a
Showing with 158 additions and 62 deletions.
  1. +1 −0 .gitignore
  2. +8 −6 README.rst
  3. +149 −56 openrural/management/commands/import_columbus_county.py
View
@@ -6,3 +6,4 @@ build/
aws.sh
fabsecrets.py
openrural.log
+shapefiles/
View
@@ -46,12 +46,14 @@ Columbus County, NC
To import data for Columbus County, NC::
- $ django-admin.py import_nc_zips
- $ django-admin.py import_county_streets 37047
- $ django-admin.py import_columbus_county
-
-Where 37047 is the U.S. Census county ID for the county you want to import
-(37047 = Columbus County, NC).
+ $ django-admin.py import_columbus_county --dir=shapefiles
+
+The --dir option tells import_columbus_county which directory to look in for the
+necessary shapefiles. If that directory does not exist, it is created, the files
+are downloaded into it, and they are left there for later use. If the directory
+already exists, nothing is downloaded and the files already in it are used. If
+--dir is not specified, the files are downloaded to a temporary directory, which
+is deleted before the command finishes.
Server Provisioning and Deployment
----------------------------------
@@ -1,76 +1,169 @@
import os
-import datetime
+import glob
import tempfile
from optparse import make_option, OptionParser
from django.core.management.base import BaseCommand
-from django.contrib.gis.utils import LayerMapping
-from django.contrib.gis.gdal import DataSource, OGRGeomType, OGRGeometry
-from ebpub.utils.text import slugify
+from django.contrib.gis.gdal import DataSource
+from django.contrib.gis.geos import GEOSGeometry
+from ebpub.utils.text import slugify
from ebpub.db.models import Location, LocationType
+from ebpub.db.bin.import_zips import ZipImporter
+from ebpub.db.bin.import_locations import LocationImporter
from ebpub.metros.allmetros import get_metro
-from ebpub.utils.geodjango import make_multi
-from ebpub.geocoder.parser.parsing import normalize
from ebpub.utils.script_utils import die, makedirs, wget, unzip
+from ebpub.streets.blockimport.tiger.import_blocks import TigerImporter
+from ebpub.streets.bin import populate_streets
+STATE = '37' # NC
+COUNTY = '37047' # NC, Columbus county
class Command(BaseCommand):
- help = 'Import Columbus County city boundaries'
- url = 'http://www.columbusco.org/GISData/City.zip'
+ option_list = BaseCommand.option_list + (
+ make_option("-d", "--dir", action="store", type="string", dest="dir"),
+ )
+ help = 'Import Columbus County data: county shape and townships, census places, blocks, zips'
- def clean_name(self, name):
- # convert "BRUNSWICK CITY LIMITS" to "BRUNCSWICK"
- return unicode(name).replace(' CITY LIMITS', '')
+ def fetch_files(self, zip_dir):
+ orig_cwd = os.getcwd()
+ if zip_dir:
+ download = not os.path.exists(zip_dir)
+ if download:
+ os.makedirs(zip_dir)
+ else:
+ zip_dir = tempfile.mkdtemp()
+ download = True
+ os.chdir(zip_dir)
+ if download:
+ print 'Download TIGER & County GIS data to %s' % zip_dir
+ census_base_url = 'ftp://ftp2.census.gov/geo/tiger/TIGER2010'
+ county_base_url = 'http://www.columbusco.org/GISData'
+ census_zips = (
+ 'PLACE/2010/tl_2010_%s_place10.zip' % STATE,
+ 'EDGES/tl_2010_%s_edges.zip' % COUNTY,
+ 'FACES/tl_2010_%s_faces.zip' % COUNTY,
+ 'FEATNAMES/tl_2010_%s_featnames.zip' % COUNTY,
+ 'ZCTA5/2010/tl_2010_%s_zcta510.zip' % STATE,
+ )
+ # Interestingly, versions of these files are available from the census data as well.
+ # However, at least the township one has clearly different boundaries for the townships.
+ # Going with the county data on the assumption it's coming from a source more likely to
+ # know what the "right" boundaries are.
+ county_zips = (
+ 'County.zip',
+ 'Township.zip',
+ )
+ for fname in census_zips:
+ fetch_name = '%s/%s' % (census_base_url, fname)
+ wget(fetch_name) or die(
+ 'Could not download %s' % fetch_name)
+ for fname in county_zips:
+ fetch_name = '%s/%s' % (county_base_url, fname)
+ wget(fetch_name) or die(
+ 'Could not download %s' % fetch_name)
- def download_file(self):
- tmp = tempfile.mkdtemp()
- wget(self.url, cwd=tmp) or die("Could not download %s" % self.url)
- zip_path = os.path.join(tmp, 'City.zip')
- unzip(zip_path, cwd=tmp) or die("failed to unzip %s" % tmp)
- shapefile = os.path.join(tmp, 'City.shp')
- return shapefile
+ for fname in glob.glob('*zip'):
+ unzip(fname) or die('Could not unzip %s' % fname)
+ print "Shapefiles unzipped in %s" % zip_dir
+ os.chdir(orig_cwd)
+ return zip_dir
def handle(self, **options):
- shapefile = self.download_file()
- now = datetime.datetime.now()
+ zip_dir = self.fetch_files(options['dir'])
+
metro_name = get_metro()['metro_name'].upper()
- # get or create City location type
- type_data = {'name': 'City', 'plural_name': 'Cities', 'slug': 'cities',
- 'is_browsable': True, 'is_significant': True,
- 'scope': metro_name}
+
+ county_type_data = {
+ 'name': 'County',
+ 'plural_name': 'Counties',
+ 'slug': 'counties',
+ 'is_browsable': True,
+ 'is_significant': True,
+ 'scope': metro_name,
+ }
try:
- type_ = LocationType.objects.get(slug=type_data['slug'])
+ county_type = LocationType.objects.get(slug=county_type_data['slug'])
except LocationType.DoesNotExist:
- type_ = LocationType.objects.create(**type_data)
- # start with a fresh list of cities
- Location.objects.filter(location_type=type_).delete()
- # build list of cities
- locations = {}
- layer = DataSource(shapefile)[0]
- for feature in layer:
- name = self.clean_name(feature['Name'])
- # convert to 4326
- geom = feature.geom.transform(4326, True).geos
- if name not in locations:
- locations[name] = {
- 'name': name,
- 'slug': slugify(name),
- 'location_type': type_,
- 'city': metro_name,
- 'source': 'Columbus County GIS data',
- 'is_public': True,
- 'creation_date': now,
- 'last_mod_date': now,
- 'display_order': 0,
- 'normalized_name': normalize(name),
- 'location': [],
- }
- location = locations[name]
- location['location'].append(geom)
- # create city locations
- for name, location in locations.iteritems():
- location['location'] = make_multi(location['location'])
- Location.objects.create(**location)
- print 'Imported %d locations' % type_.location_set.count()
+ county_type = LocationType.objects.create(**county_type_data)
+
+ Location.objects.filter(location_type=county_type).delete()
+ county_layer = DataSource('%s/County.shp' % zip_dir)[0]
+ loc_importer = LocationImporter(county_layer, county_type, filter_bounds=False, verbose=True)
+ loc_created_count = loc_importer.save('NAME')
+ # We are assuming here we are only going to have ONE county!
+ columbus_county_location = Location.objects.get(location_type=county_type)
+
+ city_type_data = {
+ 'name': 'City',
+ 'plural_name': 'Cities',
+ 'slug': 'cities',
+ 'is_browsable': True,
+ 'is_significant': True,
+ 'scope': metro_name,
+ }
+ try:
+ city_type = LocationType.objects.get(slug=city_type_data['slug'])
+ except LocationType.DoesNotExist:
+ city_type = LocationType.objects.create(**city_type_data)
+
+ Location.objects.filter(location_type=city_type).delete()
+ city_layer = DataSource('%s/tl_2010_%s_place10.shp' % (zip_dir, STATE))[0]
+ loc_importer = LocationImporter(city_layer, city_type, filter_bounds=True, verbose=True)
+ loc_importer.bounds = columbus_county_location.location
+ loc_created_count = loc_importer.save('NAME10')
+
+ # Add in townships, deleting from their shapes any area already covered by a "proper" city.
+ starter_cities = Location.objects.filter(location_type=city_type)
+ within_cities = GEOSGeometry('MULTIPOLYGON EMPTY')
+ for city in starter_cities:
+ within_cities = within_cities.union(city.location)
+ city_pks = [l.pk for l in starter_cities]
+ township_layer = DataSource('%s/Township.shp' % zip_dir)[0]
+ loc_importer = LocationImporter(township_layer, city_type, filter_bounds=False, verbose=True)
+ loc_created_count = loc_importer.save('NAME')
+ townships = Location.objects.filter(location_type=city_type).exclude(pk__in=city_pks)
+ for township in townships:
+ township.name = '%s environs' % township.name.title()
+ township.slug = slugify(township.name)
+ township.location = township.location.difference(within_cities)
+ township.save()
+
+ # Zipcodes are used by the block importer
+ Location.objects.filter(location_type__slug='zipcodes').delete()
+ zip_layer = DataSource('%s/tl_2010_%s_zcta510.shp' % (zip_dir, STATE))[0]
+ zip_importer = ZipImporter(zip_layer, name_field='ZCTA5CE10', source='2010 Tiger/Census',
+ filter_bounds=True, verbose=True)
+ zip_importer.bounds = columbus_county_location.location
+ loc_created_count = zip_importer.save()
+
+ # Now we load the blocks table.
+ print "Importing blocks, this may take several minutes ..."
+
+ importer = TigerImporter(
+ '%s/tl_2010_%s_edges.shp' % (zip_dir, COUNTY),
+ '%s/tl_2010_%s_featnames.dbf' % (zip_dir, COUNTY),
+ '%s/tl_2010_%s_faces.dbf' % (zip_dir, COUNTY),
+ '%s/tl_2010_%s_place10.shp' % (zip_dir, STATE),
+ fix_cities=True,
+ encoding='utf8',
+ filter_bounds=columbus_county_location.location)
+ num_created = importer.save()
+ print "Created %d blocks" % num_created
+
+ #########################
+
+ print "Populating streets and fixing addresses, these can take several minutes..."
+
+ # Note these scripts should be run ONCE, in this order,
+ # after you have imported *all* your blocks.
+
+ populate_streets.main(['streets'])
+ populate_streets.main(['block_intersections'])
+ populate_streets.main(['intersections'])
+ print "Done."
+
+ if not options['dir']:
+ print "Removing temp directory %s" % zip_dir
+ os.system('rm -rf %s' % zip_dir)

0 comments on commit 5d96a64

Please sign in to comment.