Skip to content
This repository
Browse code

Census 2010 redistricting data import

  • Loading branch information...
commit 93646ff4247117b86445e5b0dbc798b10d4a3f50 1 parent e4cc911
Chris Schnaars cschnaars authored

Showing 1 changed file with 98 additions and 10 deletions. Show diff stats Hide diff stats

  1. +98 10 projects/census/LoadCensus2010RedistrictingData.py
108 projects/census/LoadCensus2010RedistrictingData.py
... ... @@ -1,32 +1,120 @@
1 1
2 2 '''
3 3 This script is used to import 2010 Census redistricting data.
4   -Written using Python 2.7.1
  4 +Written using Python 2.6.6
5 5
6   -Prior to running this script, you should set the source directory
7   -(srcDir) and make sure your three data files are in the appropriate
8   -directories: Geo, Data1 and Data2.
  6 +Prior to running this script, you should:
  7 +1 - Set the source directory (srcDir)
  8 +2 - Make sure your data files are in that directory.
  9 +3 - Set the names of your three SQLite tables (geotablename,
  10 + data1tablename, data2tablename)
  11 +
  12 +There are three types of files:
  13 + * Geographic header files (*geo.txt)
  14 + * Data files (first set) (*01.txt)
  15 + * Data files (second set) (*02.txt)
  16 +
  17 +The script will ignore any files that do not have a .txt extension.
  18 +Similarly, the program will stop if it finds a .txt file that does
  19 +not meet one of the above three criteria for valid files.
9 20
10   -You also should check the file extension and alter the "for datafile"
11   -line of code accordingly. Presently, the file extension is set to .txt.
12 21 '''
13 22
14 23 # import modules
15 24 import os
16 25 import glob
  26 +import sqlite3
17 27
18 28 # Specify source directory
19 29 srcDir = 'C:\\Data\\Census\\'
20 30
21   -# Note: We need to determine whether the Census files will
22   -# have a file extension and adjust the code below accordingly.
  31 +# Specify path of SQLite database
  32 +dbpath = '\\\\asb-bus02\\userdata\\cschnaars\\SQLite\\CenRedistData2010.sqlite'
  33 +
  34 +# Specify table names
  35 +geotablename = 'tblTest'
  36 +data1tablename = 'tblData1'
  37 +data2tablename = 'tblData2'
  38 +
  39 +# Connect to the sqlite database
  40 +db = sqlite3.connect(dbpath)
  41 +
  42 +# Create a cursor
  43 +cursor = cnx_object.cursor()
  44 +
  45 +# Run SQL scripts to create the data tables if they don't exist
  46 +SQL = 'CREATE TABLE IF NOT EXISTS "' + geotablename + '''" (
  47 +"FILEID" char(6) NOT NULL, "STUSAB" char(2) NOT NULL,
  48 +"SUMLEV" char(3) NOT NULL, "GEOCOMP" char(2) NOT NULL,
  49 +"CHARITER" char(3) NOT NULL, "CIFSN" char(2) NOT NULL,
  50 +"LOGRECNO" char(7) PRIMARY KEY NOT NULL UNIQUE, "REGION" char(1) NOT NULL,
  51 +"DIVISION" char(1) NOT NULL, "STATECODE" char(2) NOT NULL,
  52 +"COUNTY" char(3) DEFAULT NULL, "COUNTYCC" char(2) DEFAULT NULL,
  53 +"COUNTYSC" char(2) DEFAULT NULL, "COUSUB" char(5) DEFAULT NULL,
  54 +"COUSUBCC" char(2) DEFAULT NULL, "COUSUBSC" char(2) DEFAULT NULL,
  55 +"PLACE" char(5) DEFAULT NULL, "PLACECC" char(2) DEFAULT NULL,
  56 +"PLACESC" char(2) DEFAULT NULL, "TRACT" char(6) DEFAULT NULL,
  57 +"BLKGRP" char(1) DEFAULT NULL, "BLOCK" char(4) DEFAULT NULL,
  58 +"IUC" char(2) DEFAULT NULL, "CONCIT" char(5) DEFAULT NULL,
  59 +"CONCITCC" char(2) DEFAULT NULL, "CONCITSC" char(2) DEFAULT NULL,
  60 +"AIANHH" char(4) DEFAULT NULL, "AIANHHFP" char(5) DEFAULT NULL,
  61 +"AIANHHCC" char(2) DEFAULT NULL, "AIHHTLI" char(1) DEFAULT NULL,
  62 +"AITSCE" char(3) DEFAULT NULL, "AITS" char(5) DEFAULT NULL,
  63 +"AITSCC" char(2) DEFAULT NULL, "TTRACT" char(6) DEFAULT NULL,
  64 +"TBLKGRP" char(1) DEFAULT NULL, "ANRC" char(5) DEFAULT NULL,
  65 +"ANRCCC" char(2) DEFAULT NULL, "CBSA" char(5) DEFAULT NULL,
  66 +"CBSASC" char(2) DEFAULT NULL, "METDIV" char(5) DEFAULT NULL,
  67 +"CSA" char(3) DEFAULT NULL, "NECTA" char(5) DEFAULT NULL,
  68 +"NECTASC" char(2) DEFAULT NULL, "NECTADIV" char(5) DEFAULT NULL,
  69 +"CNECTA" char(3) DEFAULT NULL, "CBSAPCI" char(1) DEFAULT NULL,
  70 +"NECTAPCI" char(1) DEFAULT NULL, "UA" char(5) DEFAULT NULL,
  71 +"UASC" char(2) DEFAULT NULL, "UATYPE" char(1) DEFAULT NULL,
  72 +"UR" char(1) DEFAULT NULL, "CD" char(2) DEFAULT NULL,
  73 +"SLDU" char(3) DEFAULT NULL, "SLDL" char(3) DEFAULT NULL,
  74 +"VTD" char(6) DEFAULT NULL, "VTDI" char(1) DEFAULT NULL,
  75 +"RESERVE2" char(3) DEFAULT NULL, "ZCTA5" char(5) DEFAULT NULL,
  76 +"SUBMCD" char(5) DEFAULT NULL, "SUBMCDCC" char(2) DEFAULT NULL,
  77 +"SDELM" char(5) DEFAULT NULL, "SDSEC" char(5) DEFAULT NULL,
  78 +"SDUNI" char(5) DEFAULT NULL, "AREALAND" char(14) NOT NULL,
  79 +"AREAWATR" char(14) NOT NULL, "AREANAME" varchar(90) NOT NULL,
  80 +"FUNCSTAT" char(1) NOT NULL, "GCUNI" char(1) DEFAULT NULL,
  81 +"POP100" char(9) NOT NULL, "HU100" char(9) NOT NULL,
  82 +"INTPTLAT" char(11) NOT NULL, "INTPTLON" char(12) NOT NULL,
  83 +"LSADC" char(2) NOT NULL, "PARTFLAG" char(1) DEFAULT NULL,
  84 +"RESERVE3" char(6) DEFAULT NULL, "UGA" char(5) DEFAULT NULL,
  85 +"STATENS" char(8) NOT NULL, "COUNTYNS" char(8) DEFAULT NULL,
  86 +"COUSUBNS" char(8) DEFAULT NULL, "PLACENS" char(8) DEFAULT NULL,
  87 +"CONCITNS" char(8) DEFAULT NULL, "AIANHHNS" char(8) DEFAULT NULL,
  88 +"AITSNS" char(8) DEFAULT NULL, "ANRCNS" char(8) DEFAULT NULL,
  89 +"SUBMCDNS" char(8) DEFAULT NULL, "CD113" char(2) DEFAULT NULL,
  90 +"CD114" char(2) DEFAULT NULL, "CD115" char(2) DEFAULT NULL,
  91 +"SLDU2" char(3) DEFAULT NULL, "SLDU3" char(3) DEFAULT NULL,
  92 +"SLDU4" char(3) DEFAULT NULL, "SLDL2" char(3) DEFAULT NULL,
  93 +"SLDL3" char(3) DEFAULT NULL, "SLDL4" char(3) DEFAULT NULL,
  94 +"AIANHHSC" char(2) DEFAULT NULL, "CSASC" char(2) DEFAULT NULL,
  95 +"CNECTASC" char(2) DEFAULT NULL, "MEMI" char(1) DEFAULT NULL,
  96 +"NMEMI" char(1) DEFAULT NULL, "PUMA" char(5) DEFAULT NULL,
  97 +"RESERVED" char(18) DEFAULT NULL);'''
  98 +
  99 +cursor.execute(SQL)
23 100
24 101 # Iterate through each file in the directory
25 102 for datafile in glob.glob(os.path.join(srcDir, '*.txt')):
26 103
  104 + # Determine file type
  105 + if datafile.endswith('geo.txt'):
  106 + filetype = 'geo'
  107 + elif datafile.endswith('01.txt'):
  108 + filetype = 'data1'
  109 + elif datafile.endswith('02.txt'):
  110 + filetype = 'data2'
  111 + else:
  112 + print 'File not recognized: ' + datafile
  113 + break
  114 +
27 115 # Iterate through each line in the file
28   - for line in open(datafile, 'rb'): # Should this be 'r' or 'rb'?
29   - parseddata = line.split(',') # Copy the line to a list
  116 + #for line in open(datafile, 'rb'): # Should this be 'r' or 'rb'?
  117 + # parseddata = line.split(',') # Copy the line to a list
30 118
31 119
32 120

0 comments on commit 93646ff

Please sign in to comment.
Something went wrong with that request. Please try again.