In [1]:
import geopandas as gpd
import os, lzma, csv, collections
import pyproj
import shapely.geometry
import open_cp.sources.chicago

In [3]:
# Root directory holding the raw data files.  Set the CHICAGO_DATA_DIR
# environment variable to point somewhere else (for example a relative
# checkout such as os.path.join("..", "..", "..", "..", "Data")) without
# editing this cell; when it is unset or empty the original default is used.
data_dir = os.environ.get("CHICAGO_DATA_DIR") or os.path.join("/media", "disk", "Data")
# Path of the xz-compressed CSV that the loading cell reads.
filename = os.path.join(data_dir, "chicago_all.csv.xz")

In [5]:
# Read the xz-compressed CSV and collect one
# (block, x, y, row[5], row[6], row[7]) tuple for every row that has
# coordinates and a year after 2001.
# newline="" is what the csv module asks for, so that line breaks embedded in
# quoted fields reach the parser untouched.
with lzma.open(filename, "rt", encoding="utf8", newline="") as f:
    reader = csv.reader(f)
    header = next(reader)  # first row is the column header; kept for inspection
    data = []
    for row in reader:
        # Columns 15 and 16 are parsed as the x/y coordinates below; skip rows
        # missing either one (float("") would otherwise abort the whole load).
        if row[15] == "" or row[16] == "":
            continue
        year = int(row[17])
        if year <= 2001:
            continue
        block, x, y = row[3], float(row[15]), float(row[16])
        # NOTE(review): presumably converts the coordinates from feet to
        # metres -- confirm against open_cp.sources.chicago.
        x /= open_cp.sources.chicago._FEET_IN_METERS
        y /= open_cp.sources.chicago._FEET_IN_METERS
        data.append((block, x, y, row[5], row[6], row[7]))
len(data)

5768060

In [6]:
# Projection used (in inverse mode) to turn the stored x/y values back into
# longitude/latitude.
# NOTE(review): the {"init": ...} spelling is deprecated in newer pyproj /
# geopandas releases -- revisit if the environment is upgraded.
proj = pyproj.Proj({"init":"epsg:3528"})

# Attribute columns: tuple slots 0, 3, 4, 5 of each record in `data`.
frame = gpd.GeoDataFrame({
    "block" : [rec[0] for rec in data],
    "crime" : [rec[3] for rec in data],
    "type" : [rec[4] for rec in data],
    "location" : [rec[5] for rec in data],
})

# Tuple slots 1 and 2 are the projected x/y coordinates.
xs = [rec[1] for rec in data]
ys = [rec[2] for rec in data]
lons, lats = proj(xs, ys, inverse=True)
geo = [shapely.geometry.Point(lon, lat) for lon, lat in zip(lons, lats)]
frame.geometry = geo
frame.crs = {"init" : "epsg:4326"}

frame.head()

Unnamed: 0,block,crime,location,type,geometry
0,022XX N CANNON DR,THEFT,STREET,OVER $500,POINT (-87.63221518349633 41.92244026811861)
1,037XX N WILTON AVE,DECEPTIVE PRACTICE,OTHER,FRAUD OR CONFIDENCE GAME,POINT (-87.6532109705951 41.95014588475505)
2,077XX S EUCLID AVE,THEFT,RESIDENCE,AGG: FINANCIAL ID THEFT,POINT (-87.57728228874838 41.75463186307462)
3,068XX S WASHTENAW AVE,THEFT,APARTMENT,FROM BUILDING,POINT (-87.69071181858655 41.76929624572653)
4,0000X N KENTON AVE,PROSTITUTION,STREET,SOLICIT FOR PROSTITUTE,POINT (-87.74011082104049 41.88106436775494)


### Save

In [5]:
# Write the whole frame to disk under "chicago_post_2001".  No driver is
# passed, so geopandas uses its default output format.
frame.to_file("chicago_post_2001")

### Further study

http://blogs.findlaw.com/blotter/2015/01/whats-the-difference-between-burglary-robbery-and-theft.html

- Theft is stealing
- Robbery is theft with physical violence or the threat of it
- Burglary is trespass with intent to commit a crime

In [7]:
# Show the distinct values present in the "crime" column.
frame.crime.unique()

array(['THEFT', 'DECEPTIVE PRACTICE', 'PROSTITUTION', 'BATTERY', 'ASSAULT',
       'ROBBERY', 'CRIMINAL DAMAGE', 'NARCOTICS', 'CRIMINAL TRESPASS',
       'OFFENSE INVOLVING CHILDREN', 'OTHER OFFENSE',
       'MOTOR VEHICLE THEFT', 'BURGLARY', 'PUBLIC PEACE VIOLATION',
       'WEAPONS VIOLATION', 'ARSON', 'KIDNAPPING',
       'INTERFERENCE WITH PUBLIC OFFICER', 'CRIM SEXUAL ASSAULT',
       'SEX OFFENSE', 'GAMBLING', 'STALKING', 'INTIMIDATION',
       'LIQUOR LAW VIOLATION', 'NON-CRIMINAL', 'PUBLIC INDECENCY',
       'OBSCENITY', 'OTHER NARCOTIC VIOLATION',
       'NON-CRIMINAL (SUBJECT SPECIFIED)', 'NON - CRIMINAL', 'HOMICIDE',
       'CONCEALED CARRY LICENSE VIOLATION', 'HUMAN TRAFFICKING',
       'RITUALISM'], dtype=object)

In [9]:
# Keep only the rows whose crime is THEFT or ROBBERY and export them.
is_theft_or_robbery = frame.crime.isin(["THEFT", "ROBBERY"])
f = frame[is_theft_or_robbery]
f.to_file("chicago_post_2001_theft_robbery")

In [10]:
# Keep only the BURGLARY rows and export them.
is_burglary = frame["crime"] == "BURGLARY"
f = frame[is_burglary]
f.to_file("chicago_post_2001_burglary")

# Old data

In [7]:
# Read the older, uncompressed CSV and collect one
# (block, x, y, row[5], row[6], row[7]) tuple for every row that has
# coordinates.  No year filter is applied here.
filename = os.path.join(data_dir, "chicago_all_old.csv")
# newline="" is what the csv module asks for, so that line breaks embedded in
# quoted fields reach the parser untouched.
with open(filename, "rt", encoding="utf8", newline="") as f:
    reader = csv.reader(f)
    header = next(reader)  # first row is the column header; kept for inspection
    data = []
    for row in reader:
        # Columns 15 and 16 are parsed as the x/y coordinates below; skip rows
        # missing either one (float("") would otherwise abort the whole load).
        if row[15] == "" or row[16] == "":
            continue
        block, x, y = row[3], float(row[15]), float(row[16])
        # NOTE(review): presumably converts the coordinates from feet to
        # metres -- confirm against open_cp.sources.chicago.
        x /= open_cp.sources.chicago._FEET_IN_METERS
        y /= open_cp.sources.chicago._FEET_IN_METERS
        data.append((block, x, y, row[5], row[6], row[7]))
len(data)

5492749

In [8]:
# Projection used (in inverse mode) to turn the stored x/y values back into
# longitude/latitude.
# NOTE(review): the {"init": ...} spelling is deprecated in newer pyproj /
# geopandas releases -- revisit if the environment is upgraded.
proj = pyproj.Proj({"init":"epsg:3528"})

# Attribute columns: tuple slots 0, 3, 4, 5 of each record in `data`.
frame = gpd.GeoDataFrame({
    "block" : [rec[0] for rec in data],
    "crime" : [rec[3] for rec in data],
    "type" : [rec[4] for rec in data],
    "location" : [rec[5] for rec in data],
})

# Tuple slots 1 and 2 are the projected x/y coordinates.
xs = [rec[1] for rec in data]
ys = [rec[2] for rec in data]
lons, lats = proj(xs, ys, inverse=True)
geo = [shapely.geometry.Point(lon, lat) for lon, lat in zip(lons, lats)]
frame.geometry = geo
frame.crs = {"init" : "epsg:4326"}

frame.head()

Unnamed: 0,block,crime,location,type,geometry
0,087XX S KINGSTON AVE,BATTERY,APARTMENT,DOMESTIC BATTERY SIMPLE,POINT (-87.56256927017955 41.73642174778224)
1,028XX E 106TH ST,WEAPONS VIOLATION,VEHICLE NON-COMMERCIAL,RECKLESS FIREARM DISCHARGE,POINT (-87.55449999320267 41.70286757095072)
2,002XX W SUPERIOR ST,MOTOR VEHICLE THEFT,STREET,AUTOMOBILE,POINT (-87.63433696578942 41.89569153235757)
3,016XX W 71ST ST,PUBLIC PEACE VIOLATION,STREET,RECKLESS CONDUCT,POINT (-87.66393039094419 41.76500767433839)
4,016XX W 71ST ST,BATTERY,SIDEWALK,AGGRAVATED:KNIFE/CUTTING INSTR,POINT (-87.66523915591245 41.76499059371392)


In [9]:
# Write the frame to disk under "chicago_old".  No driver is passed, so
# geopandas uses its default output format.
frame.to_file("chicago_old")