In [1]:
import matplotlib

In [2]:
!ls

analysis.py  Crime Data Analysis.ipynb	crime_data_analysis.md	data


In [3]:
!ls data

crime_incident_data_2011.csv  crime_incident_data_2014.csv
crime_incident_data_2012.csv  crime_incident_data_recent.csv
crime_incident_data_2013.csv


In [4]:
import glob

In [5]:
crime_data = glob.glob('data/*.csv')

In [33]:
crime_data

['data/crime_incident_data_2013.csv',
 'data/crime_incident_data_2012.csv',
 'data/crime_incident_data_2011.csv',
 'data/crime_incident_data_2014.csv',
 'data/crime_incident_data_recent.csv']

In [34]:
with open(crime_data[0], 'r') as f:
    print(f.readline())

"Record ID","Report Date","Report Time","Major Offense Type","Address","Neighborhood","Police Precinct","Police District",X Coordinate,Y Coordinate



In [56]:
csv_fields = str()
with open(crime_data[0], 'r') as f:
    csv_fields = f.readline()

In [57]:
csv_fields

'"Record ID","Report Date","Report Time","Major Offense Type","Address","Neighborhood","Police Precinct","Police District",X Coordinate,Y Coordinate\n'

In [58]:
# Split out our unformatted data.
csv_fields = csv_fields.split(',')
# Remove excess quotes.
csv_fields = [field.replace('"', '').strip() for field in csv_fields]
# Casefold and replace spaces with underscores.
csv_fields = [field.casefold().replace(' ', '_') for field in csv_fields]

In [59]:
csv_fields

['record_id',
 'report_date',
 'report_time',
 'major_offense_type',
 'address',
 'neighborhood',
 'police_precinct',
 'police_district',
 'x_coordinate',
 'y_coordinate']

In [53]:
csv_fields_string = ', '.join(csv_fields)

In [64]:
crimes = list()

In [65]:
from collections import namedtuple

Crime = namedtuple('Crime', csv_fields_string)

In [76]:
import csv

for crime in map(Crime._make, csv.reader(open(crime_data[0], "r"))):
    crimes.append(crime)

In [77]:
crime_counts = dict()

In [78]:
for crime in crimes:
    crime_counts[crime.major_offense_type] = crime_counts.get(crime.major_offense_type, 0) + 1

In [79]:
crime_counts

{'Aggravated Assault': 3474,
 'Arson': 344,
 'Assault, Simple': 8034,
 'Burglary': 8058,
 'Curfew': 58,
 'DUII': 3552,
 'Disorderly Conduct': 8092,
 'Drugs': 6514,
 'Embezzlement': 278,
 'Forgery': 2228,
 'Fraud': 3326,
 'Gambling': 4,
 'Homicide': 32,
 'Kidnap': 18,
 'Larceny': 43858,
 'Liquor Laws': 5522,
 'Major Offense Type': 2,
 'Motor Vehicle Theft': 6726,
 'Offenses Against Family': 90,
 'Prostitution': 390,
 'Rape': 410,
 'Robbery': 1804,
 'Runaway': 3024,
 'Sex Offenses': 738,
 'Stolen Property': 154,
 'Trespass': 5624,
 'Vandalism': 10448,
 'Weapons': 758}

In [83]:
def crimes_by_occurance(crimes):
    crime_counts = dict()
    for crime in crimes:
        crime_counts[crime.major_offense_type] = crime_counts.get(crime.major_offense_type, 0) + 1
        
    return OrderedDict(sorted(crime_counts.items(), key=lambda c: c[1]))

In [84]:
crimes_by_occurance(crimes)

OrderedDict([('Major Offense Type', 2),
             ('Gambling', 4),
             ('Kidnap', 18),
             ('Homicide', 32),
             ('Curfew', 58),
             ('Offenses Against Family', 90),
             ('Stolen Property', 154),
             ('Embezzlement', 278),
             ('Arson', 344),
             ('Prostitution', 390),
             ('Rape', 410),
             ('Sex Offenses', 738),
             ('Weapons', 758),
             ('Robbery', 1804),
             ('Forgery', 2228),
             ('Runaway', 3024),
             ('Fraud', 3326),
             ('Aggravated Assault', 3474),
             ('DUII', 3552),
             ('Liquor Laws', 5522),
             ('Trespass', 5624),
             ('Drugs', 6514),
             ('Motor Vehicle Theft', 6726),
             ('Assault, Simple', 8034),
             ('Burglary', 8058),
             ('Disorderly Conduct', 8092),
             ('Vandalism', 10448),
             ('Larceny', 43858)])

In [130]:
def crimes_by_fields(_crimes, **kwargs):
    crimes = list()
    for crime in _crimes:
        checks = len(kwargs.items())
        
        for name, arg in kwargs.items():
            if getattr(crime, name).casefold() == arg.casefold():
                checks -= 1
                
        if checks == 0:
            crimes.append(crime)
    
    return crimes

In [189]:
crimes_by_fields(crimes, neighborhood='pearl', major_offense_type='burglary')

[Crime(record_id='14134123', report_date='05/14/2013', report_time='19:33:00', major_offense_type='Burglary', address='301-399 block of NW 9TH AVE, PORTLAND, OR 97209', neighborhood='PEARL', police_precinct='PORTLAND PREC CE', police_district='821', x_coordinate='7643416.7975700004', y_coordinate='685089.59415999998'),
 Crime(record_id='14182806', report_date='07/29/2013', report_time='10:40:00', major_offense_type='Burglary', address='100-198 block of NW 11TH AVE, PORTLAND, OR 97209', neighborhood='PEARL', police_precinct='PORTLAND PREC CE', police_district='821', x_coordinate='7642898.0636499999', y_coordinate='684568.34251999995'),
 Crime(record_id='14232236', report_date='10/16/2013', report_time='12:55:00', major_offense_type='Burglary', address='200-298 block of NW 11TH AVE, PORTLAND, OR 97209', neighborhood='PEARL', police_precinct='PORTLAND PREC CE', police_district='821', x_coordinate='7642896.53773', y_coordinate='684830.11023999995'),
 Crime(record_id='14243227', report_date

In [170]:
from datetime import datetime

def get_crime_time(crime):
    date_format = f'{crime.report_date} {crime.report_time}'
    try:
        return datetime.strptime(f'{crime.report_date} {crime.report_time}', '%m/%d/%Y %X')
    except ValueError:
        return datetime.now()
        
    

In [175]:
get_crime_time(crimes[0])

datetime.datetime(2017, 6, 21, 1, 26, 27, 81503)

In [None]:
from pprint import pprint

pprint(list(sorted(crimes[1:], key=lambda c: get_crime_time(c)))[10:])