In [5]:
import pandas as pd
import matplotlib as mplt
import numpy as np
import torch
import placekey as pk

# Urbana Crime Dataset Data

**Interesting tool: https://data.urbanaillinois.us/d/uj4k-8xe8/visualization**

### October 2nd
1. Import necessary libraries (`pandas`,`numpy`,`matplotlib`,`torch`)
2. Import and read data from Urbana Crime Dataset (Champaign to come later)
3. Filter the columns based on pertinent data
4. Filter the dataframe using crimes that are actually important
   1. Figure out which crimes are mapped to which crime codes
5. Map certain crime codes to scores
   1. Use a dictionary: e.g. `{'Battery' : 7, 'Assault' : 10, 'Robbery' : 8, 'Sexual Assault' : 10}`

In [6]:
## Importing necessary data from Urbana Crime Dataset 

df = pd.read_csv('Urbana_Crime_Data.csv', low_memory=False)

cols = ['DATE OCCURRED', 'TIME OCCURRED', 'YEAR OCCURRED', 'MONTH OCCURRED', 'GEO CODE', 'HOUSE NUMBER BLOCK', 'STREET', 'CRIME CODE', 'CRIME DESCRIPTION', 'CRIME CATEGORY', 'MAPPING ADDRESS']

print(df.columns)

crimes = df[cols]

print(crimes.columns)

crimes.head()

Index(['INCIDENT', 'DATE OCCURRED', 'TIME OCCURRED', 'YEAR OCCURRED',
       'MONTH OCCURRED', 'DATE REPORTED', 'TIME REPORTED', 'DATE ARRIVED',
       'TIME ARRIVED', 'GEO CODE', 'HOUSE NUMBER BLOCK', 'STREET',
       'CROSS STREET', 'CRIME CODE', 'CRIME DESCRIPTION', 'CRIME CATEGORY',
       'CRIME CATEGORY DESCRIPTION', 'CSA DESCRIPTION', 'PLACE CODE',
       'PLACE CODE DESCRIPTION', 'WEAPONS CODE 1', 'WEAPON 1 DESCRIPTION',
       'WEAPONS CODE 2', 'WEAPON 2 DESCRIPTION', 'WEAPONS CODE 3',
       'WEAPON 3 DESCRIPTION', 'BIAS CODE', 'BIAS DESCRIPTION', 'STATUS CODE',
       'STATUS DESCRIPTION', 'COUNTER', 'MAPPING ADDRESS'],
      dtype='object')
Index(['DATE OCCURRED', 'TIME OCCURRED', 'YEAR OCCURRED', 'MONTH OCCURRED',
       'GEO CODE', 'HOUSE NUMBER BLOCK', 'STREET', 'CRIME CODE',
       'CRIME DESCRIPTION', 'CRIME CATEGORY', 'MAPPING ADDRESS'],
      dtype='object')


Unnamed: 0,DATE OCCURRED,TIME OCCURRED,YEAR OCCURRED,MONTH OCCURRED,GEO CODE,HOUSE NUMBER BLOCK,STREET,CRIME CODE,CRIME DESCRIPTION,CRIME CATEGORY,MAPPING ADDRESS
0,04/13/2019,17:20,2019,4,605-04,1100 BLOCK,COLORADO AV,1310.0,CRIMINAL DAMAGE TO PROPERTY,C17,"1100 COLORADO AV\nURBANA, IL\n(40.094723, -88...."
1,01/24/2019,16:13,2019,1,546-01,1200 BLOCK,EUREKA ST,486.0,BATTERY-DOMESTIC,C05,"1200 EUREKA ST\nURBANA, IL\n(40.125577, -88.22..."
2,05/21/2019,12:00,2019,5,576-06,600 BLOCK,VINE ST S,1110.0,DECEPTIVE PRACTICES,C16,"600 VINE ST S\nURBANA, IL\n(40.108475, -88.204..."
3,08/28/2019,07:45,2019,8,547-02,1300 BLOCK,CUNNINGHAM AVE,460.0,BATTERY,C05,"1300 CUNNINGHAM AVE\nURBANA, IL\n(40.125562, -..."
4,01/27/2019,10:45,2019,1,605-04,1200 BLOCK,FLORIDA AV E,1365.0,TRESPASS-RESIDENCE,C77,"1200 FLORIDA AV E\nURBANA, IL\n(40.098384, -88..."


In [7]:
## Mapping specific crimes to broader catogories

crime_category_cols = ['CRIME CATEGORY', 'CRIME DESCRIPTION']

crime_category_unique = crimes['CRIME CATEGORY'].drop_duplicates(keep='last')

crime_category_df = df[crime_category_cols].drop_duplicates(keep='last')
crime_category_df = crime_category_df.set_index('CRIME DESCRIPTION')

print(crime_category_df.columns)

crime_category_df.head(20)

# print(crime_category_df.index[0])

# print(crime_category_df.loc[crime_category_df.index[0] , 'CRIME CATEGORY'])


Index(['CRIME CATEGORY'], dtype='object')


Unnamed: 0_level_0,CRIME CATEGORY
CRIME DESCRIPTION,Unnamed: 1_level_1
METHAMPHETAMINE - MANUFACTURING,C23
TAXI PROVISIONAL LICENSE,C62
IMPROPER DEPOSIT OF GARBAGE,C47
PARKING-2 AM TO 6 AM,C28
PROSTITUTION,C19
UNLAWFUL USE OF A BUILDING,C24
IMPROPER U-TURN,C28
INTIMIDATION-AGGRAVATED,C32
GRADUATED DRIVER,C28
BOMB THREAT,C30


In [8]:
all_crime_categories = {}

first_index = crime_category_unique.first_valid_index()
last_index = crime_category_unique.last_valid_index()

indices = crime_category_df.index

for category in crime_category_unique:

    arr = []

    all_crime_categories.update({category : []})

    for i in range(len(crime_category_df)):
        curr = crime_category_df.loc[crime_category_df.index[i] , 'CRIME CATEGORY']

        if (curr is category):
            arr.append(crime_category_df.index[i])

    all_crime_categories[category] = arr

print(all_crime_categories)

{'C79': ['USE/SALE/DEL OF INTOXICATING COMPOU'], 'C84': ['TERRY STOPS'], 'C63': ['BURGLAR ALARM'], 'C43': ['PEDESTRIAN WALKING ON ROADWAY'], 'C75': ['ENDANGERMENT'], 'C59': ['PARKING ENFORCEMENT BOOT'], 'C57': ['OPEN BURNING'], 'C36': ['OTHER/MISC CITY ORDINANCE VIOLATION', 'OTHER CRIMINAL OFFENSES'], 'C65': ['BURGLARY TOOLS-POSSESSION'], 'C44': ['ALL OTHER PARK DIST VIOLATIONS'], 'C34': ['TERRORISM - MAKE THREATS'], 'C25': ['HYPODERMIC NEEDLE/SYRINGE-POSS/SALE'], 'C01': ['HOMICIDE-DRUG INDUCED', 'MURDER-FIRST DEGREE'], 'C82': ['OTHER PUBLIC COMPLAINTS'], 'C53': ['PARKING-HANDICAP', 'PARKING VIOLATIONS-ALL OTHER', 'PARKING VIOLATIONS-ILLEGAL PARKING'], 'C22': ['CANNABIS TRAFFICKING', 'POSS OF CANNABIS', 'UNLAWFUL CANNABIS MANUFACTURING', 'CANNABIS POSS OVER 100 G -FEL', 'DEL/MANUF CANNABIS UNDER 10 G -MISD', 'CANNABIS POSS OVER 10 - 100 G -MISD', 'CANNABIS POSS 10 GM OR LESS -ORD', 'DEL/MANUF CANNABIS OVER 30 G -FEL'], 'C32': ['INTIMIDATION-AGGRAVATED', 'CYBERSTALKING', 'STALKING', 'IN

In [9]:
scores = {'CO1' : 15, 'C02' : 12, 'C08' : 12, 'C03' : 10, 'C05' : 10, 'C09' : 10, 'C34' : 10, 'C10' : 8, 'C11' : 8, 'C28' : 8, 'C27' : 6}

In [10]:
crime_categories = {}

for crime in all_crime_categories.keys():
    if crime in scores:
        crime_categories.update({crime : all_crime_categories[crime]})

In [11]:
coord_cols = []

for c in range(crimes.first_valid_index(), crimes.last_valid_index() + 1):
    all_info = (crimes.loc[c])['MAPPING ADDRESS']
    split_info = all_info.split('\n')
    coords = split_info[2]
    coord_cols.append(coords)

crimes['COORDINATES'] = coord_cols

del crimes['MAPPING ADDRESS']

crimes.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crimes['COORDINATES'] = coord_cols


Unnamed: 0,DATE OCCURRED,TIME OCCURRED,YEAR OCCURRED,MONTH OCCURRED,GEO CODE,HOUSE NUMBER BLOCK,STREET,CRIME CODE,CRIME DESCRIPTION,CRIME CATEGORY,COORDINATES
0,04/13/2019,17:20,2019,4,605-04,1100 BLOCK,COLORADO AV,1310.0,CRIMINAL DAMAGE TO PROPERTY,C17,"(40.094723, -88.194808)"
1,01/24/2019,16:13,2019,1,546-01,1200 BLOCK,EUREKA ST,486.0,BATTERY-DOMESTIC,C05,"(40.125577, -88.225343)"
2,05/21/2019,12:00,2019,5,576-06,600 BLOCK,VINE ST S,1110.0,DECEPTIVE PRACTICES,C16,"(40.108475, -88.204841)"
3,08/28/2019,07:45,2019,8,547-02,1300 BLOCK,CUNNINGHAM AVE,460.0,BATTERY,C05,"(40.125562, -88.20034)"
4,01/27/2019,10:45,2019,1,605-04,1200 BLOCK,FLORIDA AV E,1365.0,TRESPASS-RESIDENCE,C77,"(40.098384, -88.194241)"


In [12]:
filtered_crimes = crimes

for c in range(crimes.first_valid_index(), crimes.last_valid_index() + 1):
    if (crimes.loc[c])['CRIME CATEGORY'] not in scores:
        filtered_crimes = filtered_crimes.drop(c)

filtered_crimes.head()


Unnamed: 0,DATE OCCURRED,TIME OCCURRED,YEAR OCCURRED,MONTH OCCURRED,GEO CODE,HOUSE NUMBER BLOCK,STREET,CRIME CODE,CRIME DESCRIPTION,CRIME CATEGORY,COORDINATES
1,01/24/2019,16:13,2019,1,546-01,1200 BLOCK,EUREKA ST,486.0,BATTERY-DOMESTIC,C05,"(40.125577, -88.225343)"
3,08/28/2019,07:45,2019,8,547-02,1300 BLOCK,CUNNINGHAM AVE,460.0,BATTERY,C05,"(40.125562, -88.20034)"
7,05/23/2019,15:14,2019,5,578-02,100 BLOCK,HIGH CROSS RD S,6621.0,FAILURE TO REDUCE SPEED,C28,"(40.111989, -88.162671)"
8,04/28/2019,10:01,2019,4,605-13,800 BLOCK,WINDSOR RD E,6601.0,SPEEDING (RADAR),C28,"(40.083797, -88.199147)"
14,03/23/2019,18:15,2019,3,606-03,2500 BLOCK,PHILO RD,8612.0,THEFT-RETAIL-CITY,C11,"(40.085441, -88.190537)"


In [13]:
def find_offenses(code):
    for c in range(crimes.first_valid_index(), crimes.last_valid_index() + 1):
        if (crimes.loc[c])['CRIME CATEGORY'] == code:
            print(all_crime_categories[code])
            break


find_offenses('C01')

['HOMICIDE-DRUG INDUCED', 'MURDER-FIRST DEGREE']


In [21]:
def find_locations(code):
    locations = []
    for c in range(crimes.first_valid_index(), crimes.last_valid_index() + 1):
        if (crimes.loc[c])['CRIME CATEGORY'] == code:
            locations.append((crimes.loc[c])['COORDINATES'])

    return locations

find_locations('C01')

['(40.116511, -88.210833)',
 '(40.091492, -88.191794)',
 '(40.101671, -88.190785)',
 '(40.094693, -88.198612)',
 '(40.094693, -88.198612)',
 '(40.103684, -88.199024)',
 '(40.120997, -88.20527)',
 '(40.112734, -88.210352)',
 '(40.103805, -88.188643)',
 '(40.105222, -88.173583)',
 '(40.118402, -88.224074)',
 '(40.102807, -88.189599)',
 '(40.104866, -88.176392)']

In [15]:
def find_score(street):
    score = 1000
    for c in filtered_crimes.iterrows():
        if (street in str(c[1])):
            location_score = scores[(crimes.loc[c[0]])['CRIME CATEGORY']]
            score -= location_score
       
    print(score)

find_score('UNIVERSITY AVE W')


334


In [16]:
import placekey as pk
lat, long = 40.116511, -88.210833
pk.geo_to_placekey(lat, long)

'@5pt-y8c-ygk'

In [42]:
def find_latitude(location):
    coordinates = location.split(',')
    latitude = coordinates[0].replace('(','')
    return float(latitude)

def find_longitude(location):
    coordinates = location.split(',')
    longitude = coordinates[1].replace(')','')
    return float(longitude)

def find_placekey(lat, long):
    latitude, longitude = lat, long
    return pk.geo_to_placekey(latitude, longitude)

In [43]:
location_placekeys = []

for location in find_locations('C01'):
    latitude = find_latitude(location)
    longitude = find_longitude(location)
    location_placekeys.append(find_placekey(latitude, longitude))

print(location_placekeys)

['@5pt-y8c-ygk', '@5pt-y8z-d7q', '@5pt-y8f-gkz', '@5pt-y8d-rtv', '@5pt-y8d-rtv', '@5pt-y8d-mp9', '@5pt-y88-qzz', '@5pt-y8f-rp9', '@5pt-y8f-jqf', '@5pt-y89-jn5', '@5pt-y8c-3t9', '@5pt-y8f-jy9', '@5pt-y89-kxq']
