# Processing 2013 crime data

In [1]:
import pandas as pd

# Path of the 2013 crimes CSV, relative to the notebook's working directory.
CRIMES_2013_CSV = "crimes-in-chicago/Crimes_-_2013.csv"

# Read the whole file into a DataFrame; later cells narrow it down.
data = pd.read_csv(CRIMES_2013_CSV)

# Preview the first rows to confirm the file loaded as expected.
data.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,20859,HW254987,05/01/2013 01:26:00 AM,019XX S DRAKE AVE,110,HOMICIDE,FIRST DEGREE MURDER,ALLEY,True,False,...,24.0,29.0,01A,1153059.0,1890107.0,2013,05/11/2016 03:48:18 PM,41.854302,-87.713697,"(41.854302497, -87.713697274)"
1,11227517,JB138481,02/10/2013 12:00:00 AM,071XX S LAFAYETTE AVE,266,CRIM SEXUAL ASSAULT,PREDATORY,RESIDENCE,False,False,...,6.0,69.0,02,,,2013,02/11/2018 03:57:41 PM,,,
2,10517342,HZ258721,05/10/2013 11:00:00 AM,036XX S WOLCOTT AVE,266,CRIM SEXUAL ASSAULT,PREDATORY,RESIDENCE,False,False,...,11.0,59.0,02,,,2013,05/11/2016 03:50:55 PM,,,
3,10501659,HZ242860,01/24/2013 12:00:00 AM,011XX N LECLAIRE AVE,1754,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,False,True,...,37.0,25.0,02,1138913.0,1903775.0,2013,02/10/2018 03:50:01 PM,41.892078,-87.765287,"(41.892077708, -87.765287486)"
4,10518625,HZ259943,03/18/2013 10:25:00 AM,002XX E OHIO ST,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,,False,False,...,42.0,8.0,11,,,2013,05/12/2016 03:50:15 PM,,,


In [2]:
# List every column of the raw frame so the ones needed later can be picked by name.
data.columns

Index(['ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
       'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
       'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
       'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')

#### Keep only the needed columns

In [3]:
# Columns used by the rest of the notebook: when, what, and where.
needed_columns = [
    "Date",
    "Primary Type",
    "District",
    "Community Area",
    "Latitude",
    "Longitude",
]

# Narrow the frame to those columns (all rows kept) and preview the result.
data = data.loc[:, needed_columns]
data.head()

Unnamed: 0,Date,Primary Type,District,Community Area,Latitude,Longitude
0,05/01/2013 01:26:00 AM,HOMICIDE,10,29.0,41.854302,-87.713697
1,02/10/2013 12:00:00 AM,CRIM SEXUAL ASSAULT,7,69.0,,
2,05/10/2013 11:00:00 AM,CRIM SEXUAL ASSAULT,9,59.0,,
3,01/24/2013 12:00:00 AM,OFFENSE INVOLVING CHILDREN,15,25.0,41.892078,-87.765287
4,03/18/2013 10:25:00 AM,DECEPTIVE PRACTICE,18,8.0,,


#### Remove rows with null values in date, location, or primary type

In [4]:
# Keep only rows where the date, both coordinates, and the crime type are all
# present; a row missing any one of these fields is dropped.
required_fields = ["Date", "Latitude", "Longitude", "Primary Type"]
data2 = data.dropna(subset=required_fields)

In [5]:
# Renumber rows 0..n-1 after filtering. drop=False (the default) keeps the
# previous row labels as a regular "index" column in the result.
data2 = data2.reset_index(drop=False)

#### Add a Time Period column and convert Date to datetime

In [6]:
# Parse the "Date" strings and label each row with a coarse time of day.
#
# This is done with vectorized pandas operations rather than a row-by-row
# iterrows() loop that calls pd.to_datetime once per record; the per-row
# version is very slow on a full year of data.
#
# Buckets (by hour of the parsed timestamp):
#   04:00-11:59 -> "morning"
#   12:00-19:59 -> "afternoon"
#   20:00-03:59 -> "evening"
#
# NOTE(review): the explicit format assumes every Date value follows the
# "MM/DD/YYYY hh:mm:ss AM/PM" layout seen in the preview output above; a value
# in a different layout raises ValueError instead of being guessed at.
parsed_dates = pd.to_datetime(data2["Date"], format="%m/%d/%Y %I:%M:%S %p")
hours = parsed_dates.dt.hour

# Start with the fallback label, then overwrite the two daytime windows.
# Series.between is inclusive on both ends, so 4..11 matches 4 <= hour < 12.
period_labels = (
    pd.Series("evening", index=hours.index)
    .mask(hours.between(4, 11), "morning")
    .mask(hours.between(12, 19), "afternoon")
)

# Plain lists in row order, which is what the following cell assigns to data2.
date_time = parsed_dates.tolist()
time_period = period_labels.tolist()

In [7]:
# Attach the computed labels as a new "Time Period" column, then overwrite the
# raw "Date" strings with their parsed timestamps (same order as the original).
for column, values in (("Time Period", time_period), ("Date", date_time)):
    data2[column] = values

In [8]:
# Show the first five rows to check the parsed dates and time-period labels.
data2.head(n=5)

Unnamed: 0,index,Date,Primary Type,District,Community Area,Latitude,Longitude,Time Period
0,0,2013-05-01 01:26:00,HOMICIDE,10,29.0,41.854302,-87.713697,evening
1,3,2013-01-24 00:00:00,OFFENSE INVOLVING CHILDREN,15,25.0,41.892078,-87.765287,evening
2,7,2013-11-07 08:00:00,THEFT,4,43.0,41.758868,-87.576839,morning
3,11,2013-09-27 00:00:00,CRIM SEXUAL ASSAULT,8,66.0,41.781299,-87.705627,evening
4,26,2013-11-18 12:38:00,WEAPONS VIOLATION,11,27.0,41.875934,-87.700402,afternoon


In [9]:
# Persist the cleaned frame as a pickle in the notebook's working directory.
CLEANED_PICKLE = "crime_data_2013.pkl"
data2.to_pickle(CLEANED_PICKLE)