# Crime data analysis

In [26]:
import pandas as pd
import numpy as np

In [27]:
df_geodata = pd.read_csv("sydney outdoor crime age gender inferred.csv")


In [28]:
df_geodata.head(3)

Unnamed: 0,FID,OBJECTID,bcsrgrp,bcsrcat,lganame,locsurb,locprmc1,locpcode,bcsrgclat,bcsrgclng,...,5.1,15.1,25.1,35.1,45.1,55.1,65.1,75.1,85.1,Age
0,0,1,Assault,Non-domestic violence related assault,Sydney,REDFERN,OUTDOOR/PUBLIC PLACE,2016,-33.89239,151.21479,...,0,0,0,0,0,1,0,0,0,55
1,1,2,Assault,Non-domestic violence related assault,Sydney,SYDNEY,OUTDOOR/PUBLIC PLACE,2000,-33.8677,151.20984,...,0,0,0,1,0,0,0,0,0,35
2,2,3,Assault,Non-domestic violence related assault,Sydney,WOOLLOOMOOLOO,OUTDOOR/PUBLIC PLACE,2011,-33.872671,151.2191,...,0,0,0,1,0,0,0,0,0,35


In [29]:
df_geodata['bcsrcat'].unique()

array(['Non-domestic violence related assault', 'Robbery without a weapon',
       'Robbery with a weapon not a firearm', 'Robbery with a firearm',
       'Graffiti', 'Motor vehicle theft',
       'Possession and/or use of amphetamines',
       'Possession and/or use of cocaine',
       'Possession and/or use of ecstasy',
       'Possession and/or use of cannabis',
       'Possession and/or use of narcotics',
       'Possession and/or use of other drugs', 'Steal from motor vehicle'], dtype=object)

In [30]:
relevant_columns = ('Non-domestic violence related assault',
                    'Robbery without a weapon',
                    'Robbery with a weapon not a firearm',
                    'Robbery with a firearm',
                   )
df_subsection = df_geodata[df_geodata['bcsrcat'].isin(relevant_columns)]

In [31]:
#unique values in incident categories
print(df_subsection['bcsrcat'].unique())

['Non-domestic violence related assault' 'Robbery without a weapon'
 'Robbery with a weapon not a firearm' 'Robbery with a firearm']


In [32]:
df_incident_hour = df_subsection['incsttm']
df_incident_hour.head(3)

0    4:00:00 PM
1    6:00:00 PM
2    1:30:00 AM
Name: incsttm, dtype: object

In [33]:
type(df_incident_hour)

pandas.core.series.Series

In [21]:
#making [lat,long] pairs from columns
df_incident_latlong = df_subsection[['bcsrgclat','bcsrgclng','incsttm','Gender','bcsrgrp']]
#rename columns to lat and long
df_incident_latlong = df_incident_latlong.rename(columns={'bcsrgclat':'lat','bcsrgclng':'long','incsttm':'time','bcsrgrp':'type'})


In [22]:
import datetime as dt

#df_incident_latlong['time'] = df_incident_latlong['time'].str.split(pat=':')

df_incident_latlong

Unnamed: 0,lat,long,time,Gender,type
0,-33.892390,151.214790,16:00,M,Assault
1,-33.867700,151.209840,18:00,M,Assault
2,-33.872671,151.219100,1:30,M,Assault
3,-33.870260,151.220190,3:00,M,Assault
4,-33.880070,151.215001,12:51,M,Assault
5,-33.882432,151.206701,2:00,M,Assault
6,-33.875190,151.224510,2:50,M,Assault
7,-33.863096,151.213056,2:40,M,Assault
8,-33.874540,151.212542,0:30,M,Assault
9,-33.856479,151.207925,0:01,M,Assault


In [23]:
df_incident_latlong
df_incident_latlong.to_json("incidents.json",orient="records")

In [24]:
import json
with open("incidents.json", 'r') as incident_json:
    incident_json = json.load(incident_json)

In [25]:
print(json.dumps(incident_json, indent=1))

[
 {
  "lat": -33.89239, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.21479, 
  "time": "16:00"
 }, 
 {
  "lat": -33.8677, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.20984, 
  "time": "18:00"
 }, 
 {
  "lat": -33.872671, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.2191, 
  "time": "1:30"
 }, 
 {
  "lat": -33.87026, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.22019, 
  "time": "3:00"
 }, 
 {
  "lat": -33.88007, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.215001, 
  "time": "12:51"
 }, 
 {
  "lat": -33.882432, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.206701, 
  "time": "2:00"
 }, 
 {
  "lat": -33.87519, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.22451, 
  "time": "2:50"
 }, 
 {
  "lat": -33.863096, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.213056, 
  "time": "2:40"
 }, 
 {
  "lat": -33.87454, 
  "Gender": "M", 
  "type": "Assault", 
  "long": 151.212542, 
  "time": "0:30"
 }, 
 {
  "lat": -33.85

In [17]:
print("min lat:",df_incident_latlong['lat'].min())
print("max lat:",df_incident_latlong['lat'].max())
print("min long:",df_incident_latlong['long'].min())
print("max long:",df_incident_latlong['long'].max())

('min lat:', -33.921909999999997)
('max lat:', -33.854118999999997)
('min long:', 151.17501999999999)
('max long:', 151.23292499999999)


In [58]:
df_incident_latlong['time']

0       [16, 00]
1       [18, 00]
2        [1, 30]
3        [3, 00]
4       [12, 51]
5        [2, 00]
6        [2, 50]
7        [2, 40]
8        [0, 30]
9        [0, 01]
10       [0, 45]
11      [22, 00]
12       [3, 30]
13      [22, 30]
14      [17, 45]
15       [5, 30]
16      [21, 48]
17       [2, 30]
18       [2, 30]
19       [1, 40]
20       [2, 30]
21       [5, 25]
22      [16, 30]
23      [16, 30]
24      [11, 40]
25      [14, 20]
26       [1, 30]
27       [2, 25]
28      [16, 40]
29      [20, 00]
          ...   
5344    [15, 00]
5345    [22, 10]
5346     [4, 00]
5347    [14, 00]
5348    [20, 30]
5349     [0, 45]
5350     [5, 30]
5351    [18, 30]
5352    [22, 00]
5353    [17, 30]
5354    [23, 05]
5355    [17, 12]
5356    [13, 30]
5357     [2, 30]
5358     [2, 50]
5359     [2, 00]
5360     [5, 15]
5361     [3, 30]
5362    [12, 00]
5363    [18, 40]
5364     [1, 30]
5365    [23, 40]
5366     [4, 00]
5367    [20, 45]
5368     [1, 00]
5369    [16, 00]
5370    [22, 00]
5371    [10, 1

In [11]:
len(df_subsection[df_subsection['Gender'] == "M"])


5172

In [12]:
len(df_subsection[df_subsection['Gender'] == "F"])

202