# 1. Import

In [1]:
import numpy as np
import pandas as pd

# 2. Read data

### 2.1. Weather data

In [2]:
df = pd.read_csv('weatherdata.csv')

In [3]:
df.shape

(1157, 8)

In [4]:
df.head()

Unnamed: 0,precipitation,day,month,year,temperature,dewpoint,humidity,wind
0,1.0,1,8,2015,79,58,50,10
1,0.0,2,8,2015,78,54,52,11
2,0.0,3,8,2015,79,64,67,13
3,0.49,4,8,2015,78,66,68,9
4,0.0,5,8,2015,75,57,58,11


In [5]:
df.day.loc[(df.year == 2012)&(df.month == 7)].min()

df.month.loc[df.year == 2012].min()

7

In [6]:
df.columns

Index(['precipitation', 'day', 'month', 'year', 'temperature', 'dewpoint',
       'humidity', 'wind'],
      dtype='object')

### 2.2. Crime data

In [7]:
crimes = pd.read_csv('crime-boston_1.csv')

In [8]:
crimes.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,DAY_OF_WEEK,DISTRICT,HOUR,Lat,Long,MONTH,OCCURRED_ON_DATE,OFFENSE_CODE_GROUP,REPORTING_AREA,SHOOTING,UCR_PART,YEAR,Day,Night,ToNight,ToDay
0,0,0,Sunday,2.0,6,42.346381,-71.103794,7,2012-07-08 06:00:00,Residential Burglary,629.0,0.0,1.0,2012,1,0,16,0
1,1,1,Sunday,1.0,6,42.316841,-71.074585,7,2012-07-08 06:03:00,Aggravated Assault,327.0,1.0,1.0,2012,1,0,16,0
2,2,2,Sunday,2.0,6,42.342841,-71.09699,7,2012-07-08 06:26:00,Robbery,625.0,0.0,1.0,2012,1,0,16,0
3,3,3,Sunday,1.0,6,42.316441,-71.065829,7,2012-07-08 06:56:00,Other,258.0,0.0,1.0,2012,1,0,16,0
4,4,4,Sunday,9.0,7,42.270516,-71.1199,7,2012-07-08 07:15:00,Robbery,496.0,0.0,1.0,2012,1,0,15,0


In [9]:
crimes = crimes.drop('Unnamed: 0',1)
crimes = crimes.drop('Unnamed: 0.1',1)

In [10]:
crimes.OCCURRED_ON_DATE = pd.to_datetime(crimes.OCCURRED_ON_DATE)

crimes.OCCURRED_ON_DATE.describe()

count                  575226
unique                 403402
top       2013-04-15 14:50:00
freq                       96
first     2012-07-08 06:00:00
last      2018-09-03 21:25:00
Name: OCCURRED_ON_DATE, dtype: object

In [11]:
# from 01/07/2012 to 31/08/2015

crimes = crimes.loc[crimes.OCCURRED_ON_DATE > '2012-07-01']
crimes = crimes.loc[crimes.OCCURRED_ON_DATE < '2015-09-01']

In [12]:
crimes.OCCURRED_ON_DATE.describe()

count                  276555
unique                 184693
top       2013-04-15 14:50:00
freq                       96
first     2012-07-08 06:00:00
last      2015-08-31 23:47:00
Name: OCCURRED_ON_DATE, dtype: object

In [13]:
crimes.shape

(276555, 16)

In [14]:
crimes['DayNumber'] = crimes.OCCURRED_ON_DATE.dt.day

In [15]:
crimes['DayNumber'].unique()

array([ 8,  9, 10, 11, 27, 12, 28, 13,  3, 14, 15, 16,  6, 17, 18, 19, 20,
       21, 22, 23, 24, 25,  7, 26, 29, 30, 31,  1,  2,  4,  5])

In [16]:
df.columns

Index(['precipitation', 'day', 'month', 'year', 'temperature', 'dewpoint',
       'humidity', 'wind'],
      dtype='object')

# 3. Merging

In [17]:
# create new weather columns for crime data

crimes['precipitation'] = 0
crimes['temperature'] = 0
crimes['dewpoint'] = 0
crimes['humidity'] = 0
crimes['wind'] = 0

### 3.1. Merging weather data with crime 2012

In [22]:
days_months = [
    [7,31],
    [8,31],
    [9,30],
    [10,31],
    [11,30],
    [12,31]


In [23]:
i = 0

In [24]:
while i < len(days_months):
    
    day_num = 1
    
    while day_num <= days_months[i][1]:
        
        # precipitation
        
        precipitation = df.precipitation.loc[
            (df.year == 2012) & (df.month == days_months[i][0]) & (df.day == day_num)
        ]
        
        crimes.precipitation.loc[
            (crimes.YEAR == 2012) & (crimes.MONTH == days_months[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(precipitation)
        
        
        # temperature
        
        temperature = df.temperature.loc[
            (df.year == 2012) & (df.month == days_months[i][0]) & (df.day == day_num)
        ]
        
        crimes.temperature.loc[
            (crimes.YEAR == 2012) & (crimes.MONTH == days_months[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(temperature)
        
        # dewpoint
        
        dewpoint = df.dewpoint.loc[
            (df.year == 2012) & (df.month == days_months[i][0]) & (df.day == day_num)
        ]
        
        crimes.dewpoint.loc[
            (crimes.YEAR == 2012) & (crimes.MONTH == days_months[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(dewpoint)
        
        # humidity
        
        humidity = df.humidity.loc[
            (df.year == 2012) & (df.month == days_months[i][0]) & (df.day == day_num)
        ]
        
        crimes.humidity.loc[
            (crimes.YEAR == 2012) & (crimes.MONTH == days_months[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(humidity)
        
        # wind
        
        wind = df.wind.loc[
            (df.year == 2012) & (df.month == days_months[i][0]) & (df.day == day_num)
        ]
        
        crimes.wind.loc[
            (crimes.YEAR == 2012) & (crimes.MONTH == days_months[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(wind)
        
        print(day_num)
        day_num +=1
    
    print(str(i)+'_________')
    i += 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
0_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
1_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
2_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
3_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
4_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
5_________


In [25]:
df.day.loc[(df.year==2014)&(df.month==2)].max()

28

### 3.2. Merging weather data with crime 2013

In [26]:
days_month_2013 = [
    [1,31],
    [2,28],
    [3,31],
    [4,30],
    [5,31],
    [6,30],
    [7,31],
    [8,31],
    [9,30],
    [10,31],
    [11,30],
    [12,31]
]

In [27]:
i = 0 
year = 2013

In [28]:
while i < len(days_month_2013):
    
    day_num = 1
    
    while day_num <= days_month_2013[i][1]:
        
        # precipitation
        
        precipitation = df.precipitation.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.precipitation.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(precipitation)
        
        
        # temperature
        
        temperature = df.temperature.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.temperature.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(temperature)
        
        # dewpoint
        
        dewpoint = df.dewpoint.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.dewpoint.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(dewpoint)
        
        # humidity
        
        humidity = df.humidity.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.humidity.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(humidity)
        
        # wind
        
        wind = df.wind.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.wind.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(wind)
        
        print(day_num)
        day_num +=1
    
    print(str(i)+'_________')
    i += 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
0_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
1_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
2_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
3_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
4_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
5_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
6_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
7_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
8_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
9_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

### 3.3. Merging weather data with crime 2014

In [29]:
i = 0 
year = 2014

In [30]:
while i < len(days_month_2013):
    
    day_num = 1
    
    while day_num <= days_month_2013[i][1]:
        
        # precipitation
        
        precipitation = df.precipitation.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.precipitation.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(precipitation)
        
        
        # temperature
        
        temperature = df.temperature.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.temperature.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(temperature)
        
        # dewpoint
        
        dewpoint = df.dewpoint.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.dewpoint.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(dewpoint)
        
        # humidity
        
        humidity = df.humidity.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.humidity.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(humidity)
        
        # wind
        
        wind = df.wind.loc[
            (df.year == year) & (df.month == days_month_2013[i][0]) & (df.day == day_num)
        ]
        
        crimes.wind.loc[
            (crimes.YEAR == year) & (crimes.MONTH == days_month_2013[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(wind)
        
        print(day_num)
        day_num +=1
    
    print(str(i)+'_________')
    i += 1

1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
0_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
1_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
2_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
3_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
4_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
5_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
6_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
7_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
8_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
9_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
2

### 2015

In [31]:
day_mon = [
    [1,31],
    [2,28],
    [3,31],
    [4,30],
    [5,31],
    [6,30],
    [7,31],
    [8,31]    
]

### 3.4. Merging weather data with crime 2015

In [32]:
i = 0
year = 2015

In [33]:
while i < len(day_mon):
    
    day_num = 1
    
    while day_num <= day_mon[i][1]:
        
        # precipitation
        
        precipitation = df.precipitation.loc[
            (df.year == year) & (df.month == day_mon[i][0]) & (df.day == day_num)
        ]
        
        crimes.precipitation.loc[
            (crimes.YEAR == year) & (crimes.MONTH == day_mon[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(precipitation)
        
        
        # temperature
        
        temperature = df.temperature.loc[
            (df.year == year) & (df.month == day_mon[i][0]) & (df.day == day_num)
        ]
        
        crimes.temperature.loc[
            (crimes.YEAR == year) & (crimes.MONTH == day_mon[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(temperature)
        
        # dewpoint
        
        dewpoint = df.dewpoint.loc[
            (df.year == year) & (df.month == day_mon[i][0]) & (df.day == day_num)
        ]
        
        crimes.dewpoint.loc[
            (crimes.YEAR == year) & (crimes.MONTH == day_mon[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(dewpoint)
        
        # humidity
        
        humidity = df.humidity.loc[
            (df.year == year) & (df.month == day_mon[i][0]) & (df.day == day_num)
        ]
        
        crimes.humidity.loc[
            (crimes.YEAR == year) & (crimes.MONTH == day_mon[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(humidity)
        
        # wind
        
        wind = df.wind.loc[
            (df.year == year) & (df.month == day_mon[i][0]) & (df.day == day_num)
        ]
        
        crimes.wind.loc[
            (crimes.YEAR == year) & (crimes.MONTH == day_mon[i][0]) &  (crimes.DayNumber == day_num)
        ] = float(wind)
        
        print(day_num)
        day_num +=1
    
    print(str(i)+'_________')
    i += 1

1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
0_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
1_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
2_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
3_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
4_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
5_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
6_________
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
7_________


### 3.5. Test

In [34]:
len(df.temperature.loc[df.year==2012].unique())

54

In [35]:
len(crimes.temperature.loc[crimes.YEAR==2012].unique())

53

In [36]:
crimes.shape

(276555, 22)

In [37]:
crimes.head()

Unnamed: 0,DAY_OF_WEEK,DISTRICT,HOUR,Lat,Long,MONTH,OCCURRED_ON_DATE,OFFENSE_CODE_GROUP,REPORTING_AREA,SHOOTING,...,Day,Night,ToNight,ToDay,DayNumber,precipitation,temperature,dewpoint,humidity,wind
0,Sunday,2.0,6,42.346381,-71.103794,7,2012-07-08 06:00:00,Residential Burglary,629.0,0.0,...,1,0,16,0,8,0.0,80.0,59.0,51.0,9.0
1,Sunday,1.0,6,42.316841,-71.074585,7,2012-07-08 06:03:00,Aggravated Assault,327.0,1.0,...,1,0,16,0,8,0.0,80.0,59.0,51.0,9.0
2,Sunday,2.0,6,42.342841,-71.09699,7,2012-07-08 06:26:00,Robbery,625.0,0.0,...,1,0,16,0,8,0.0,80.0,59.0,51.0,9.0
3,Sunday,1.0,6,42.316441,-71.065829,7,2012-07-08 06:56:00,Other,258.0,0.0,...,1,0,16,0,8,0.0,80.0,59.0,51.0,9.0
4,Sunday,9.0,7,42.270516,-71.1199,7,2012-07-08 07:15:00,Robbery,496.0,0.0,...,1,0,15,0,8,0.0,80.0,59.0,51.0,9.0


# Export new data

In [38]:
crimes.to_csv('crimes.csv')