In [1]:
import pandas as pd

#### Part 1. Tasks:
- Import the Python csv module.
- Create a Python file object in read mode for crime_sampler.csv called csvfile.
- Create an empty list called crime_data.
- Loop over a csv reader on the file object :
- Inside the loop, append the date (first element), type of crime (third element), location description (fifth element), and arrest (sixth element) to the crime_data list.
- Remove the first element (headers) from the crime_data list.
- Print the first 10 records of the crime_data list. This has been done for you, so hit 'Submit Answer' to see the result!

In [2]:
crime_info = pd.read_csv('https://assets.datacamp.com/production/repositories/906/datasets/7fe0304955dbf05e3a0d57c8959578dcef479e81/crime_sampler.csv', sep = ',')

In [3]:
crime_info.head()

Unnamed: 0,Date,Block,Primary Type,Description,Location Description,Arrest,Domestic,District
0,05/23/2016 05:35:00 PM,024XX W DIVISION ST,ASSAULT,SIMPLE,STREET,False,True,14
1,03/26/2016 08:20:00 PM,019XX W HOWARD ST,BURGLARY,FORCIBLE ENTRY,SMALL RETAIL STORE,False,False,24
2,04/25/2016 03:05:00 PM,001XX W 79TH ST,THEFT,RETAIL THEFT,DEPARTMENT STORE,True,False,6
3,04/26/2016 05:30:00 PM,010XX N PINE AVE,BATTERY,SIMPLE,SIDEWALK,False,False,15
4,06/19/2016 01:15:00 AM,027XX W AUGUSTA BLVD,BATTERY,AGGRAVATED: HANDGUN,SIDEWALK,False,False,12


In [4]:
crime_data = []

for row in range(len(crime_info)):
    crime_data.append([
        crime_info.loc[row, "Date"],
        crime_info.loc[row, "Primary Type"],
        crime_info.loc[row, "Location Description"],
        crime_info.loc[row, "Arrest"]
    ])

In [5]:
crime_data[:5]

[['05/23/2016 05:35:00 PM', 'ASSAULT', 'STREET', False],
 ['03/26/2016 08:20:00 PM', 'BURGLARY', 'SMALL RETAIL STORE', False],
 ['04/25/2016 03:05:00 PM', 'THEFT', 'DEPARTMENT STORE', True],
 ['04/26/2016 05:30:00 PM', 'BATTERY', 'SIDEWALK', False],
 ['06/19/2016 01:15:00 AM', 'BATTERY', 'SIDEWALK', False]]

In [6]:
crime_data[-5:]

[['02/07/2016 12:34:00 PM', 'OTHER OFFENSE', 'RESIDENCE', False],
 ['08/19/2016 08:55:00 AM', 'THEFT', 'SMALL RETAIL STORE', False],
 ['02/15/2016 08:00:00 AM', 'THEFT', 'APARTMENT', False],
 ['10/21/2016 11:00:00 PM', 'THEFT', 'STREET', False],
 ['01/20/2017 10:31:00 PM',
  'INTERFERENCE WITH PUBLIC OFFICER',
  'STREET',
  True]]

#### Part 2. Tasks:

- Import Counter from collections and datetime from datetime.
- Create a Counter object called crimes_by_month.
- Loop over the crime_data list:
- Using the datetime.strptime() function, convert the first element of each item into a Python Datetime Object called date.
- Increment the counter for the month associated with this row by one. You can access the month of date using date.month.
- Print the 3 most common months for crime.

In [7]:
from collections import Counter
from datetime import datetime

In [8]:
crimes_by_month = Counter()

In [9]:
for row in crime_data:
    date = datetime.strptime(row[0], '%m/%d/%Y %I:%M:%S %p')
    crimes_by_month[date.month] += 1
    
print(crimes_by_month.most_common(3))


[(1, 1948), (2, 1862), (7, 1257)]


#### Part 3. Tasks:

- Import defaultdict from collections and datetime from datetime.
- Create a dictionary that defaults to a list called locations_by_month.
- Loop over the crime_data list:
    - Convert the first element to a date object exactly like you did in the previous exercise.
    - If the year is 2016, set the key of locations_by_month to be the month of date and .append() the location (fifth element of row) to the values list.
- Print the dictionary.

In [10]:
from collections import defaultdict

In [11]:
locations_by_month = defaultdict(list)

In [12]:
for row in crime_data:
    date = datetime.strptime(row[0], '%m/%d/%Y %I:%M:%S %p')
    if date.year == 2016:
        locations_by_month[date.month].append(row[2])
        
# print(locations_by_month)


#### Part 4. Tasks:

- Loop over the items from your dictionary, using tuple expansion to unpack locations_by_month.items() into month and locations.
- Make a Counter of the locations called location_count.
- Print the month.
- Print the five most common crime locations.


In [13]:
for month, locations in locations_by_month.items():
    location_count = Counter(locations)
    print(month)
    #print(location_count.most_common(5))

5
3
4
6
7
10
12
1
9
11
8
2


#### Part 5. Tasks:

- Create a Python file object in read mode for crime_sampler.csv called csvfile.
- Create a dictionary that defaults to a list called crimes_by_district.
- Loop over a DictReader of the CSV file:
- Pop 'District' from each row and store it as district.
- Append the rest of the data (row) to the district key of crimes_by_district.

In [14]:
# solution found to use dictreader on http csv files


import requests
from contextlib import closing
import csv

url = 'https://assets.datacamp.com/production/repositories/906/datasets/7fe0304955dbf05e3a0d57c8959578dcef479e81/crime_sampler.csv'

crimes_by_district = defaultdict(list)

with closing(requests.get(url, stream=True)) as r:
    f = (line.decode('utf-8') for line in r.iter_lines())
    reader = csv.DictReader(f, delimiter=',', quotechar='"')
    for row in reader:
        district = row.pop('District')
        crimes_by_district[district].append(row)

In [15]:
crimes_by_district.keys()

dict_keys(['14', '24', '6', '15', '12', '7', '1', '11', '18', '22', '5', '16', '9', '8', '3', '2', '19', '10', '4', '17', '20', '25', '31'])

In [16]:
# crimes_by_district
# uncomment the above to see the created dictionary organized
# with all information assigned to the district keys above

#### Part 6. Tasks:

- Loop over the crimes_by_district dictionary, unpacking it into the variables district and crimes.
    - Create an empty Counter object called year_count.
        - Loop over the crimes:
            - If there was an arrest, Convert crime['Date'] to a datetime object called year.
            - Add the crime to the Counter for the year, by using year as the key of year_count.
- Print the Counter. 

In [26]:
# the main goal here is to print the crimes by district

for district, crimes in crimes_by_district.items():
    print(district)
    
    year_count = Counter()
    
    for crime in crimes:
        if crime['Arrest'] == 'true':
            year = datetime.strptime(crime['Date'], '%m/%d/%Y %I:%M:%S %p').year
            year_count[year] += 1
            
    print(year_count)
    

14
Counter({2016: 59, 2017: 8})
24
Counter({2016: 51, 2017: 10})
6
Counter({2016: 157, 2017: 32})
15
Counter({2016: 154, 2017: 16})
12
Counter({2016: 72, 2017: 9})
7
Counter({2016: 181, 2017: 27})
1
Counter({2016: 124, 2017: 15})
11
Counter({2016: 275, 2017: 53})
18
Counter({2016: 92, 2017: 17})
22
Counter({2016: 78, 2017: 12})
5
Counter({2016: 149, 2017: 30})
16
Counter({2016: 66, 2017: 9})
9
Counter({2016: 116, 2017: 17})
8
Counter({2016: 124, 2017: 26})
3
Counter({2016: 98, 2017: 18})
2
Counter({2016: 84, 2017: 15})
19
Counter({2016: 88, 2017: 11})
10
Counter({2016: 144, 2017: 20})
4
Counter({2016: 134, 2017: 15})
17
Counter({2016: 38, 2017: 5})
20
Counter({2016: 27, 2017: 8})
25
Counter({2016: 150, 2017: 26})
31
Counter({2016: 1})


In [33]:
### for part 7 the dictionary needs to be reorganized 
### to crimes by block

crimes_by_block = defaultdict(list)

with closing(requests.get(url, stream=True)) as r:
    f = (line.decode('utf-8') for line in r.iter_lines())
    reader = csv.DictReader(f, delimiter=',', quotechar='"')
    for row in reader:
        block = row.pop('Block')
        crimes_by_block[block].append(row['Primary Type'])

In [34]:
crimes_by_block['024XX W DIVISION ST']

['ASSAULT', 'THEFT', 'BURGLARY']

#### Part 7. Tasks:

- Create a unique list of crimes for the '001XX N STATE ST' block called n_state_st_crimes and print it.
- Create a unique list of crimes for the '0000X W TERMINAL ST' block called w_terminal_st_crimes and print it.
- Find the crimes committed on 001XX N STATE ST but not 0000X W TERMINAL ST. Store the result as crime_differences and print it.

In [37]:
n_state_st_crimes = set(crimes_by_block['001XX N STATE ST'])
print(n_state_st_crimes)

w_terminal_st_crimes = set(crimes_by_block['0000X W TERMINAL ST'])
print(w_terminal_st_crimes)

crime_differences = n_state_st_crimes.difference(w_terminal_st_crimes)
print(crime_differences)

{'DECEPTIVE PRACTICE', 'CRIMINAL DAMAGE', 'ASSAULT', 'CRIMINAL TRESPASS', 'OTHER OFFENSE', 'ROBBERY', 'BATTERY', 'THEFT'}
{'DECEPTIVE PRACTICE', 'CRIMINAL DAMAGE', 'ASSAULT', 'PUBLIC PEACE VIOLATION', 'NARCOTICS', 'OTHER OFFENSE', 'CRIMINAL TRESPASS', 'THEFT'}
{'ROBBERY', 'BATTERY'}
