In [1]:
# Dependencies
import requests
import pandas as pd
import datetime

### Data Retrieval

In [2]:
# Query url for HPD recent crime reports (rpt) from COHGIS Open Data Portal (GeoJSON file)
url = "https://opendata.arcgis.com/datasets/8d515a90e80840b3bc7a3ada352b0d15_0.geojson"

# Request the report. The timeout stops the cell from hanging indefinitely on a stalled
# connection, and raise_for_status() reports an HTTP error (4xx/5xx) right here instead of
# letting it show up later as a confusing JSON decoding failure on an error page.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Parse the response body and save it as "hpd_crime_rpt"
hpd_crime_rpt = response.json()

# View "hpd_crime_rpt"
hpd_crime_rpt

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'OBJECTID': 1,
    'Join_Count': 1,
    'TARGET_FID': 1,
    'Join_Count_1': 1,
    'Incident_No': 30807419,
    'Time_Begun': '2019-03-11T08:30:00.000Z',
    'UCR_No': 6073,
    'HPD_Beat': '3B50',
    'HPD_District': '3',
    'Offense': 'Theft',
    'Address_Range': '4400-4499 NORTH ',
    'Premise_Type': 'Department or Discount St',
    'X_Coord': 3115832.95607,
    'Y_Coord': 13866716.49565,
    'Zip_Code': '77022',
    'Time_Frame': 3,
    'SNB_No': 45,
    'SNB_Name': 'NORTHSIDE/NORTHLINE',
    'Council_District': 'H',
    'HPD_Division': 'North Division'},
   'geometry': {'type': 'Point',
    'coordinates': [-95.38082655494368, 29.828978489304113]}},
  {'type': 'Feature',
   'properties': {'OBJECTID': 2,
    'Join_Count': 1,
    'TARGET_FID': 2,
    'Join_Count_1': 1,
    'Incident_No': 31567919,
    'Time_Begun': '2019-03-12T21:21:00.000Z',
    'UCR_No': 3023,
    'HPD_Beat': '14D10',
    'HPD_Dis

In [3]:
# Wrap the raw "hpd_crime_rpt" dictionary in a Pandas dataframe
hpd_crime_rpt_pd = pd.DataFrame(data=hpd_crime_rpt)

# Show the first five rows of "hpd_crime_rpt_pd"
hpd_crime_rpt_pd.head(5)

Unnamed: 0,type,features
0,FeatureCollection,"{'type': 'Feature', 'properties': {'OBJECTID':..."
1,FeatureCollection,"{'type': 'Feature', 'properties': {'OBJECTID':..."
2,FeatureCollection,"{'type': 'Feature', 'properties': {'OBJECTID':..."
3,FeatureCollection,"{'type': 'Feature', 'properties': {'OBJECTID':..."
4,FeatureCollection,"{'type': 'Feature', 'properties': {'OBJECTID':..."


In [4]:
# Save the raw dataframe "hpd_crime_rpt_pd" as a csv file (header row kept, index column left out)
hpd_crime_rpt_pd.to_csv(
    "../data/rawdata/hpd_crime_report.csv",
    header=True,
    index=False,
)

### Data Cleaning

###### >> Sort out retrieved crime lists

In [5]:
# Pull out the list stored under the "features" key of "hpd_crime_rpt"
crime_list = hpd_crime_rpt["features"]

# Report how many entries "crime_list" holds
print(f"There are {len(crime_list)} crime cases reported in the HPD recent crime report")

# Show the first entry as a representative example of the "crime_list" structure
crime_list[0]

There are 7368 crime cases reported in the HPD recent crime report


{'type': 'Feature',
 'properties': {'OBJECTID': 1,
  'Join_Count': 1,
  'TARGET_FID': 1,
  'Join_Count_1': 1,
  'Incident_No': 30807419,
  'Time_Begun': '2019-03-11T08:30:00.000Z',
  'UCR_No': 6073,
  'HPD_Beat': '3B50',
  'HPD_District': '3',
  'Offense': 'Theft',
  'Address_Range': '4400-4499 NORTH ',
  'Premise_Type': 'Department or Discount St',
  'X_Coord': 3115832.95607,
  'Y_Coord': 13866716.49565,
  'Zip_Code': '77022',
  'Time_Frame': 3,
  'SNB_No': 45,
  'SNB_Name': 'NORTHSIDE/NORTHLINE',
  'Council_District': 'H',
  'HPD_Division': 'North Division'},
 'geometry': {'type': 'Point',
  'coordinates': [-95.38082655494368, 29.828978489304113]}}

###### >> Categorize types of crimes reported

In [6]:
# Distinct offense names, kept in the order in which they first appear in "crime_list"
# (dictionary keys are unique and remember their insertion order)
crime_type = list(
    dict.fromkeys(entry["properties"]["Offense"] for entry in crime_list)
)

# View "crime_type"
crime_type

['Theft',
 'Robbery',
 'Burglary',
 'Aggravated Assault',
 'Rape',
 'Auto Theft',
 'Murder']

In [7]:
# Offenses counted as violent crimes
# (the "violet" spelling in the variable names is kept because a later cell refers to them)
violet_crime = [
    "Aggravated Assault",
    "Murder",
    "Robbery",
    "Rape",
]

# Offenses counted as non-violent crimes
non_violet_crime = [
    "Theft",
    "Burglary",
    "Auto Theft",
]

###### >> Slim and clean up crime data

In [8]:
# Start from an empty Pandas dataframe that only defines the columns
# "Offense", "Crime Type", "Lat", and "Lng"
crime_info_pd = pd.DataFrame(
    columns=[
        "Offense",
        "Crime Type",
        "Lat",
        "Lng",
    ]
)

# Check "crime_info_pd"
crime_info_pd

Unnamed: 0,Offense,Crime Type,Lat,Lng


In [9]:
# Column order of the slimmed-down crime table
crime_info_columns = ["Offense", "Crime Type", "Lat", "Lng"]

# Map each offense to its crime-type label once, instead of testing list membership per case.
# The non-violent entries are merged last so that an offense listed in both "violet_crime" and
# "non_violet_crime" ends up labelled "Non-Violet", exactly as two consecutive "if" checks would.
# NOTE(review): "Violet"/"Non-Violet" look like misspellings of "Violent"/"Non-Violent"; the
# labels are left unchanged here because they are written to the cleaned csv file.
crime_type_by_offense = {
    **{offense: "Violet" for offense in violet_crime},
    **{offense: "Non-Violet" for offense in non_violet_crime},
}

# One record (dictionary) per case for the dataframe
crime_records = []

# List to store date of crime
crime_date = []

# Loop through "crime_list"
for case in crime_list:

    offense = case["properties"]["Offense"]

    # Longitude is stored first and latitude second in the "coordinates" list
    coordinates = case["geometry"]["coordinates"]

    crime_records.append(
        {
            "Offense": offense,
            # Offenses found in neither list get no label (a missing value)
            "Crime Type": crime_type_by_offense.get(offense),
            "Lat": coordinates[1],
            "Lng": coordinates[0],
        }
    )

    # The first 10 characters of "Time_Begun" hold the date; parsing them checks the format
    # before the date is stored in "crime_date" as "yyyy-mm-dd"
    date_im = datetime.datetime.strptime(case["properties"]["Time_Begun"][:10], "%Y-%m-%d")
    crime_date.append(date_im.strftime("%Y-%m-%d"))

# Build "crime_info_pd" in one step from the collected records. Filling the dataframe one
# ".loc" assignment at a time is slow, and because rows were added to whatever the dataframe
# already held, running the cell a second time duplicated every case.
crime_info_pd = pd.DataFrame(crime_records, columns=crime_info_columns)

# Preview "crime_info_pd"
crime_info_pd.head()

Unnamed: 0,Offense,Crime Type,Lat,Lng
0,Theft,Non-Violet,29.829,-95.3808
1,Robbery,Violet,29.7036,-95.3501
2,Burglary,Non-Violet,29.768,-95.3189
3,Burglary,Non-Violet,29.5605,-95.1256
4,Robbery,Violet,29.9216,-95.4103


In [10]:
# Keep only the cases in which every column has a value
crime_info_pd = crime_info_pd.dropna(axis="index", how="any")

# Number of cases left in "crime_info_pd"
crime_info_pd.shape[0]

7368

In [11]:
# Earliest and latest dates found in "crime_date"
first_date = min(crime_date)
last_date = max(crime_date)

# Describe the date range of the HPD recent crime data and print it
date_range = f"from {first_date} to {last_date}"

print(f"Crime Report {date_range}")

Crime Report from 2019-02-21 to 2019-03-20


In [12]:
# Save the cleaned dataframe "crime_info_pd" as a csv file (header row kept, index column left out)
crime_info_pd.to_csv(
    "../data/cleandata/HPD_Crime_Report.csv",
    header=True,
    index=False,
)