In [1]:
#Import Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from matplotlib.pyplot import figure
from sklearn import datasets
from scipy.stats import linregress
import datetime

In [2]:
# Load the raw crime CSV.
file = "Resources/crime_data.csv"

# read_csv already returns a DataFrame; keep it untouched as the raw reference.
crime_data = pd.read_csv(file)

# Work on an independent copy so later cleaning steps cannot alias the raw
# frame (pd.DataFrame(crime_data) would share its underlying data).
crime_df = crime_data.copy()

In [3]:
# Drop the ID column, which the rest of the cleaning does not use.
# errors="ignore" keeps this cell safe to re-run after the column is gone.
crime_df = crime_df.drop(columns=["neighborhoodCrimeStatisticsID"], errors="ignore")

In [4]:
# Data clean up: give the raw columns readable headers, then add a
# "Month/Year" text label (e.g. "4/2020") as a new "Date" column.
column_labels = {
    "neighborhood": "Neighborhood",
    "ucrDescription": "Crime",
    "number": "Number of Cases",
    "reportMonth": "Month",
    "reportYear": "Year",
}

crime_df = (
    crime_df
    .rename(columns=column_labels)
    .assign(Date=lambda df: df["Month"].astype(str) + "/" + df["Year"].astype(str))
)

In [5]:
# Cleaning up neighborhoods: map the raw labels to display names.
# Every raw label appears exactly once as a key; a repeated key in a dict
# literal silently discards the earlier entry.
neighborhood_names = {
    # Placeholder labels for records without a neighborhood
    '** NOT ASSIGNED **': "Unknown",
    'Z_** NOT ASSIGNED **': "Unknown",

    # Upper-case source labels -> title-case display names
    'ARMATAGE': 'Armatage',
    'AUDUBON PARK': 'Audubon Park',
    'BANCROFT': 'Bancroft',
    'BELTRAMI': 'Beltrami',
    'BOTTINEAU': 'Bottineau',
    'BRYANT': 'Bryant',
    'BRYN - MAWR': 'Bryn - Mawr',
    'CAMDEN INDUSTRIAL': 'Camden Industrial',
    'CEDAR - ISLES - DEAN': 'Cedar - Isles - Dean',
    'CEDAR RIVERSIDE': 'Cedar Riverside',
    'CENTRAL': 'Central',
    'CLEVELAND': 'Cleveland',
    'COLUMBIA PARK': 'Columbia Park',
    'COMO': 'Como',
    'COOPER': 'Cooper',
    'CORCORAN': 'Corcoran',
    'DIAMOND LAKE': 'Diamond Lake',
    'DOWNTOWN EAST': 'Downtown East',
    'DOWNTOWN WEST': 'Downtown West',
    'EAST HARRIET': 'East Harriet',
    'EAST ISLES': 'East Isles',
    'EAST PHILLIPS': 'East Phillips',
    'ELLIOT PARK': 'Elliot Park',
    'ERICSSON': 'Ericsson',
    'FIELD': 'Field',
    'FOLWELL': 'Folwell',
    'FULTON': 'Fulton',
    'HALE': 'Hale',
    'HARRISON': 'Harrison',
    'HAWTHORNE': 'Hawthorne',
    'HIAWATHA': 'Hiawatha',
    'HOLLAND': 'Holland',
    'HOWE': 'Howe',
    'HUMBOLDT INDUSTRIAL AREA': 'Humboldt Industrial Area',
    'JORDAN': 'Jordan',
    'KEEWAYDIN': 'Keewaydin',
    'KENNY': 'Kenny',
    'KENWOOD': 'Kenwood',
    'KING FIELD': 'King Field',
    'LIND - BOHANON': 'Lind - Bohanon',
    'LINDEN HILLS': 'Linden Hills',
    'LOGAN PARK': 'Logan Park',
    'LONGFELLOW': 'Longfellow',
    'LORING PARK': 'Loring Park',
    'LOWRY HILL': 'Lowry Hill',
    'LOWRY HILL EAST': 'Lowry Hill East',
    'LYNDALE': 'Lyndale',
    'LYNNHURST': 'Lynnhurst',
    'MARCY HOLMES': 'Marcy Holmes',
    'MARSHALL TERRACE': 'Marshall Terrace',
    'MCKINLEY': 'McKinley',
    'MID - CITY INDUSTRIAL': 'Mid - City Industrial',
    'MIDTOWN PHILLIPS': 'Midtown Phillips',
    'MINNEHAHA': 'Minnehaha',
    'MORRIS PARK': 'Morris Park',
    'NEAR - NORTH': 'Near - North',
    'NICOLLET ISLAND - EAST BANK': 'Nicollet Island - East Bank',
    'NORTH LOOP': 'North Loop',
    'NORTHEAST PARK': 'Northeast Park',
    'NORTHROP': 'Northrop',
    'PAGE': 'Page',
    'PHILLIPS WEST': 'Phillips West',
    'POWDERHORN PARK': 'Powderhorn Park',
    'PROSPECT PARK - EAST RIVER ROAD': 'Prospect Park - East River Road',
    'REGINA': 'Regina',
    'SEWARD': 'Seward',
    'SHERIDAN': 'Sheridan',
    'SHINGLE CREEK': 'Shingle Creek',
    'ST. ANTHONY EAST': 'St. Anthony East',
    'ST. ANTHONY WEST': 'St. Anthony West',
    'STANDISH': 'Standish',
    'STEVENS SQUARE - LORING HEIGHTS': "Steven's Square - Loring Heights",
    'SUMNER - GLENWOOD': 'Sumner - Glenwood',
    'TANGLETOWN': 'Tangletown',
    'UNIVERSITY OF MINNESOTA': 'University of Minnesota',
    'VENTURA VILLAGE': 'Ventura Village',
    'VICTORY': 'Victory',
    'WAITE PARK': 'Waite Park',
    'WEBBER - CAMDEN': 'Webber - Camden',
    'WENONAH': 'Wenonah',
    'WHITTIER': 'Whittier',
    'WILLARD - HAY': 'Willard - Hay',
    'WINDOM': 'Windom',
    'WINDOM PARK': 'Windom Park',

    # Renamed neighborhoods: map the raw label straight to the current name.
    # replace() is not relied on to chain one replacement into another, so a
    # 'WEST CALHOUN' -> 'West Calhoun' -> 'West Maka Ska' hop is avoided.
    'CARAG': "South Uptown",
    'ECCO': "East Bde Maka Ska",
    'WEST CALHOUN': "West Maka Ska",
    # Also catch rows that already carry the title-case legacy name.
    "West Calhoun": "West Maka Ska",
}

crime_df["Neighborhood"] = crime_df["Neighborhood"].replace(neighborhood_names)

In [6]:
# More clean up: keep only named neighborhoods inside the study timeline
# (years after 2017), then treat missing case counts as zero.
in_scope = (crime_df["Neighborhood"] != "Unknown") & (crime_df["Year"] > 2017)

# Filter and fill in one chain so the fill runs on a fresh frame, not on a
# slice of the original (which would raise SettingWithCopyWarning).
crime_df = (
    crime_df
    .loc[in_scope]
    .assign(**{"Number of Cases": lambda df: df["Number of Cases"].fillna(0)})
)

In [7]:
# Final cleaned DataFrames: the same four columns, indexed two ways.
final_columns = ["Neighborhood", "Crime", "Number of Cases", "Date"]

# Build the Neighborhood-indexed view first, while "Date" is still a column.
crime_df1 = crime_df[final_columns].set_index("Neighborhood")
crime_df = crime_df[final_columns].set_index("Date")

In [8]:
# Crime before vs. after: export the cleaned frames and the time windows
# used for the comparison.
crime_df.to_csv("Resources/crime.csv", encoding="utf8")
crime_df1.to_csv("Resources/crime1.csv", encoding="utf8")

# The index holds "M/YYYY" text labels that are neither sorted nor unique, so
# a label slice such as .loc["4/2020":"6/2020"] depends on the row order of the
# file and can raise KeyError. Parse the labels once and select each window
# with a boolean mask; the frames keep their original string index.
report_month = pd.to_datetime(crime_df.index, format="%m/%Y")

# April through June 2020 (inclusive)
gf_window = (report_month >= pd.Timestamp("2020-04-01")) & (report_month <= pd.Timestamp("2020-06-01"))
gf_df = crime_df.loc[gf_window]
gf_df.to_csv("Resources/george_floyd.csv", encoding="utf8")

# May 2018 through May 2020 (inclusive)
before_window = (report_month >= pd.Timestamp("2018-05-01")) & (report_month <= pd.Timestamp("2020-05-01"))
before_df = crime_df.loc[before_window]
before_df.to_csv("Resources/before_floyd.csv", encoding="utf8")

# June 2020 onward
after_window = report_month >= pd.Timestamp("2020-06-01")
after_df = crime_df.loc[after_window]
after_df.to_csv("Resources/after_floyd.csv", encoding="utf8")