In [1]:
import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt

In [2]:
# Allow up to 1000 rows to be rendered when a frame is displayed.
pd.set_option("display.max_rows", 1000)

# Read the crime CSV from the working directory into the notebook-wide frame.
df = pd.read_csv(
    "mpls_crime_2017-2022.csv",
    encoding="utf-8",
)

In [3]:
# Peek at five randomly chosen rows (no random_state is set, so the rows differ between runs).
df.sample(5)

Unnamed: 0,neighborhoodCrimeStatisticsID,neighborhood,ucrDescription,number,reportMonth,reportYear
41816,41817,Jordan,Aggravated Assault,12.0,7,2022
18635,18636,Lind - Bohanon,Burglary,10.0,10,2019
36870,36871,Hiawatha,Rape,0.0,12,2021
23644,23645,Marshall Terrace,Homicide,0.0,5,2020
13366,13367,Windom Park,Rape,0.0,2,2019


In [4]:
def clean_df(a_df):
    """Return a cleaned copy of a crime-statistics frame.

    Cleaning steps, in order:
      1. Drop the ``neighborhoodCrimeStatisticsID`` column. A frame that no
         longer has it is accepted, so cleaning an already-cleaned frame is
         a no-op instead of a KeyError.
      2. Remove rows whose neighborhood is one of the two "NOT ASSIGNED"
         placeholder labels.
      3. Replace retired neighborhood names with their current names.
      4. Lower-case every neighborhood name, then apply the spelling fixes
         that are keyed on the lower-cased form.

    Parameters
    ----------
    a_df : pandas.DataFrame
        Frame with a string-valued ``neighborhood`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``a_df`` itself is not modified.
    """
    # Retired names as they appear before lower-casing.
    legacy_names = {
        "CARAG": "South Uptown",
        "ECCO": "East Bde Maka Ska",
        "WEST CALHOUN": "West Maka Ska",
    }
    # Fixes matched against the lower-cased names.
    lowercase_fixes = {
        "stevens square - loring heights": "steven's square - loring heights",
        "west calhoun": "west maka ska",
    }

    cleaned = a_df.drop(columns="neighborhoodCrimeStatisticsID", errors="ignore")

    # Build the row mask from the frame being cleaned. The previous version
    # indexed the notebook-global `df`, which only worked when that exact
    # frame was the one passed in.
    hood = cleaned["neighborhood"]
    is_assigned = (hood != "Z_** NOT ASSIGNED **") & (hood != "** NOT ASSIGNED **")
    # .copy() so the column assignment below writes to an independent frame.
    cleaned = cleaned.loc[is_assigned].copy()

    cleaned["neighborhood"] = (
        cleaned["neighborhood"]
        .replace(legacy_names)
        .str.lower()
        .replace(lowercase_fixes)
    )

    return cleaned
    

In [5]:
# Breakpoint: from this cell on, `df` is overwritten with the cleaned frame,
# ordered by neighborhood name.

df = clean_df(df)
df = df.sort_values(by="neighborhood")

In [6]:
# Sanity check: after accounting for the name changes, every neighborhood has 480 data points (uncomment the line below to verify).

# df.neighborhood.value_counts()



In [7]:
# Mpls Neighborhood "Communities"
# One list of lower-cased neighborhood names per community.

CALHOUN_ISLES = [
    "bryn - mawr", "cedar - isles - dean", "east bde maka ska",
    "east isles", "kenwood", "lowry hill",
    "lowry hill east", "south uptown", "west maka ska",
]

CAMDEN = [
    "camden industrial", "cleveland", "folwell",
    "humboldt industrial area", "lind - bohanon", "mckinley",
    "shingle creek", "victory", "webber - camden",
]

CENTRAL = [
    "downtown east", "downtown west", "elliot park",
    "loring park", "north loop", "steven's square - loring heights",
]

LONGFELLOW = [
    "cooper", "hiawatha", "howe", "longfellow", "seward",
]

NEAR_NORTH = [
    "harrison", "hawthorne", "jordan",
    "near - north", "sumner - glenwood", "willard - hay",
]

NOKOMIS = [
    "diamond lake", "ericsson", "field", "hale",
    "keewaydin", "minnehaha", "morris park", "northrop",
    "page", "regina", "wenonah",
]

NORTHEAST = [
    "audubon park", "beltrami", "bottineau", "columbia park",
    "holland", "logan park", "marshall terrace", "northeast park",
    "sheridan", "st. anthony east", "st. anthony west", "waite park",
    "windom park",
]

PHILLIPS = [
    "east phillips", "midtown phillips", "phillips west", "ventura village",
]

POWDERHORN = [
    "bancroft", "bryant", "central", "corcoran",
    "lyndale", "powderhorn park", "standish", "whittier",
]

SOUTHWEST = [
    "armatage", "east harriet", "fulton",
    "kenny", "king field", "linden hills",
    "lynnhurst", "tangletown", "windom",
]

UNIVERSITY = [
    "cedar riverside", "como", "marcy holmes",
    "mid - city industrial", "nicollet island - east bank",
    "prospect park - east river road", "university of minnesota",
]

In [8]:
# Community names as strings; each one is the name of a variable that holds
# that community's list of neighborhoods.

COMMUNITIES = [
    "CALHOUN_ISLES",
    "CAMDEN",
    "CENTRAL",
    "LONGFELLOW",
    "NEAR_NORTH",
    "NOKOMIS",
    "NORTHEAST",
    "PHILLIPS",
    "POWDERHORN",
    "SOUTHWEST",
    "UNIVERSITY",
]

In [9]:
# ucrDescription values split into violent and property crimes,
# as per FBI UCR reporting standards.

violent_crimes = [
    "Aggravated Assault",
    "Homicide",
    "Rape",
    "Robbery",
]

property_crimes = [
    "Arson",
    "Auto Theft",
    "Burglary",
    "Larceny",
]

In [11]:
# Narrow down to just the month and type of crime I'm interested in:
# violent-crime rows with reportMonth == 5 and reportYear == 2020, ordered by
# neighborhood then crime type, with the (now constant) date columns removed.

before_violent = (
    df.loc[
        (df["ucrDescription"].isin(violent_crimes))
        & (df["reportYear"] == 2020)
        & (df["reportMonth"] == 5)
    ]
    .sort_values(by=["neighborhood", "ucrDescription"])
    .drop(["reportMonth", "reportYear"], axis=1)
)

# before_violent

In [12]:
# Violent-crime rows with reportMonth == 6 and reportYear == 2020, ordered by
# neighborhood then crime type, with the (now constant) date columns removed.
after_violent = (
    df.loc[
        (df["ucrDescription"].isin(violent_crimes))
        & (df["reportYear"] == 2020)
        & (df["reportMonth"] == 6)
    ]
    .sort_values(by=["neighborhood", "ucrDescription"])
    .drop(["reportMonth", "reportYear"], axis=1)
)

# after_violent

In [13]:
# Property-crime rows with reportMonth == 5 and reportYear == 2020, ordered by
# neighborhood then crime type, with the (now constant) date columns removed.
before_property = (
    df.loc[
        (df["ucrDescription"].isin(property_crimes))
        & (df["reportYear"] == 2020)
        & (df["reportMonth"] == 5)
    ]
    .sort_values(by=["neighborhood", "ucrDescription"])
    .drop(["reportMonth", "reportYear"], axis=1)
)

# before_property

In [14]:
# Property-crime rows with reportMonth == 6 and reportYear == 2020, ordered by
# neighborhood then crime type, with the (now constant) date columns removed.
after_property = (
    df.loc[
        (df["ucrDescription"].isin(property_crimes))
        & (df["reportYear"] == 2020)
        & (df["reportMonth"] == 6)
    ]
    .sort_values(by=["neighborhood", "ucrDescription"])
    .drop(["reportMonth", "reportYear"], axis=1)
)

# after_property

In [20]:
# Per-community totals of the "number" column in before_property.
#
# Each total is a one-element Series indexed by "number" (the same shape as the
# recorded `number    171.0` output). The [["number"]] selection is explicit
# because the frame also carries the text column ucrDescription: since
# pandas 2.0, groupby sums no longer drop non-numeric columns by default, so
# summing the whole frame would concatenate those strings.
#
# The targets on the left are paired by position with the neighborhood lists
# in the tuple at the bottom.
(
    CALH_bp, CAMD_bp, CENT_bp, LONG_bp, NEAR_bp, NOKO_bp,
    NORT_bp, PHIL_bp, POWD_bp, SOUT_bp, UNIV_bp,
) = (
    before_property.loc[before_property["neighborhood"].isin(neighborhoods)]
    .groupby("neighborhood")[["number"]]
    .sum()
    .sum()
    for neighborhoods in (
        CALHOUN_ISLES, CAMDEN, CENTRAL, LONGFELLOW, NEAR_NORTH, NOKOMIS,
        NORTHEAST, PHILLIPS, POWDERHORN, SOUTHWEST, UNIVERSITY,
    )
)


number    171.0
dtype: float64


In [None]:
# Per-community totals of the "number" column in after_property.
#
# Each total is a one-element Series indexed by "number". The [["number"]]
# selection is explicit because the frame also carries the text column
# ucrDescription: since pandas 2.0, groupby sums no longer drop non-numeric
# columns by default, so summing the whole frame would concatenate those
# strings.
#
# The targets on the left are paired by position with the neighborhood lists
# in the tuple at the bottom.
(
    CALH_ap, CAMD_ap, CENT_ap, LONG_ap, NEAR_ap, NOKO_ap,
    NORT_ap, PHIL_ap, POWD_ap, SOUT_ap, UNIV_ap,
) = (
    after_property.loc[after_property["neighborhood"].isin(neighborhoods)]
    .groupby("neighborhood")[["number"]]
    .sum()
    .sum()
    for neighborhoods in (
        CALHOUN_ISLES, CAMDEN, CENTRAL, LONGFELLOW, NEAR_NORTH, NOKOMIS,
        NORTHEAST, PHILLIPS, POWDERHORN, SOUTHWEST, UNIVERSITY,
    )
)


In [None]:
# Per-community totals of the "number" column in before_violent.
#
# The source frame is `before_violent`; this cell previously referenced
# `before_violence`, a name that is never defined, and so raised NameError.
#
# Each total is a one-element Series indexed by "number". The [["number"]]
# selection is explicit because the frame also carries the text column
# ucrDescription: since pandas 2.0, groupby sums no longer drop non-numeric
# columns by default, so summing the whole frame would concatenate those
# strings.
#
# The targets on the left are paired by position with the neighborhood lists
# in the tuple at the bottom.
(
    CALH_bv, CAMD_bv, CENT_bv, LONG_bv, NEAR_bv, NOKO_bv,
    NORT_bv, PHIL_bv, POWD_bv, SOUT_bv, UNIV_bv,
) = (
    before_violent.loc[before_violent["neighborhood"].isin(neighborhoods)]
    .groupby("neighborhood")[["number"]]
    .sum()
    .sum()
    for neighborhoods in (
        CALHOUN_ISLES, CAMDEN, CENTRAL, LONGFELLOW, NEAR_NORTH, NOKOMIS,
        NORTHEAST, PHILLIPS, POWDERHORN, SOUTHWEST, UNIVERSITY,
    )
)


In [None]:
# Per-community totals of the "number" column in after_violent.
#
# The source frame is `after_violent`; this cell previously referenced
# `after_violence`, a name that is never defined, and so raised NameError.
#
# Each total is a one-element Series indexed by "number". The [["number"]]
# selection is explicit because the frame also carries the text column
# ucrDescription: since pandas 2.0, groupby sums no longer drop non-numeric
# columns by default, so summing the whole frame would concatenate those
# strings.
#
# The targets on the left are paired by position with the neighborhood lists
# in the tuple at the bottom.
(
    CALH_av, CAMD_av, CENT_av, LONG_av, NEAR_av, NOKO_av,
    NORT_av, PHIL_av, POWD_av, SOUT_av, UNIV_av,
) = (
    after_violent.loc[after_violent["neighborhood"].isin(neighborhoods)]
    .groupby("neighborhood")[["number"]]
    .sum()
    .sum()
    for neighborhoods in (
        CALHOUN_ISLES, CAMDEN, CENTRAL, LONGFELLOW, NEAR_NORTH, NOKOMIS,
        NORTHEAST, PHILLIPS, POWDERHORN, SOUTHWEST, UNIVERSITY,
    )
)
