In [None]:
import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt

In [None]:
# Allow up to 1000 rows in displayed output before pandas truncates it.
pd.set_option("display.max_rows", 1000)

# Read the raw crime CSV into the working frame.
df = pd.read_csv(
    "mpls_crime_2017-2022.csv",
    encoding="utf-8",
)

In [None]:
# Peek at five randomly chosen rows.
df.sample(5)

In [None]:
# Remove the neighborhoodCrimeStatisticsID column, which no later cell reads.
# errors="ignore" keeps the cell re-runnable: without it, executing the cell a
# second time raises KeyError because the column has already been dropped.
df = df.drop(columns=["neighborhoodCrimeStatisticsID"], errors="ignore")
df

In [None]:
# List the distinct neighborhood labels present in the data.
df["neighborhood"].unique()

In [None]:
# Discard rows whose neighborhood is one of the "not assigned" placeholders.
unassigned_labels = ["Z_** NOT ASSIGNED **", "** NOT ASSIGNED **"]

# .copy() makes the filtered result an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df = df.loc[~df["neighborhood"].isin(unassigned_labels)].copy()

# Row count per remaining neighborhood label.
df["neighborhood"].value_counts()

In [None]:
# Old neighborhood names mapped to their current names, all lower-case.
renamed_neighborhoods = {
    "carag": "south uptown",
    "ecco": "east bde maka ska",
    "west calhoun": "west maka ska",
}

# Lower-case FIRST, then rename. The previous order (case-sensitive rename on
# upper-case spellings, then lower-casing) silently missed any row whose old
# name was not written in all capitals.
# .assign() builds a new frame rather than writing into a possibly filtered one.
df = df.assign(
    neighborhood=df["neighborhood"].str.lower().replace(renamed_neighborhoods)
)

# Row count per neighborhood after normalisation.
df["neighborhood"].value_counts()

In [None]:
# Collapse the remaining spelling variants onto a single label each.
name_fixes = {
    "stevens square - loring heights": "steven's square - loring heights",
    "west calhoun": "west maka ska",
}
df["neighborhood"] = df["neighborhood"].replace(name_fixes)

# 480 data points for each neighborhood, accounting for name changes!
df["neighborhood"].value_counts()

In [None]:
# Display the frame as it stands after the neighborhood clean-up cells.
df

In [None]:
# List the distinct crime descriptions present in the data.
df["ucrDescription"].unique()

In [None]:
# Crime categories, grouped as per FBI UCR reporting standards.
violent_crimes = [
    "Aggravated Assault",
    "Homicide",
    "Rape",
    "Robbery",
]

property_crimes = [
    "Arson",
    "Auto Theft",
    "Burglary",
    "Larceny",
]

In [None]:
# Narrow down to just the months and types of crime I'm interested in:
# May 2020 ("before") and June 2020 ("after"), split into violent and property crime.

def _crimes_in_month(frame, month, year, crime_types):
    """Select one report month of `frame`, restricted to the given crime types.

    Parameters
    ----------
    frame : DataFrame with `reportMonth`, `reportYear`, `ucrDescription`
        and `neighborhood` columns.
    month, year : values compared for equality against `reportMonth` / `reportYear`.
    crime_types : list of `ucrDescription` values to keep.

    Returns
    -------
    DataFrame of the matching rows, sorted by neighborhood then ucrDescription.
    """
    in_month = (frame["reportMonth"] == month) & (frame["reportYear"] == year)
    of_type = frame["ucrDescription"].isin(crime_types)
    return frame[in_month & of_type].sort_values(by=["neighborhood", "ucrDescription"])


before_violent = _crimes_in_month(df, 5, 2020, violent_crimes)
after_violent = _crimes_in_month(df, 6, 2020, violent_crimes)

before_property = _crimes_in_month(df, 5, 2020, property_crimes)
after_property = _crimes_in_month(df, 6, 2020, property_crimes)

In [None]:
# Month/year are constant within this selection, so drop them.
# errors="ignore" lets the cell be re-run without a KeyError once they are gone.
before_property = before_property.drop(columns=["reportMonth", "reportYear"], errors="ignore")

In [None]:
# Month/year are constant within this selection, so drop them.
# errors="ignore" lets the cell be re-run without a KeyError once they are gone.
after_property = after_property.drop(columns=["reportMonth", "reportYear"], errors="ignore")

In [None]:
# Month/year are constant within this selection, so drop them.
# errors="ignore" lets the cell be re-run without a KeyError once they are gone.
before_violent = before_violent.drop(columns=["reportMonth", "reportYear"], errors="ignore")

In [None]:
# Month/year are constant within this selection, so drop them.
# errors="ignore" lets the cell be re-run without a KeyError once they are gone.
after_violent = after_violent.drop(columns=["reportMonth", "reportYear"], errors="ignore")

In [None]:
# Neighborhood names (lower-case) grouped by community.
# NOTE(review): entries must match the normalised `neighborhood` values in the
# data exactly. Hyphenated names such as "bryn-mawr" should be checked against
# the data's spelling -- TODO confirm.

CALHOUN_ISLES = [
    "bryn-mawr", "cedar-isles-dean", "east bde maka ska", "east isles", "kenwood",
    "lowry hill", "lowry hill east", "south uptown", "west maka ska",
]

CAMDEN = [
    "camden industrial", "cleveland", "folwell", "humboldt industrial area",
    "lind-bohanon", "mckinley", "shingle creek", "victory", "webber-camden",
]

# The data-cleaning step rewrites this neighborhood to
# "steven's square - loring heights"; the earlier spelling here
# ("stevens square-loring heights") could never match it.
CENTRAL = [
    "downtown east", "downtown west", "elliot park", "loring park", "north loop",
    "steven's square - loring heights",
]

LONGFELLOW = ["cooper", "hiawatha", "howe", "longfellow", "seward"]

NEAR_NORTH = ["harrison", "hawthorne", "jordan", "near-north", "sumner-glenwood", "willard-hay"]

NOKOMIS = [
    "diamond lake", "ericsson", "field", "hale", "keewaydin", "minnehaha",
    "morris park", "northrop", "page", "regina", "wenonah",
]

NORTHEAST = [
    "audubon park", "beltrami", "bottineau", "columbia park", "holland", "logan park",
    "marshall terrace", "northeast park", "sheridan", "st. anthony east",
    "st. anthony west", "waite park", "windom park",
]

PHILLIPS = ["east phillips", "midtown phillips", "phillips west", "ventura village"]

POWDERHORN = [
    "bancroft", "bryant", "central", "corcoran", "lyndale", "powderhorn park",
    "standish", "whittier",
]

SOUTHWEST = [
    "armatage", "east harriet", "fulton", "kenny", "king field", "linden hills",
    "lynnhurst", "tangletown", "windom",
]

UNIVERSITY = [
    "cedar-riverside", "como", "marcy-holmes", "mid-city industrial",
    "nicollet island-east bank", "prospect park-east river road",
    "university of minnesota",
]

# All communities, in a fixed order.
COMMUNITIES = [
    CALHOUN_ISLES, CAMDEN, CENTRAL, LONGFELLOW, NEAR_NORTH, NOKOMIS, NORTHEAST,
    PHILLIPS, POWDERHORN, SOUTHWEST, UNIVERSITY,
]

In [None]:
# `before` / `after` were not defined by any cell above, so this cell raised
# NameError on a fresh kernel. Build them from the selections made earlier.
# NOTE(review): assumed to mean "all selected crimes (violent + property) in
# each month" -- confirm this matches the original intent.
before = pd.concat([before_violent, before_property])
after = pd.concat([after_violent, after_property])

# Group each month's rows by neighborhood and crime type.
before_grp = before.groupby(["neighborhood", "ucrDescription"])
after_grp = after.groupby(["neighborhood", "ucrDescription"])

In [None]:
# Sum the `number` column within each (neighborhood, ucrDescription) group.
before_agg, after_agg = (
    grouped["number"].sum() for grouped in (before_grp, after_grp)
)

In [None]:
# Bar chart of the first 24 aggregated rows, one bar colour per crime type.
before_agg.head(24).unstack().plot(kind="bar")

In [None]:
# Bar chart of the first 24 aggregated rows, one bar colour per crime type.
after_agg.head(24).unstack().plot(kind="bar")

In [None]:
# sort_values() returns a new frame; the results were previously discarded,
# so these statements had no effect. Assign them back.
sort_keys = ["neighborhood", "ucrDescription"]

before_property = before_property.sort_values(by=sort_keys)
after_property = after_property.sort_values(by=sort_keys)

before_violent = before_violent.sort_values(by=sort_keys)
after_violent = after_violent.sort_values(by=sort_keys)

# Keep the cell's displayed output: the sorted "after" violent-crime rows.
after_violent

In [None]:
# Group each selection by neighborhood and crime type.
# Each *_grp must be built from its own frame; previously all four grouped the
# generic `before` / `after` names, which made the property and violent
# groupings identical.
group_keys = ["neighborhood", "ucrDescription"]

before_property_grp = before_property.groupby(group_keys)
after_property_grp = after_property.groupby(group_keys)

before_violent_grp = before_violent.groupby(group_keys)
after_violent_grp = after_violent.groupby(group_keys)

In [None]:
# Sum the `number` column per (neighborhood, ucrDescription) pair for each selection.
# Previously all four aggregates were computed from `before_grp` / `after_grp`,
# so the property and violent results were identical. Aggregate each
# category's own frame instead.
group_keys = ["neighborhood", "ucrDescription"]

before_property_agg = before_property.groupby(group_keys)["number"].sum()
after_property_agg = after_property.groupby(group_keys)["number"].sum()

before_violent_agg = before_violent.groupby(group_keys)["number"].sum()
after_violent_agg = after_violent.groupby(group_keys)["number"].sum()

In [None]:
# First ten rows of the "after" violent aggregate.
after_violent_agg.iloc[:10]

In [None]:
# First ten rows of the "before" violent aggregate.
before_violent_agg.iloc[:10]

In [None]:
# Bar chart of the first 24 aggregated rows, one bar colour per crime type.
after_violent_agg.head(24).unstack().plot(kind="bar")

In [None]:
# Bar chart of the first 24 aggregated rows, one bar colour per crime type.
before_violent_agg.head(24).unstack().plot(kind="bar")

In [None]:
# Flatten the per-community lists into one list, keeping community order.
MPLS_CITIES = [name for community in COMMUNITIES for name in community]

# Distinct neighborhood labels actually present in the data.
df_cities = df["neighborhood"].unique()

In [None]:
# Print each listed neighborhood that does not appear in the data.
for c in sorted(MPLS_CITIES):
    if c not in df_cities:
        print(c)

In [None]:
# True only when the listed names and the data's names are exactly the same.
sorted(df_cities) == sorted(MPLS_CITIES)

In [None]:
# Number of names in the hand-built neighborhood list.
len(MPLS_CITIES)

In [None]:
# Number of distinct neighborhood labels found in the data.
len(df_cities)

In [None]:
# Sorted hand-built list, for side-by-side comparison with the data's labels.
print(sorted(MPLS_CITIES))

In [None]:
# Sorted labels from the data, for side-by-side comparison with the hand-built list.
print(sorted(df_cities))

In [None]:
# Names that appear in only one of the two sources (listed names vs. data labels).
candidates = MPLS_CITIES + df_cities.tolist()
oddballs = [
    name
    for name in candidates
    if not (name in MPLS_CITIES and name in df_cities)
]
print(oddballs)

In [None]:
# Alphabetically sorted copy of the hand-built neighborhood list.
master_city_list = [*MPLS_CITIES]
master_city_list.sort()

In [None]:
# True only when the master list matches the data's neighborhood labels exactly.
sorted(df_cities) == master_city_list

In [None]:
# Empty mapping; filled with community name -> neighborhood list below.
master_city_dict = {}

In [None]:
# Register every community's neighborhood list under the community's name.
master_city_dict.update(
    CALHOUN_ISLES=CALHOUN_ISLES,
    CAMDEN=CAMDEN,
    CENTRAL=CENTRAL,
    LONGFELLOW=LONGFELLOW,
    NEAR_NORTH=NEAR_NORTH,
    NOKOMIS=NOKOMIS,
    NORTHEAST=NORTHEAST,
    PHILLIPS=PHILLIPS,
    POWDERHORN=POWDERHORN,
    SOUTHWEST=SOUTHWEST,
    UNIVERSITY=UNIVERSITY,
)

In [None]:
# Show the community -> neighborhoods mapping.
print(master_city_dict)

In [None]:
# Display the before_property_agg series computed in an earlier cell.
before_property_agg

In [None]:
# Display the after_property_agg series computed in an earlier cell.
after_property_agg

In [None]:
# Display the before_violent_agg series computed in an earlier cell.
before_violent_agg 

In [None]:
# Display the after_violent_agg series computed in an earlier cell.
after_violent_agg 