In [1]:
import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt

In [2]:
pd.options.display.max_rows = 1000

df = pd.read_csv("mpls_crime_2017-2022.csv", encoding="utf-8")

In [None]:
df.sample(n=5)

In [3]:
def clean_df(a_df):
    
    a_df = a_df.drop("neighborhoodCrimeStatisticsID", axis=1)
    a_df = a_df.loc[df["neighborhood"] != "Z_** NOT ASSIGNED **"]
    a_df = a_df.loc[df["neighborhood"] != "** NOT ASSIGNED **"]
    a_df["neighborhood"] = a_df["neighborhood"].replace("CARAG", "South Uptown")
    a_df["neighborhood"] = a_df["neighborhood"].replace("ECCO", "East Bde Maka Ska")
    a_df["neighborhood"] = a_df["neighborhood"].replace("WEST CALHOUN", "West Maka Ska")
    a_df["neighborhood"] = a_df.neighborhood.str.lower()
    a_df["neighborhood"] = a_df["neighborhood"].replace("stevens square - loring heights", "steven's square - loring heights")
    a_df["neighborhood"] = a_df["neighborhood"].replace("west calhoun", "west maka ska")
    
    return a_df.copy()
    

In [4]:
# Breakpoint

df = clean_df(df).sort_values(by="neighborhood")

In [6]:
# 480 data points for each neighborhood, accounting for name changes!

# df.neighborhood.value_counts()



In [7]:
# Mpls Neighborhood "Communities"

CALHOUN_ISLES = ["bryn - mawr", "cedar - isles - dean", "east bde maka ska", "east isles", "kenwood", "lowry hill", \
                 "lowry hill east", "south uptown", "west maka ska"]

CAMDEN = ["camden industrial", "cleveland", "folwell", "humboldt industrial area", "lind - bohanon", "mckinley", \
          "shingle creek", "victory", "webber - camden"]

CENTRAL = ["downtown east", "downtown west", "elliot park", "loring park", "north loop", "steven's square - loring heights"]

LONGFELLOW = ["cooper", "hiawatha", "howe", "longfellow", "seward"]

NEAR_NORTH = ["harrison", "hawthorne", "jordan", "near - north", "sumner - glenwood", "willard - hay"]

NOKOMIS = ["diamond lake", "ericsson", "field", "hale", "keewaydin", "minnehaha", "morris park", \
           "northrop", "page", "regina", "wenonah"]

NORTHEAST = ["audubon park", "beltrami", "bottineau", "columbia park", "holland", "logan park", "marshall terrace", \
             "northeast park", "sheridan", "st. anthony east", "st. anthony west", "waite park", "windom park"]

PHILLIPS = ["east phillips", "midtown phillips", "phillips west", "ventura village"]

POWDERHORN = ["bancroft", "bryant", "central", "corcoran", "lyndale", "powderhorn park", "standish", "whittier"]
 
SOUTHWEST = ["armatage", "east harriet", "fulton", "kenny", "king field", "linden hills", "lynnhurst", "tangletown", \
             "windom"]

UNIVERSITY = ["cedar riverside", "como", "marcy holmes", "mid - city industrial", "nicollet island - east bank", \
              "prospect park - east river road", "university of minnesota"]

In [8]:
# Make list of communities names, each a list of neighborhoods

COMMUNITIES = ["CALHOUN_ISLES", "CAMDEN", "CENTRAL", "LONGFELLOW", "NEAR_NORTH", "NOKOMIS", "NORTHEAST", \
              "PHILLIPS", "POWDERHORN", "SOUTHWEST", "UNIVERSITY"]

In [9]:
# As per FBI ucr reporting standards

violent_crimes = ["Aggravated Assault", "Homicide", "Rape", "Robbery"]

property_crimes = ["Arson", "Auto Theft", "Burglary", "Larceny"]

In [11]:
# Narrow down to just months and type of crime I'm interested in

before_violent = df[ (df["reportMonth"]==5) & (df["reportYear"]==2020) & (df["ucrDescription"].isin(violent_crimes)) ]
before_violent = before_violent.sort_values(by=["neighborhood", "ucrDescription"])

before_violent = before_violent.drop(["reportMonth", "reportYear"], axis=1)

before_violent

Unnamed: 0,neighborhood,ucrDescription,number
23232,armatage,Aggravated Assault,1.0
23236,armatage,Homicide,0.0
23238,armatage,Rape,0.0
23239,armatage,Robbery,0.0
23240,audubon park,Aggravated Assault,0.0
23244,audubon park,Homicide,0.0
23246,audubon park,Rape,0.0
23247,audubon park,Robbery,0.0
23248,bancroft,Aggravated Assault,2.0
23252,bancroft,Homicide,0.0


In [12]:
after_violent = df[ (df["reportMonth"]==6) & (df["reportYear"]==2020) & (df["ucrDescription"].isin(violent_crimes)) ]
after_violent = after_violent.sort_values(by=["neighborhood", "ucrDescription"])

after_violent = after_violent.drop(["reportMonth", "reportYear"], axis=1)

after_violent

Unnamed: 0,neighborhood,ucrDescription,number
23936,armatage,Aggravated Assault,1.0
23940,armatage,Homicide,0.0
23942,armatage,Rape,0.0
23943,armatage,Robbery,0.0
23944,audubon park,Aggravated Assault,1.0
23948,audubon park,Homicide,0.0
23950,audubon park,Rape,0.0
23951,audubon park,Robbery,2.0
23952,bancroft,Aggravated Assault,3.0
23956,bancroft,Homicide,0.0


In [13]:
before_property = df[ (df["reportMonth"]==5) & (df["reportYear"]==2020) & (df["ucrDescription"].isin(property_crimes)) ]
before_property = before_property.sort_values(by=["neighborhood", "ucrDescription"])

before_property = before_property.drop(["reportMonth", "reportYear"], axis=1)

before_property

Unnamed: 0,neighborhood,ucrDescription,number
23233,armatage,Arson,0.0
23234,armatage,Auto Theft,0.0
23235,armatage,Burglary,6.0
23237,armatage,Larceny,2.0
23241,audubon park,Arson,0.0
23242,audubon park,Auto Theft,0.0
23243,audubon park,Burglary,0.0
23245,audubon park,Larceny,6.0
23249,bancroft,Arson,0.0
23250,bancroft,Auto Theft,6.0


In [14]:
after_property = df[ (df["reportMonth"]==6) & (df["reportYear"]==2020) & (df["ucrDescription"].isin(property_crimes)) ]
after_property = after_property.sort_values(by=["neighborhood", "ucrDescription"])

after_property = after_property.drop(["reportMonth", "reportYear"], axis=1)

after_property

Unnamed: 0,neighborhood,ucrDescription,number
23937,armatage,Arson,0.0
23938,armatage,Auto Theft,2.0
23939,armatage,Burglary,1.0
23941,armatage,Larceny,1.0
23945,audubon park,Arson,0.0
23946,audubon park,Auto Theft,2.0
23947,audubon park,Burglary,2.0
23949,audubon park,Larceny,4.0
23953,bancroft,Arson,0.0
23954,bancroft,Auto Theft,3.0


## Now have the dfs I will use for all further work:
        
    before_violent
    after_violent
    
    before_property
    after_property

## For sanity, group neighborhoods into recognized "Communities"

CALHOUN_ISLES, CAMDEN, CENTRAL, LONGFELLOW, NEAR_NORTH, NOKOMIS, NORTHEAST, PHILLIPS, POWDERHORN, SOUTHWEST, UNIVERSITY

## Naming scheme:
    
    CALH = CALHOUN_ISLES
        
         bp_CALH_df 
         bp_CALH_items (series)
         bp_CALH_agg (scalar)
        
         ap_CALH_df 
         ap_CALH_items (series)
         ap_CALH_agg (scalar)
        
         =========================
         
         bv_CALH_df 
         bv_CALH_items (series)
         bv_CALH_agg (scalar)
        
         av_CALH_df 
         av_CALH_items (series)
         av_CALH_agg (scalar)
        
    CAMD = CAMDEN
    CENT = CENTRAL
    LONG = LONGFELLOW
    NEAR = NEAR_NORTH
    NOKO = NOKOMIS
    NORT = NORTHEAST
    PHIL = PHILLIPS
    POWD = POWDERHORN
    SOUT = SOUTHWEST
    UNIV = UNIVERSITY

In [None]:
def run_agg(a_df):
    """ Return Series """
    return a_df.groupby(["neighborhood", "ucrDescription"])["number"].sum()

In [None]:
run_agg(before_property).info()

# .unstack().sum().sum()

In [36]:
bp_CALH_df = before_property.loc[ (before_property["neighborhood"].isin(CALHOUN_ISLES)), : ]
bp_CALH_df.head()

Unnamed: 0,neighborhood,ucrDescription,number
23281,bryn - mawr,Arson,0.0
23282,bryn - mawr,Auto Theft,1.0
23283,bryn - mawr,Burglary,3.0
23285,bryn - mawr,Larceny,1.0
23305,cedar - isles - dean,Arson,0.0


In [37]:
bp_CALH_items = bp_CALH_df.groupby("neighborhood").sum()
bp_CALH_items

Unnamed: 0_level_0,number
neighborhood,Unnamed: 1_level_1
bryn - mawr,5.0
cedar - isles - dean,20.0
east bde maka ska,9.0
east isles,19.0
kenwood,5.0
lowry hill,11.0
lowry hill east,61.0
south uptown,22.0
west maka ska,19.0


In [39]:
bp_CALH_agg = bp_CALH_items.sum()
bp_CALH_agg

number    171.0
dtype: float64

In [38]:
bp_CALH_items = bp_CALH_df.groupby(["neighborhood", "ucrDescription"]).sum()
bp_CALH_items

Unnamed: 0_level_0,Unnamed: 1_level_0,number
neighborhood,ucrDescription,Unnamed: 2_level_1
bryn - mawr,Arson,0.0
bryn - mawr,Auto Theft,1.0
bryn - mawr,Burglary,3.0
bryn - mawr,Larceny,1.0
cedar - isles - dean,Arson,0.0
cedar - isles - dean,Auto Theft,3.0
cedar - isles - dean,Burglary,5.0
cedar - isles - dean,Larceny,12.0
east bde maka ska,Arson,0.0
east bde maka ska,Auto Theft,1.0


In [31]:
bp_CALH_agg = bp_CALH_items.sum()
bp_CALH_agg

Unnamed: 0_level_0,Unnamed: 1_level_0,number
neighborhood,ucrDescription,Unnamed: 2_level_1
bryn - mawr,Arson,0.0
bryn - mawr,Auto Theft,1.0
bryn - mawr,Burglary,4.0
bryn - mawr,Larceny,5.0
cedar - isles - dean,Arson,5.0
cedar - isles - dean,Auto Theft,8.0
cedar - isles - dean,Burglary,13.0
cedar - isles - dean,Larceny,25.0
east bde maka ska,Arson,25.0
east bde maka ska,Auto Theft,26.0


In [None]:
# bv_CALH_items (series)

# bv_CALH_agg (scalar)

In [None]:
def get_community_totals(a_df, neighborhood):
    
    """ Returns Series """
    
    return (
                 a_df
                    .sort_values(by=@neighborhood))
                    .groupby(@neighborhood)
                    ["number"]
                    .sum()
                    
           )

In [None]:
before_property.sort_values(by="neighborhood")

In [None]:
before_property.groupby(["neighborhood", "ucrDescription"])["number"].sum()      

In [None]:
.plot(kind="bar")

In [None]:
bp_CALH_agg = bp_CALH_items.groupby("neighborhood")["number"].sum()

In [None]:
bp_CALH_items

In [None]:
bp_CALH_agg

In [None]:
run_agg(before_property)

In [None]:
before_grp = before.groupby(["neighborhood", "ucrDescription"])
after_grp = after.groupby(["neighborhood", "ucrDescription"])

In [None]:
before_agg = before_grp["number"].sum()
after_agg = after_grp["number"].sum()

In [None]:
before_agg[:24].unstack().plot.bar()

In [None]:
after_agg[:24].unstack().plot.bar()

In [None]:
before_property.sort_values(by=["neighborhood", "ucrDescription"])
after_property.sort_values(by=["neighborhood", "ucrDescription"])

before_violent.sort_values(by=["neighborhood", "ucrDescription"])
after_violent.sort_values(by=["neighborhood", "ucrDescription"])

In [None]:
before_property_grp = before.groupby(["neighborhood", "ucrDescription"])
after_property_grp = after.groupby(["neighborhood", "ucrDescription"])

before_violent_grp = before.groupby(["neighborhood", "ucrDescription"])
after_violent_grp = after.groupby(["neighborhood", "ucrDescription"])

In [None]:
before_property_agg = before_grp["number"].sum()
after_property_agg = after_grp["number"].sum()

before_violent_agg = before_grp["number"].sum()
after_violent_agg = after_grp["number"].sum()

In [None]:
after_violent_agg.head(10)

In [None]:
before_violent_agg.head(10)

In [None]:
after_violent_agg[:24].unstack().plot.bar()

In [None]:
before_violent_agg[:24].unstack().plot.bar()