In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Empty list placeholder; no other cell in the visible notebook reads `dat`.
dat = list()

In [None]:
# Read the raw text file one record at a time. For each record: drop every
# comma, trim surrounding whitespace, then cut the text at the literal
# 'Police'. Each entry of `lines` is the list of pieces produced by that cut.
with open('MCCA_data_w_prog.txt') as fp:
    lines = []
    for record in fp:
        cleaned = record.replace(',', '').strip()
        lines.append(cleaned.split('Police'))

In [None]:
# Separate each parsed record into its two halves:
#   piece 0 (text before 'Police') -> names, trailing spaces removed
#   piece 1 (text after 'Police')  -> numbers, leading spaces removed
names = []
numbers = []
for pieces in lines:
    names.append(pieces[0].rstrip(' '))
    numbers.append(pieces[1].lstrip(' '))

In [None]:
# Break each record's number string into its single-space-separated fields.
# NOTE: this rebinds `numbers` from a list of strings to a list of lists, so
# the cell cannot be re-run on its own without re-running the cell above it.
numbers = [field_string.split(' ') for field_string in numbers]

In [None]:
# One row per record, one column per field (columns are numbered 0, 1, 2, ...),
# then every cell is cast from str to int.
string_table = pd.DataFrame(numbers)
rawdf = string_table.astype(int)

In [None]:
# rawdf interleaves the two years: columns 0, 2, 4, 6 feed the 2020 table and
# columns 1, 3, 5, 7 feed the 2019 table, in the order
# homicide, rape, robbery, assault.
#
# .copy() gives each year its own independent frame. Without it both tables are
# slices of rawdf, and the columns added to them in later cells are writes into
# a slice, which makes pandas emit SettingWithCopyWarning.
offense_names = ['homicide', 'rape', 'robbery', 'assault']
df_2020 = rawdf[[0, 2, 4, 6]].copy()
df_2019 = rawdf[[1, 3, 5, 7]].copy()
df_2020.columns = offense_names
df_2019.columns = offense_names

In [None]:
# Attach the two flag columns (rawdf columns 8 and 9) to both yearly tables as
# 'prog_narrow' and 'prog_broad'. Later cells compare these flags against 1 and 0.
#
# .to_numpy() makes the assignment positional. Assigning the Series itself
# aligns on index labels, so re-running this cell after the yearly tables have
# been re-indexed by city name would silently fill both columns with NaN.
for flag_name, source_col in (('prog_narrow', 8), ('prog_broad', 9)):
    flag_values = rawdf[source_col].to_numpy()
    df_2019[flag_name] = flag_values
    df_2020[flag_name] = flag_values

In [None]:
# Label the rows of both yearly tables with the parsed names and call that
# index 'city'.
for yearly_table in (df_2020, df_2019):
    yearly_table.index = names
    yearly_table.index.name = 'city'

In [None]:
# violent_crime = homicide + rape + robbery + assault, computed row by row
# for each year.
offense_columns = ['homicide', 'rape', 'robbery', 'assault']
for yearly_table in (df_2020, df_2019):
    yearly_table['violent_crime'] = yearly_table[offense_columns].sum(axis=1)

In [None]:
# Mean over cities of the per-city 2020 / 2019 violent-crime ratio.
city_violent_ratio = df_2020['violent_crime'].div(df_2019['violent_crime'])
city_violent_ratio.mean()

In [None]:
# Partition each year's table on the narrow flag:
#   prog_narrow == 1 -> *_prog_narrow
#   prog_narrow == 0 -> *_nonprog_narrow
narrow_flag_2020 = df_2020['prog_narrow']
narrow_flag_2019 = df_2019['prog_narrow']
df_2020_prog_narrow = df_2020.loc[narrow_flag_2020.eq(1)]
df_2020_nonprog_narrow = df_2020.loc[narrow_flag_2020.eq(0)]
df_2019_prog_narrow = df_2019.loc[narrow_flag_2019.eq(1)]
df_2019_nonprog_narrow = df_2019.loc[narrow_flag_2019.eq(0)]

In [None]:
# Narrow-progressive subset: pooled 2020 violent crime over pooled 2019 violent crime.
prog_violent_2020 = df_2020_prog_narrow['violent_crime'].sum()
prog_violent_2019 = df_2019_prog_narrow['violent_crime'].sum()
prog_violent_2020 / prog_violent_2019

In [None]:
# Scatter of 2020 vs 2019 homicide counts, one point per city, coloured by the
# narrow flag (blue: prog_narrow == 1, red: prog_narrow == 0).
fig, ax = plt.subplots(figsize=(16,16))
ax.scatter(df_2019_prog_narrow['homicide'], df_2020_prog_narrow['homicide'], color='blue', label='progressive DA')
ax.scatter(df_2019_nonprog_narrow['homicide'], df_2020_nonprog_narrow['homicide'], color='red', label='non-progressive DA')

# y = x reference line: points above it had more homicides in 2020 than in 2019.
# The line always reaches at least 700 and is extended when a city's count is
# larger, so it never stops short of the data.
diag_max = max(700, df_2019['homicide'].max(), df_2020['homicide'].max())
ax.plot([0, diag_max], [0, diag_max])
ax.set_xlabel('# homicides 2019')
ax.set_ylabel('# homicides 2020')

# Tag every point with the first four characters of its city name. The two
# tables are paired by row position, so duplicate city labels cannot turn a
# coordinate into a Series. ax.annotate keeps the labels on this figure's axes
# rather than whatever pyplot considers current.
for city, count_2019, count_2020 in zip(df_2020.index, df_2019['homicide'], df_2020['homicide']):
    ax.annotate(city[0:4], (count_2019, count_2020))

ax.legend()
fig.suptitle('Homicides in 2020 vs Homicides in 2019, progressive v non-progressive DAs', fontsize=20)
fig.tight_layout()

In [None]:
# Per-city violent-crime ratio (2020 / 2019), computed separately for the
# narrow-progressive and narrow-non-progressive subsets.
prog_ratios = df_2020_prog_narrow['violent_crime'].div(df_2019_prog_narrow['violent_crime'])
nonprog_ratios = df_2020_nonprog_narrow['violent_crime'].div(df_2019_nonprog_narrow['violent_crime'])

In [None]:
# Per-city homicide ratio (2020 / 2019), computed separately for the
# narrow-progressive and narrow-non-progressive subsets.
prog_hom_ratios = df_2020_prog_narrow['homicide'].div(df_2019_prog_narrow['homicide'])
nonprog_hom_ratios = df_2020_nonprog_narrow['homicide'].div(df_2019_nonprog_narrow['homicide'])

In [None]:
# Overlaid, half-transparent histograms of the per-city homicide ratios for the
# two narrow groups.
fig, ax = plt.subplots()
fig.suptitle('Homicide YoY Increase')
hist_specs = (
    (prog_hom_ratios, 'blue', 'progressive DA'),
    (nonprog_hom_ratios, 'red', 'non-progressive DA'),
)
for ratios, colour, legend_text in hist_specs:
    ax.hist(ratios, color=colour, alpha=0.5, label=legend_text)
ax.legend()

# It makes more sense to compare ratios of sums than means of per-city ratios (a mean of ratios weights a small city as heavily as a large one)

In [None]:
# All cities pooled: total homicides in 2020 divided by total homicides in 2019.
(
    df_2020['homicide'].sum()
    / df_2019['homicide'].sum()
)

In [None]:
# All cities pooled: total violent crime in 2020 divided by total violent crime in 2019.
(
    df_2020['violent_crime'].sum()
    / df_2019['violent_crime'].sum()
)

### Increase in homicides in narrow-progressive DA jurisdictions is 36%, violent crime is up 4.5%

In [None]:
# Narrow-progressive subset: pooled 2020 homicides over pooled 2019 homicides.
(
    df_2020_prog_narrow['homicide'].sum()
    / df_2019_prog_narrow['homicide'].sum()
)

In [None]:
# Narrow-progressive subset: pooled 2020 violent crime over pooled 2019 violent crime.
(
    df_2020_prog_narrow['violent_crime'].sum()
    / df_2019_prog_narrow['violent_crime'].sum()
)

### Homicides in broad-progressive DA jurisdictions are up 34%, violent crime is up 4.5%

In [None]:
# Rows with prog_broad == 1: pooled 2020 homicides over pooled 2019 homicides.
broad_rows_2020 = df_2020['prog_broad'].eq(1)
broad_rows_2019 = df_2019['prog_broad'].eq(1)
df_2020.loc[broad_rows_2020, 'homicide'].sum() / df_2019.loc[broad_rows_2019, 'homicide'].sum()

In [None]:
# Rows with prog_broad == 1: pooled 2020 violent crime over pooled 2019 violent crime.
broad_rows_2020 = df_2020['prog_broad'].eq(1)
broad_rows_2019 = df_2019['prog_broad'].eq(1)
df_2020.loc[broad_rows_2020, 'violent_crime'].sum() / df_2019.loc[broad_rows_2019, 'violent_crime'].sum()

### Increase in homicides in non-narrow-progressive DA jurisdictions is 30%, violent crime is up 4.8%

In [None]:
# Narrow-non-progressive subset: pooled 2020 homicides over pooled 2019 homicides.
(
    df_2020_nonprog_narrow['homicide'].sum()
    / df_2019_nonprog_narrow['homicide'].sum()
)

In [None]:
# Narrow-non-progressive subset: pooled 2020 violent crime over pooled 2019 violent crime.
(
    df_2020_nonprog_narrow['violent_crime'].sum()
    / df_2019_nonprog_narrow['violent_crime'].sum()
)

### Homicides in non-broad-progressive DA jurisdictions are up 32%, violent crime is up 4.9%

In [None]:
# Rows with prog_broad == 0: pooled 2020 homicides over pooled 2019 homicides.
not_broad_rows_2020 = df_2020['prog_broad'].eq(0)
not_broad_rows_2019 = df_2019['prog_broad'].eq(0)
df_2020.loc[not_broad_rows_2020, 'homicide'].sum() / df_2019.loc[not_broad_rows_2019, 'homicide'].sum()

In [None]:
# Rows with prog_broad == 0: pooled 2020 violent crime over pooled 2019 violent crime.
not_broad_rows_2020 = df_2020['prog_broad'].eq(0)
not_broad_rows_2019 = df_2019['prog_broad'].eq(0)
df_2020.loc[not_broad_rows_2020, 'violent_crime'].sum() / df_2019.loc[not_broad_rows_2019, 'violent_crime'].sum()

# Differences:

In [None]:
# Absolute change in the pooled homicide count: 2020 total minus 2019 total.
(
    df_2020['homicide'].sum()
    - df_2019['homicide'].sum()
)

In [None]:
# Optional export of the two yearly tables to CSV; left disabled. Uncomment to write the files.
# df_2019.to_csv('data_2019.csv')
# df_2020.to_csv('data_2020.csv')

In [None]:
# Per-city 2020 / 2019 ratio for every crime count.
# Only the count columns are divided. Dividing the whole frames would also
# divide the prog_narrow / prog_broad flag columns by themselves, giving
# columns that carry no information (presumably 1.0 for 1/1 and NaN for 0/0,
# since later cells compare the flags against 1 and 0).
count_columns = ['homicide', 'rape', 'robbery', 'assault', 'violent_crime']
df_2020[count_columns] / df_2019[count_columns]