In [4]:
import pandas as pd
import numpy as np

In [6]:
# uploading Cure dataset

cure_data = pd.read_csv('../cure_analysis/data/output/cure_analysis_data_2020_final.csv').drop(columns=['Unnamed: 0']) # add your own path

# filtering out dates before 2011

cure_data = cure_data[cure_data['year.x'] >= 2011].rename(columns={'year.x':'year'})

In [7]:
# adding a 'change_from_start_year' column to track % change in shootings from the year before a precinct entered Cure

cure_data['change_from_start_year'] = '' # empty column for % change
shootings_change_df = pd.DataFrame() # empty df to place final % change results in
start_year_list = [] # empty list to place each precinct's start year in

for precinct in list(cure_data['precinct'].unique()): # for each precinct in Cure
    
    precinct_df = cure_data[cure_data['precinct'] == precinct] # df only including data for the given precinct
    
    for ind in precinct_df.index: # iterating over every year in this precinct-specific df
    
        start_year = precinct_df[precinct_df['time_after_int'] == 1]['year'].to_list()[0] # the year a precinct entered Cure
        dif = precinct_df['time_after_int'][ind] # how many years it's been since a precinct entered Cure
        start_count = precinct_df[precinct_df['year'] == start_year - 1]['shootings_per_person'].to_list()[0] # shootings per person year before Cure started
        end_count = precinct_df[precinct_df['year'] == (start_year - 1) + dif]['shootings_per_person'].to_list()[0] # shootings per person in current year
        
        if dif >= 1: # if start year or later, add % change to change_from_start_year column
        
            precinct_df['change_from_start_year'][ind] = round(((end_count - start_count) / start_count) * 100, 2)
        
        else: # otherwise, leave blank
            
            precinct_df['change_from_start_year'][ind] = ''
            
    shootings_change_df = shootings_change_df.append(precinct_df, ignore_index=True) # combine all precinct-specific DFs into one
    start_year_list.append(start_year) # add each precinct's start year to this list (will be used later)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  precinct_df['change_from_start_year'][ind] = ''
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  precinct_df['change_from_start_year'][ind] = round(((end_count - start_count) / start_count) * 100, 2)
  shootings_change_df = shootings_change_df.append(precinct_df, ignore_index=True) # combine all precinct-specific DFs into one


In [8]:
shootings_change_df

Unnamed: 0,precinct,year,time,cure,time_after_int,p0010001,shootings_count,shootings_per_person,arrests_count,arrests_per_pop,change_from_start_year
0,23,2011,6,0,0,73106,41,0.000561,6241,0.085369,
1,23,2012,7,0,0,73106,32,0.000438,5696,0.077914,
2,23,2013,8,0,0,73106,12,0.000164,5700,0.077969,
3,23,2014,9,0,0,73106,16,0.000219,5502,0.075261,
4,23,2015,10,0,0,73106,23,0.000315,5141,0.070323,
...,...,...,...,...,...,...,...,...,...,...,...
205,120,2016,11,1,3,113008,25,0.000221,4851,0.042926,-40.48
206,120,2017,12,1,4,113008,27,0.000239,4742,0.041962,-35.71
207,120,2018,13,1,5,113008,20,0.000177,4587,0.040590,-52.38
208,120,2019,14,1,6,113008,12,0.000106,3704,0.032776,-71.43


In [4]:
# pivoting the columns

shootings_change_df = shootings_change_df.pivot(index='precinct', columns='year', values='change_from_start_year')

# sorting by the year a precinct entered Cure

shootings_change_df = shootings_change_df.reset_index()
shootings_change_df['start_year'] = start_year_list # using start_year_list to tag each precinct with its Cure start year
shootings_change_df = shootings_change_df.sort_values(['start_year','precinct']) # sorting by start year and precinct
shootings_change_df.index = shootings_change_df['precinct']

In [5]:
# calculating average percent change in the first year Cure was implemented

percent_change_list = [] # empty list to place each precinct's % change

for ind in shootings_change_df.index:
    
    start_year = shootings_change_df['start_year'][ind] # year a precinct started Cure
    percent_change = shootings_change_df[start_year][ind] # the % change in the first year of Cure
    
    percent_change_list.append(percent_change) # add % change to ongoing list

round(np.mean(percent_change_list), 2) # mean of all % changes in first year

-11.64

In [6]:
# final df

shootings_change_df = shootings_change_df.drop(columns=['start_year','precinct'])
shootings_change_df

year,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
75,,-32.06,-40.46,-38.93,-38.17,-48.09,-66.41,-74.81,-49.62,-51.15
77,,0.0,-23.53,-23.53,-33.33,-37.25,-70.59,-60.78,-15.69,-50.98
40,,,-35.59,-28.81,-45.76,-42.37,-45.76,-33.9,-45.76,-57.63
113,,,-38.81,-19.4,-61.19,-34.33,-59.7,-77.61,-44.78,-52.24
120,,,,-40.48,-28.57,-40.48,-35.71,-52.38,-71.43,-54.76
42,,,,,-27.27,-50.91,-43.64,-32.73,-32.73,-41.82
46,,,,,0.0,-35.29,-54.41,-61.76,-45.59,-72.06
67,,,,,-21.52,-10.13,-30.38,-48.1,-58.23,-59.49
73,,,,,-19.77,-41.86,-56.98,-36.05,-44.19,-55.81
79,,,,,2.56,5.13,-12.82,0.0,-17.95,-17.95


In [7]:
# downloading the data

# shootings_change_df.to_csv('shootings-change-from-start-year_by-precinct.csv')