# Setup

In [13]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns

from causallift import CausalLift
seed = 5
%matplotlib inline

plt.style.use('fivethirtyeight')
sns.set_palette('Paired')
plt.rcParams['figure.figsize'] = (12,8)

In [14]:
df = pd.read_csv('../data/BPD_CRIME_DATA_WITHDEMOGRAPHICS.csv')
df.columns

`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.


Index(['Unnamed: 0', 'Unnamed: 0.1', 'Description', 'Inside/Outside', 'Weapon',
       'Post', 'District', 'Neighborhood', 'Longitude', 'Latitude', 'Premise',
       'Season', 'Month', 'Hour', 'Year', 'CSA', 'median_household_income',
       'households_below_poverty', 'perc18_24', 'perc25_64', 'perc65up',
       'perc_asian', 'perc_aa', 'perc_hisp', 'pwhi', 'num_households',
       'median_price_homes_sold'],
      dtype='object')

# Cleaning

In [15]:
df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], inplace=True)

In [16]:
df.head(10)



Unnamed: 0,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Premise,Season,...,households_below_poverty,perc18_24,perc25_64,perc65up,perc_asian,perc_aa,perc_hisp,pwhi,num_households,median_price_homes_sold
0,ROBBERY - RESIDENCE,Inside,KNIFE,913.0,SOUTHERN,Brooklyn,-76.60541,39.22951,ROW/TOWNHO,summer,...,24.213606,8.248062,57.449612,7.302326,2.527132,37.96124,14.906977,39.682171,,52000.0
1,AUTO THEFT,Outside,NONE,133.0,CENTRAL,Reservoir Hill,-76.63217,39.3136,STREET,summer,...,25.964546,8.231621,56.940108,11.003879,0.624468,83.990917,2.876336,10.966033,,127500.0
2,SHOOTING,Outside,FIREARM,524.0,NORTHERN,Winston-Govans,-76.60697,39.34768,STREET,summer,...,,,,,,,,,,
3,AGG. ASSAULT,Inside,OTHER,934.0,SOUTHERN,Carrollton Ridge,-76.64526,39.28315,ROW/TOWNHO,summer,...,,,,,,,,,,
4,COMMON ASSAULT,Outside,HANDS,113.0,CENTRAL,Downtown West,-76.61365,39.28756,STREET,summer,...,,,,,,,,,,
5,BURGLARY,Inside,NONE,922.0,SOUTHERN,Cherry Hill,-76.62131,39.24867,ROW/TOWNHO,summer,...,39.337017,9.134854,45.575112,9.779375,0.929598,87.469013,5.813089,4.362915,,45000.0
6,HOMICIDE,Outside,FIREARM,232.0,SOUTHEASTERN,Canton,-76.56827,39.28202,STREET,summer,...,3.661784,5.849147,75.246217,10.533269,4.383858,3.939467,3.086716,85.575306,,306500.0
7,ROBBERY - STREET,Outside,NONE,123.0,CENTRAL,Upton,-76.62789,39.30254,STREET,summer,...,42.982456,11.429971,47.277179,10.803134,1.860921,91.635651,0.636631,4.221352,,100000.0
8,AGG. ASSAULT,Outside,OTHER,641.0,NORTHWESTERN,Windsor Hills,-76.68365,39.3137,STREET,summer,...,,,,,,,,,,
9,COMMON ASSAULT,Inside,HANDS,332.0,EASTERN,Berea,-76.57419,39.30551,ROW/TOWNHO,summer,...,27.649528,8.788912,52.139479,15.157848,0.417996,93.664063,2.03498,2.364976,,42786.0


In [17]:
df['households_below_poverty'].dropna(inplace=True)

`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.


In [18]:
# Create binary treatment variables
df['Inside/Outside'].value_counts()
df['Inside'] = np.where(df['Inside/Outside'] == 'INSIDE', False, True)



In [19]:
df['households_below_poverty'].value_counts()

7.086614     1833
5.454545     1594
26.333747    1369
16.856025    1225
31.822899    1204
             ... 
21.188630      49
17.408907      43
26.858276      42
26.280624      32
21.687667      29
Name: households_below_poverty, Length: 123, dtype: int64

In [20]:
# Encoding
df['Description'] = df['Description'].astype('category').cat.codes
df['Description']

0         12
1          3
2         14
3          0
4          5
          ..
263116     7
263117     7
263118     7
263119     4
263120     4
Name: Description, Length: 263121, dtype: int8

# Is Below Average Poverty Treatment

In [21]:
df2 = df.copy()
df2['households_below_poverty'] = df2['households_below_poverty'].dropna()
df2.reset_index(inplace=True)
households_below_avg_poverty = df2['households_below_poverty'].mean()
df2['Treatment'] = np.where(df['households_below_poverty'] <= households_below_avg_poverty, 1, 0)

In [25]:
df2.rename({'Description': 'Outcome'})
# Main causal parameters
conversion = 'Description'
treatment_group_key = 'control'
# treatment_name = 'Functioning day bool'
features = ['Weapon', 'Neighborhood', 'Season']
df2[['Weapon', 'Neighborhood', 'Season']].values
df2['control']

train_df, test_df = train_test_split(df2, test_size=0.2, random_state=seed, stratify=df['Treatment'])

print('\n[Estimate propensity scores for Inverse Probability Weighting.]')
cl = CausalLift(train_df, test_df, enable_ipw=True, verbose=3)

`should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.



[Estimate propensity scores for Inverse Probability Weighting.]
[2021-04-06 16:13:40,495|causallift.context.flexible_context|INFO] Run pipeline (tags: ['011_bundle_train_and_test_data'], SequentialRunner, only_missing: False)
[2021-04-06 16:13:40,526|kedro.pipeline.node|INFO] Running node: bundle_train_and_test_data([args_raw,test_df,train_df]) -> [df_00]
[2021-04-06 16:13:40,648|kedro.runner.sequential_runner|INFO] Completed 1 out of 1 tasks
[2021-04-06 16:13:40,649|kedro.runner.sequential_runner|INFO] Pipeline execution completed successfully.
[2021-04-06 16:13:40,684|causallift.context.flexible_context|INFO] Run pipeline (tags: ['121_prepare_args', '131_treatment_fractions_', '141_initialize_model'], SequentialRunner, only_missing: False)
[2021-04-06 16:13:40,702|kedro.pipeline.node|INFO] Running node: treatment_fractions_([args_raw,df_00]) -> [treatment_fractions]
[2021-04-06 16:13:40,778|kedro.runner.sequential_runner|INFO] Completed 1 out of 3 tasks
[2021-04-06 16:13:40,791|kedr

ValueError: could not convert string to float: 'Inside'