# Imports

In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Input Data

In [2]:
df = pd.read_csv('../pro-test/data/Leb_1_drop_non_impact_params.csv',index_col=0,header=0)

## Remove spaces from column headings

In [3]:
df.columns = df.columns.str.replace(' ', '')

# Feature Selection

## X Parameters

### Combining and imputing protest size

#### Combine size columns

In [4]:
original_size_parameters = df[['sizeexact','sizeestimate']]
original_size_parameters['sizeexact'] = original_size_parameters['sizeexact'].fillna(0)
original_size_parameters['sizeestimate'] = original_size_parameters['sizeestimate'].fillna(0)
combined_sizes = pd.DataFrame(data=original_size_parameters['sizeestimate'] + original_size_parameters['sizeexact'],columns=['combined_sizes'])
record_number = combined_sizes.index
combined_sizes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_size_parameters['sizeexact'] = original_size_parameters['sizeexact'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  original_size_parameters['sizeestimate'] = original_size_parameters['sizeestimate'].fillna(0)


Unnamed: 0_level_0,combined_sizes
recordnumber,Unnamed: 1_level_1
20191592,31.0
20200162,-99.0
20191864,3.0
20200903,-99.0
20200891,-99.0
...,...
20200311,31.0
20192258,3.0
20191013,31.0
20200552,301.0


#### Impute -99 values to averages

In [5]:
mean_size_imputer = SimpleImputer(missing_values=-99,strategy="mean")
combined_sizes = pd.DataFrame(mean_size_imputer.fit_transform(combined_sizes),index=record_number,columns=['combined_sizes'])
combined_sizes

Unnamed: 0_level_0,combined_sizes
recordnumber,Unnamed: 1_level_1
20191592,31.000000
20200162,145.916121
20191864,3.000000
20200903,145.916121
20200891,145.916121
...,...
20200311,31.000000
20192258,3.000000
20191013,31.000000
20200552,301.000000


### Final selection of X parameters

In [6]:
selected_X_parameters = ['Amal','Hezbollah','ProgressiveSocialistMovement']
selected_X_parameters

['Amal', 'Hezbollah', 'ProgressiveSocialistMovement']

## X Data

In [7]:
X = df[selected_X_parameters]  
X = pd.concat([X,combined_sizes],axis=1)
X

Unnamed: 0_level_0,Amal,Hezbollah,ProgressiveSocialistMovement,combined_sizes
recordnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
20191592,0,0,0,31.000000
20200162,0,0,0,145.916121
20191864,0,0,0,3.000000
20200903,0,0,0,145.916121
20200891,0,0,0,145.916121
...,...,...,...,...
20200311,0,0,0,31.000000
20192258,0,0,0,3.000000
20191013,0,0,0,31.000000
20200552,0,0,0,301.000000


# y Encoding

In [8]:
y = pd.get_dummies(df['repression'])
y

Unnamed: 0_level_0,Army present at event,Arrests / detentions,Deaths inflicted,Injuries inflicted,"No known coercion, no security presence",Party Militias/ Baltagia present at event,Physical harassment,Security forces present at event
recordnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20191592,0,0,0,0,1,0,0,0
20200162,0,0,0,0,1,0,0,0
20191864,0,0,0,0,1,0,0,0
20200903,0,0,0,0,1,0,0,0
20200891,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...
20200311,0,0,0,0,1,0,0,0
20192258,1,0,0,0,0,0,0,0
20191013,0,0,0,0,1,0,0,0
20200552,0,0,0,0,1,0,0,0


## Formatting column titles

In [9]:
y.columns = y.columns.str.replace(' ', '_')
y.columns = y.columns.str.replace('/', '')
y.columns = y.columns.str.replace(',', '')
y

Unnamed: 0_level_0,Army_present_at_event,Arrests__detentions,Deaths_inflicted,Injuries_inflicted,No_known_coercion_no_security_presence,Party_Militias_Baltagia_present_at_event,Physical_harassment,Security_forces_present_at_event
recordnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20191592,0,0,0,0,1,0,0,0
20200162,0,0,0,0,1,0,0,0
20191864,0,0,0,0,1,0,0,0
20200903,0,0,0,0,1,0,0,0
20200891,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...
20200311,0,0,0,0,1,0,0,0
20192258,1,0,0,0,0,0,0,0
20191013,0,0,0,0,1,0,0,0
20200552,0,0,0,0,1,0,0,0


# Combined X and y

## make set more violent

In [10]:
X = X[selected_X_parameters].replace({0:1})

#X[selected_X_parameters].iloc[::2] = 1
#data["Deaths_inflicted"].replace({0:1}, limit =50)
data = pd.concat([X,y],axis=1)

data

Unnamed: 0_level_0,Amal,Hezbollah,ProgressiveSocialistMovement,Army_present_at_event,Arrests__detentions,Deaths_inflicted,Injuries_inflicted,No_known_coercion_no_security_presence,Party_Militias_Baltagia_present_at_event,Physical_harassment,Security_forces_present_at_event
recordnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
20191592,1,1,1,0,0,0,0,1,0,0,0
20200162,1,1,1,0,0,0,0,1,0,0,0
20191864,1,1,1,0,0,0,0,1,0,0,0
20200903,1,1,1,0,0,0,0,1,0,0,0
20200891,1,1,1,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
20200311,1,1,1,0,0,0,0,1,0,0,0
20192258,1,1,1,1,0,0,0,0,0,0,0
20191013,1,1,1,0,0,0,0,1,0,0,0
20200552,1,1,1,0,0,0,0,1,0,0,0


In [11]:
data["Deaths_inflicted"] = data["Deaths_inflicted"].replace({0:1})
data

Unnamed: 0_level_0,Amal,Hezbollah,ProgressiveSocialistMovement,Army_present_at_event,Arrests__detentions,Deaths_inflicted,Injuries_inflicted,No_known_coercion_no_security_presence,Party_Militias_Baltagia_present_at_event,Physical_harassment,Security_forces_present_at_event
recordnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
20191592,1,1,1,0,0,1,0,1,0,0,0
20200162,1,1,1,0,0,1,0,1,0,0,0
20191864,1,1,1,0,0,1,0,1,0,0,0
20200903,1,1,1,0,0,1,0,1,0,0,0
20200891,1,1,1,0,0,1,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
20200311,1,1,1,0,0,1,0,1,0,0,0
20192258,1,1,1,1,0,1,0,0,0,0,0
20191013,1,1,1,0,0,1,0,1,0,0,0
20200552,1,1,1,0,0,1,0,1,0,0,0


In [12]:
data.isna().sum()

Amal                                        0
Hezbollah                                   0
ProgressiveSocialistMovement                0
Army_present_at_event                       0
Arrests__detentions                         0
Deaths_inflicted                            0
Injuries_inflicted                          0
No_known_coercion_no_security_presence      0
Party_Militias_Baltagia_present_at_event    0
Physical_harassment                         0
Security_forces_present_at_event            0
dtype: int64

# Train/Test Split

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=17)

In [14]:
data_train = pd.concat([X_train,y_train],axis=1)
data_train

Unnamed: 0_level_0,Amal,Hezbollah,ProgressiveSocialistMovement,Army_present_at_event,Arrests__detentions,Deaths_inflicted,Injuries_inflicted,No_known_coercion_no_security_presence,Party_Militias_Baltagia_present_at_event,Physical_harassment,Security_forces_present_at_event
recordnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
20190252,1,1,1,0,0,0,0,0,0,1,0
20200722,1,1,1,0,0,0,0,1,0,0,0
20200872,1,1,1,0,0,0,0,1,0,0,0
20190346,1,1,1,0,0,0,0,1,0,0,0
20190108,1,1,1,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
20191765,1,1,1,0,0,0,0,1,0,0,0
20191706,1,1,1,0,0,0,0,1,0,0,0
20200309,1,1,1,0,0,0,0,1,0,0,0
20192257,1,1,1,1,0,0,0,0,0,0,0


# Decision Tree

## Instantiate and Train Decision Tree Models

In [15]:
y_columns = y.columns.values
y_columns

array(['Army_present_at_event', 'Arrests__detentions', 'Deaths_inflicted',
       'Injuries_inflicted', 'No_known_coercion_no_security_presence',
       'Party_Militias_Baltagia_present_at_event', 'Physical_harassment',
       'Security_forces_present_at_event'], dtype=object)

In [16]:
model_Army_present_at_event = DecisionTreeClassifier(random_state=2)
model_Army_present_at_event.fit(X_train,y_train['Army_present_at_event'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

In [17]:
model_Arrests__detentions = DecisionTreeClassifier(random_state=2)
model_Arrests__detentions.fit(X_train,y_train['Arrests__detentions'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

In [18]:
model_Deaths_inflicted = DecisionTreeClassifier(random_state=2)
model_Deaths_inflicted.fit(X_train,y_train['Deaths_inflicted'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

In [19]:
model_Injuries_inflicted = DecisionTreeClassifier(random_state=2)
model_Injuries_inflicted.fit(X_train,y_train['Injuries_inflicted'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

In [20]:
model_No_known_coercion_no_security_presence = DecisionTreeClassifier(random_state=2)
model_No_known_coercion_no_security_presence.fit(X_train,y_train['No_known_coercion_no_security_presence'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

In [21]:
model_Party_Militias_Baltagia_present_at_event = DecisionTreeClassifier(random_state=2)
model_Party_Militias_Baltagia_present_at_event.fit(X_train,y_train['Party_Militias_Baltagia_present_at_event'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

In [22]:
model_Physical_harassment = DecisionTreeClassifier(random_state=2)
model_Physical_harassment.fit(X_train,y_train['Physical_harassment'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

In [23]:
model_Security_forces_present_at_event = DecisionTreeClassifier(random_state=2)
model_Security_forces_present_at_event.fit(X_train,y_train['Security_forces_present_at_event'])

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=2, splitter='best')

## Evaluate Accuracy of Model

### Army_present_at_event

In [24]:
model_Army_present_at_event.score(X_test,y_test['Army_present_at_event'])

0.9332706766917294

In [25]:
Predictions = model_Army_present_at_event.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Army_present_at_event']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [26]:
y_test['Army_present_at_event'].value_counts()

0    993
1     71
Name: Army_present_at_event, dtype: int64

### Arrests__detentions

In [27]:
model_Arrests__detentions.score(X_test,y_test['Arrests__detentions'])

0.9915413533834586

In [28]:
Predictions = model_Arrests__detentions.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Arrests__detentions']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [29]:
y_test['Arrests__detentions'].value_counts()

0    1055
1       9
Name: Arrests__detentions, dtype: int64

### Deaths_inflicted

In [30]:
model_Deaths_inflicted.score(X_test,y_test['Deaths_inflicted'])

0.9990601503759399

In [31]:
Predictions = model_Deaths_inflicted.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Deaths_inflicted']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [32]:
y_test['Deaths_inflicted'].value_counts()

0    1063
1       1
Name: Deaths_inflicted, dtype: int64

### Injuries_inflicted

In [33]:
model_Injuries_inflicted.score(X_test,y_test['Injuries_inflicted'])

0.9887218045112782

In [34]:
Predictions = model_Injuries_inflicted.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Injuries_inflicted']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [35]:
y_test['Injuries_inflicted'].value_counts()

0    1052
1      12
Name: Injuries_inflicted, dtype: int64

### No_known_coercion_no_security_presence

In [36]:
model_No_known_coercion_no_security_presence.score(X_test,y_test['No_known_coercion_no_security_presence'])

0.8016917293233082

In [37]:
Predictions = model_No_known_coercion_no_security_presence.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['No_known_coercion_no_security_presence']})
Compare['Predictions'].value_counts()

1    1064
Name: Predictions, dtype: int64

In [38]:
y_test['No_known_coercion_no_security_presence'].value_counts()

1    853
0    211
Name: No_known_coercion_no_security_presence, dtype: int64

### Party_Militias_Baltagia_present_at_event

In [39]:
model_Party_Militias_Baltagia_present_at_event.score(X_test,y_test['Party_Militias_Baltagia_present_at_event'])

0.9924812030075187

In [40]:
Predictions = model_Party_Militias_Baltagia_present_at_event.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Party_Militias_Baltagia_present_at_event']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [41]:
y_test['Party_Militias_Baltagia_present_at_event'].value_counts()

0    1056
1       8
Name: Party_Militias_Baltagia_present_at_event, dtype: int64

### Physical_harassment

In [42]:
model_Physical_harassment.score(X_test,y_test['Physical_harassment'])

0.9962406015037594

In [43]:
Predictions = model_Physical_harassment.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Physical_harassment']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [44]:
y_test['Physical_harassment'].value_counts()

0    1060
1       4
Name: Physical_harassment, dtype: int64

### Security_forces_present_at_event

In [45]:
model_Security_forces_present_at_event.score(X_test,y_test['Security_forces_present_at_event'])

0.900375939849624

In [46]:
Predictions = model_Security_forces_present_at_event.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Security_forces_present_at_event']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [47]:
y_test['Security_forces_present_at_event'].value_counts()

0    958
1    106
Name: Security_forces_present_at_event, dtype: int64

# Random Forest

## Instantiate and Train Random Forest Models

In [48]:
y_columns

array(['Army_present_at_event', 'Arrests__detentions', 'Deaths_inflicted',
       'Injuries_inflicted', 'No_known_coercion_no_security_presence',
       'Party_Militias_Baltagia_present_at_event', 'Physical_harassment',
       'Security_forces_present_at_event'], dtype=object)

In [49]:
model_Army_present_at_event = RandomForestClassifier()
model_Army_present_at_event.fit(X_train,y_train['Army_present_at_event'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [50]:
model_Arrests__detentions = RandomForestClassifier()
model_Arrests__detentions.fit(X_train,y_train['Arrests__detentions'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [51]:
model_Deaths_inflicted = RandomForestClassifier()
model_Deaths_inflicted.fit(X_train,y_train['Deaths_inflicted'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [52]:
model_Injuries_inflicted = RandomForestClassifier()
model_Injuries_inflicted.fit(X_train,y_train['Injuries_inflicted'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [53]:
model_No_known_coercion_no_security_presence = RandomForestClassifier()
model_No_known_coercion_no_security_presence.fit(X_train,y_train['No_known_coercion_no_security_presence'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [54]:
model_Party_Militias_Baltagia_present_at_event = RandomForestClassifier()
model_Party_Militias_Baltagia_present_at_event.fit(X_train,y_train['Party_Militias_Baltagia_present_at_event'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [55]:
model_Physical_harassment = RandomForestClassifier()
model_Physical_harassment.fit(X_train,y_train['Physical_harassment'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [56]:
model_Security_forces_present_at_event = RandomForestClassifier()
model_Security_forces_present_at_event.fit(X_train,y_train['Security_forces_present_at_event'])

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

## Evaluate Accuracy of Model

### Army_present_at_event

In [57]:
model_Army_present_at_event.score(X_test,y_test['Army_present_at_event'])

0.9332706766917294

In [58]:
Predictions = model_Army_present_at_event.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Army_present_at_event']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [59]:
y_test['Army_present_at_event'].value_counts()

0    993
1     71
Name: Army_present_at_event, dtype: int64

### Arrests__detentions

In [60]:
model_Arrests__detentions.score(X_test,y_test['Arrests__detentions'])

0.9915413533834586

In [61]:
Predictions = model_Arrests__detentions.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Arrests__detentions']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [62]:
y_test['Arrests__detentions'].value_counts()

0    1055
1       9
Name: Arrests__detentions, dtype: int64

### Deaths_inflicted

In [63]:
model_Deaths_inflicted.score(X_test,y_test['Deaths_inflicted'])

0.9990601503759399

In [64]:
Predictions = model_Deaths_inflicted.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Deaths_inflicted']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [65]:
y_test['Deaths_inflicted'].value_counts()

0    1063
1       1
Name: Deaths_inflicted, dtype: int64

### Injuries_inflicted

In [66]:
model_Injuries_inflicted.score(X_test,y_test['Injuries_inflicted'])

0.9887218045112782

In [67]:
Predictions = model_Injuries_inflicted.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Injuries_inflicted']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [68]:
y_test['Injuries_inflicted'].value_counts()

0    1052
1      12
Name: Injuries_inflicted, dtype: int64

### No_known_coercion_no_security_presence

In [69]:
model_No_known_coercion_no_security_presence.score(X_test,y_test['No_known_coercion_no_security_presence'])

0.8016917293233082

In [70]:
Predictions = model_No_known_coercion_no_security_presence.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['No_known_coercion_no_security_presence']})
Compare['Predictions'].value_counts()

1    1064
Name: Predictions, dtype: int64

In [71]:
y_test['No_known_coercion_no_security_presence'].value_counts()

1    853
0    211
Name: No_known_coercion_no_security_presence, dtype: int64

### Party_Militias_Baltagia_present_at_event

In [72]:
model_Party_Militias_Baltagia_present_at_event.score(X_test,y_test['Party_Militias_Baltagia_present_at_event'])

0.9924812030075187

In [73]:
Predictions = model_Party_Militias_Baltagia_present_at_event.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Party_Militias_Baltagia_present_at_event']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [74]:
y_test['Party_Militias_Baltagia_present_at_event'].value_counts()

0    1056
1       8
Name: Party_Militias_Baltagia_present_at_event, dtype: int64

### Physical_harassment

In [75]:
model_Physical_harassment.score(X_test,y_test['Physical_harassment'])

0.9962406015037594

In [76]:
Predictions = model_Physical_harassment.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Physical_harassment']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [77]:
y_test['Physical_harassment'].value_counts()

0    1060
1       4
Name: Physical_harassment, dtype: int64

### Security_forces_present_at_event

In [78]:
model_Security_forces_present_at_event.score(X_test,y_test['Security_forces_present_at_event'])

0.900375939849624

In [79]:
Predictions = model_Security_forces_present_at_event.predict(X_test)
Compare = pd.DataFrame({'Predictions':Predictions,'Actuals':y_test['Security_forces_present_at_event']})
Compare['Predictions'].value_counts()

0    1064
Name: Predictions, dtype: int64

In [80]:
y_test['Security_forces_present_at_event'].value_counts()

0    958
1    106
Name: Security_forces_present_at_event, dtype: int64