# Predicting Conflict Outcomes Pt. 2
by Jigar Khatri

In [2]:
import pandas as pd
import numpy as np

In [1]:
load_ext blackcellmagic

!Image(filename = '/Users/DBerl/Deskop/Conflict Datasets/Pandas_on_Slide.gif')

In [4]:
pd.options.display.max_columns = 999

In [5]:
df = pd.read_csv('comp_war_data.csv')

In [6]:
df.shape

(1360, 32)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1360 entries, 0 to 1359
Data columns (total 32 columns):
stateabb             1360 non-null object
statea               1360 non-null int64
year                 1360 non-null int64
defensebudget        1360 non-null float64
milsize              1360 non-null float64
IronSteelProduct     1360 non-null float64
EnergyUse            1360 non-null float64
totalpop             1360 non-null float64
urbanpop             1360 non-null float64
CINC_Score_A         1360 non-null float64
version_x            1360 non-null int64
stateb               1360 non-null int64
warstrtmnth          1360 non-null int64
warstrtday           1360 non-null int64
warstrtyr            1360 non-null int64
warendmnth           1360 non-null int64
warenday             1360 non-null int64
warendyr             1360 non-null int64
warolea              1360 non-null int64
waroleb              1360 non-null int64
wardyadrolea         1360 non-null int64
wardyadroleb     

# Exploring War Role Dummies

In [None]:
warrolevaluea = df["warolea"].value_counts(normalize=True)
warrolevaluea

In [None]:
warrolevalueb = df["waroleb"].value_counts(normalize=True)
warrolevalueb

In [None]:
war_role_a_dummy = pd.get_dummies(df["warolea"], prefix="Role_A", drop_first=True)

In [None]:
war_role_b_dummy = pd.get_dummies(df["waroleb"], prefix="Role_B", drop_first=True)

In [None]:
df = pd.concat([df, war_role_a_dummy, war_role_b_dummy], axis="columns")

In [None]:
df = df.drop(columns=["warolea", "waroleb", "wardyadrolea", "wardyadroleb"])

In [None]:
df.head()

# Training a Model and Logistic Regression
`

In [15]:
df.columns

Index(['stateabb', 'statea', 'year', 'defensebudget', 'milsize',
       'IronSteelProduct', 'EnergyUse', 'totalpop', 'urbanpop', 'CINC_Score_A',
       'version_x', 'stateb', 'warstrtmnth', 'warstrtday', 'warstrtyr',
       'warendmnth', 'warenday', 'warendyr', 'outcomea', 'batdtha', 'batdthb',
       'durindx', 'diff_in_days', 'diff_in_deaths', 'total_battle_dths',
       'CINC_Score_B', 'version_y', 'diff_CINC', 'Role_A_2', 'Role_A_3',
       'Role_A_4', 'Role_B_2', 'Role_B_3', 'Role_B_4'],
      dtype='object')

In [None]:
feature_cols = [
    "diff_in_days",
    "diff_in_deaths",
    "diff_CINC",
    "total_battle_dths",
    "Role_A_2",
    "Role_A_3",
    "Role_A_4",
    "Role_B_2",
    "Role_B_3",
    "Role_B_4",
    "Role_A_2",
    "Role_A_3",
    "Role_A_4",
    "Role_B_2",
    "Role_B_3",
    "Role_B_4",
]

X = df[feature_cols]
y = df["outcomea"]

# Discussion of Feature Columns Chosen
- The dataset that I have constructed examines conventional inter-state conflict in a dyadic context. The goal of my analysis is to see how well several features such as the length of the war, a state's role in a war, the differences in casualties,  and the differences in national material capabilities predict the outcome of a war (win,lose, or draw) for a state (statea).
- I use several models to try to see how well each of them performs when it comes to predicting outcomes and which features are most important. 

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import (
    accuracy_score,
    recall_score,
    precision_score,
    confusion_matrix,
)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
logreg = LogisticRegression(solver="lbfgs", multi_class="multinomial")

In [None]:
logreg.fit(X_train, y_train)

In [None]:
y_pred = logreg.predict(X_test)

In [None]:
logreg.score(X_test, y_test)

In [None]:
accuracy_score(y_pred, y_test)

In [None]:
precision_score(y_test, y_pred, average=None)

In [None]:
cm = confusion_matrix(y_test, y_pred)
cm

In [27]:
import seaborn as sns

In [None]:
ax = sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
ax.set_ylabel("True label")
ax.set_xlabel("Predicted label")

In [29]:
df['outcomea'].value_counts()

3.0    535
1.0    525
2.0    300
Name: outcomea, dtype: int64

In [None]:
cross_val_score(LogisticRegression(solver="lbfgs"), X, y, cv=5).mean()

In [None]:
logreg = LogisticRegression(solver="lbfgs", n_jobs=-1)

params = {"fit_intercept": [True, False], "C": [1.0, 0.1, 0.01, 0.001]}

clf = GridSearchCV(logreg, params, cv=5, verbose=1, n_jobs=-1)

In [32]:
clf.fit(X_train, y_train)

Fitting 5 folds for each of 8 candidates, totalling 40 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed:    4.8s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn', n_jobs=-1,
          penalty='l2', random_state=None, solver='lbfgs', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'fit_intercept': [True, False], 'C': [1.0, 0.1, 0.01, 0.001]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=1)

In [33]:
clf.best_score_ 

0.442156862745098

In [34]:
clf.best_params_

{'C': 1.0, 'fit_intercept': True}

In [35]:
y_pred = clf.predict(X_test)

In [36]:
clf.score(X_test, y_test)

0.45294117647058824

In [None]:
# We saw a noticeable drop in accuracy following the use of k-fold cross validation. 

# K Nearest Neighbors

In [37]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import pandas as pd

In [None]:
feature_cols = [
    "diff_in_days",
    "diff_in_deaths",
    "diff_CINC",
    "total_battle_dths",
    "Role_A_2",
    "Role_A_3",
    "Role_A_4",
    "Role_B_2",
    "Role_B_3",
    "Role_B_4",
    "Role_A_2",
    "Role_A_3",
    "Role_A_4",
    "Role_B_2",
    "Role_B_3",
    "Role_B_4",
]

X = df[feature_cols]
y = df["outcomea"]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [40]:
knn = KNeighborsClassifier(n_jobs=-1)

In [41]:
knn

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=5, p=2,
           weights='uniform')

In [None]:
parameters = {"n_neighbors": range(1, 100), "weights": ["uniform", "distance"]}

clf = GridSearchCV(knn, parameters, cv=5, scoring="accuracy", verbose=1)

In [43]:
%%time
clf.fit(X_train, y_train)

Fitting 5 folds for each of 198 candidates, totalling 990 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Wall time: 30.5 s


[Parallel(n_jobs=1)]: Done 990 out of 990 | elapsed:   30.4s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=-1, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': range(1, 100), 'weights': ['uniform', 'distance']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=1)

In [44]:
clf.best_params_

{'n_neighbors': 14, 'weights': 'distance'}

In [45]:
clf.best_score_

0.8980392156862745

In [46]:
clf_results = pd.DataFrame(clf.cv_results_)
clf_results.head()



Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,0.011899,0.010041,0.015215,0.002703,1,uniform,"{'n_neighbors': 1, 'weights': 'uniform'}",0.902439,0.921569,0.872549,0.872549,0.901478,0.894118,0.019013,11,1.0,1.0,1.0,1.0,1.0,1.0,0.0
1,0.004041,0.004241,0.021016,0.005015,1,distance,"{'n_neighbors': 1, 'weights': 'distance'}",0.902439,0.921569,0.872549,0.872549,0.901478,0.894118,0.019013,11,1.0,1.0,1.0,1.0,1.0,1.0,0.0
2,0.004829,0.00518,0.01119,0.006745,2,uniform,"{'n_neighbors': 2, 'weights': 'uniform'}",0.868293,0.872549,0.848039,0.818627,0.891626,0.859804,0.024805,101,0.95092,0.943627,0.957108,0.947304,0.942472,0.948286,0.005315
3,0.009376,0.007655,0.012495,0.006248,2,distance,"{'n_neighbors': 2, 'weights': 'distance'}",0.902439,0.921569,0.872549,0.872549,0.901478,0.894118,0.019013,11,1.0,1.0,1.0,1.0,1.0,1.0,0.0
4,0.003126,0.006252,0.0125,0.00625,3,uniform,"{'n_neighbors': 3, 'weights': 'uniform'}",0.8,0.828431,0.857843,0.818627,0.876847,0.836275,0.027563,102,0.919018,0.911765,0.925245,0.928922,0.915545,0.920099,0.006256


In [47]:
y_pred = clf.predict(X_test)

In [None]:
metrics.accuracy_score(y_test, y_pred)

In [49]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
cm = confusion_matrix(y_test, y_pred)

sns.heatmap(cm, annot=True, fmt="d", linewidths=0.5)

plt.ylabel("True label")
plt.xlabel("Predicted label")

In [51]:
y_test.shape

(340,)

In [None]:
cm_norm = cm / cm.sum(axis=1)

sns.heatmap(cm_norm, annot=True, linewidths=0.5, cmap="Blues")

plt.ylabel("True label")
plt.xlabel("Predicted label")

In [53]:
y_train.value_counts(normalize=True)

3.0    0.397059
1.0    0.381373
2.0    0.221569
Name: outcomea, dtype: float64

In [54]:
from sklearn.dummy import DummyClassifier

In [55]:
dummy_knrf = DummyClassifier()

In [56]:
dummy_knrf.fit(X_train, y_train)

DummyClassifier(constant=None, random_state=None, strategy='stratified')

In [57]:
y_dummy_kn = dummy_knrf.predict(X_test)

In [58]:
accuracy_score(y_test, y_dummy_kn)

0.37941176470588234

In [None]:
# As we can see from comparing the accuracy scores of the knn model with the dummy classifier, the knn model produces 
# results that are far better than if we had simply guessed.

# Random Forests 

In [59]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
rfreg = RandomForestRegressor(
    n_estimators=150, max_features=5, oob_score=True, random_state=1
)
rfreg.fit(X, y)

In [None]:
pd.DataFrame(
    {"feature": feature_cols, "importance": rfreg.feature_importances_}
).sort_values(by="importance")

In [None]:
print((rfreg.oob_score_))  # Rsquared

scores = cross_val_score(rfreg, X, y, cv=10, scoring="neg_mean_squared_error")
np.mean(np.sqrt(-scores))  # RMSE

In [63]:
X.shape

(1360, 16)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=89)

In [None]:
rfreg = RandomForestRegressor(
    n_estimators=150, max_features=5, oob_score=True, random_state=1
)
rfreg.fit(X_train, y_train)

In [None]:
from sklearn.feature_selection import SelectFromModel

print(SelectFromModel(rfreg, threshold="mean", prefit=True).transform(X_train).shape)
print(SelectFromModel(rfreg, threshold="median", prefit=True).transform(X_train).shape)

In [None]:
X_important = SelectFromModel(rfreg, threshold="mean", prefit=True).transform(X_test)

In [None]:
rfreg = RandomForestRegressor(n_estimators=150, max_features=3, random_state=1)

scores = cross_val_score(
    rfreg, X_important, y_test, cv=10, scoring="neg_mean_squared_error"
)
np.mean(np.sqrt(-scores))

In [None]:
estimator_range = list(range(10, 300, 10))

RMSE_scores = []

for estimator in estimator_range:
    rfreg = RandomForestRegressor(n_estimators=estimator, random_state=1)
    MSE_scores = cross_val_score(rfreg, X, y, cv=5, scoring="neg_mean_squared_error")
    RMSE_scores.append(np.mean(np.sqrt(-MSE_scores)))

In [None]:
plt.plot(estimator_range, RMSE_scores)

plt.xlabel("n_estimators")
plt.ylabel("RMSE (lower is better)")

In [None]:
feature_range = list(range(1, len(feature_cols) + 1))


RMSE_scores = []

for feature in feature_range:
    rfreg = RandomForestRegressor(
        n_estimators=150, max_features=feature, random_state=1
    )
    MSE_scores = cross_val_score(rfreg, X, y, cv=10, scoring="neg_mean_squared_error")
    RMSE_scores.append(np.mean(np.sqrt(-MSE_scores)))

In [None]:
plt.plot(feature_range, RMSE_scores)

plt.xlabel("max_features")
plt.ylabel("RMSE (lower is better)")

In [None]:
dummy_knrf = DummyClassifier()

In [None]:
dummy_knrf.fit(X_train, y_train)

In [None]:
y_dummy_kn = dummy_knrf.predict(X_test)

In [76]:
accuracy_score(y_test, y_dummy_kn)

0.3558823529411765

In [None]:
#As we can see, the random forest model also produces much better results than the dummy classifier. 

# Takeaways
- There were a few things that genuinely surprised me in the analysis.
- The poor results from the logistic regression model, despite the grid search seems to indicates that additional work needs to be done (feature scaling, normalization, etc.)
- The random forest and knn models performed as I had expected given that they are generally regarded to be more accurate than logistic regression models.
- Looking at the differences between the knn and the random forest model, I can't help but wonder if the knn model is overfitting. 

# References
- Zeev Maoz, Paul L. Johnson, Jasper Kaplan, Fiona Ogunkoya, and Aaron Shreve 2019. The Dyadic Militarized Interstate Disputes (MIDs) Dataset Version 3.0: Logic, Characteristics, and Comparisons to Alternative Datasets, Journal of Conflict Resolution (forthcoming).

- Singer, J. David, Stuart Bremer, and John Stuckey. (1972). "Capability Distribution, Uncertainty, and Major Power War, 1820-1965." in Bruce Russett (ed) Peace, War, and Numbers, Beverly Hills: Sage, 19-48. Version 5. 