In [7]:
# mlp for multi-label classification
import numpy as np
from numpy import mean
from numpy import std
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import RepeatedKFold
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [18]:
# get the dataset
def get_dataset():
 df = pd.read_csv('data/all_states_nn_balanced.csv')
 df.drop(['Unnamed: 0','results','index'],axis=1,inplace=True)
 
 # Categorical boolean mask
 

 y= df[['wants_meds', 'wants_food' , 'attacks_doctor', 'attacks_soldier', 'attacks_actor']]
 
 X = df.drop(['wants_meds', 'wants_food' , 'attacks_doctor', 'attacks_soldier', 'attacks_actor'], axis=1)
 print(X.columns)
 print(y.columns)
 categorical_feature_mask = X.dtypes==object
# filter categorical columns using mask and turn it into a list
 categorical_cols = X.columns[categorical_feature_mask].tolist()

 for col in categorical_cols:
  one_hot = pd.get_dummies(X[col])
  X = X.drop(col,axis = 1)
# Join the encoded df
  X = X.join(one_hot)

 

 return X.to_numpy(), y.to_numpy()


In [19]:

# define dataset
X, y = get_dataset()
# summarize dataset shape
print(X.shape, y.shape)
# summarize first few examples
for i in range(1):
 print(X[i], y[i])

Index(['doctor_altruism', 'doctor_likes_soldier', 'doctor_likes_actor',
       'doctor_has_food', 'doctor_has_meds', 'soldier_altruism',
       'soldier_likes_doctor', 'soldier_likes_actor', 'soldier_has_food',
       'soldier_has_meds', 'actor_altruism', 'actor_likes_doctor',
       'actor_likes_soldier', 'actor_has_food', 'actor_has_meds',
       'food_quantity', 'meds_quantity', 'active'],
      dtype='object')
Index(['wants_meds', 'wants_food', 'attacks_doctor', 'attacks_soldier',
       'attacks_actor'],
      dtype='object')
(4454, 20) (4454, 5)
[-1  0  0  0  0  2 -1 -1  1  1 -1 -2 -2  0  1  0  1  1  0  0] [1. 0. 0. 0. 0.]


In [23]:

# get the model
def get_model(n_inputs, n_outputs):
 model = Sequential()
 model.add(Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
 model.add(Dense(16,  kernel_initializer='he_uniform', activation='relu'))
 model.add(Dense(n_outputs, activation='sigmoid'))
 model.compile(loss='binary_crossentropy', optimizer='adam')
 return model

In [21]:

# evaluate a model using repeated k-fold cross-validation
def evaluate_model(X, y):
 results = list()
 n_inputs, n_outputs = X.shape[1], y.shape[1]
 # define evaluation procedure
 cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
 # enumerate folds
 for train_ix, test_ix in cv.split(X):
    # prepare data
    X_train, X_test = X[train_ix], X[test_ix]
    y_train, y_test = y[train_ix], y[test_ix]
    # define model
    model = get_model(n_inputs, n_outputs)
    # fit model
    model.fit(X_train, y_train, verbose=0, epochs=500,batch_size=15)
    # make a prediction on the test set
    yhat = model.predict(X_test)
    #print(np.argmax(model.predict(X_test), axis=-1))
    # round probabilities to class labels
    yhat = yhat.round()
    # calculate accuracy
    acc = accuracy_score(y_test, yhat)
    # store result
    print('>%.3f' % acc)
    results.append(acc)
 return results

In [24]:
# load dataset
X, y = get_dataset()
# evaluate model
results = evaluate_model(X, y)
# summarize performance
print('Accuracy: %.3f (%.3f)' % (mean(results), std(results)))

Index(['doctor_altruism', 'doctor_likes_soldier', 'doctor_likes_actor',
       'doctor_has_food', 'doctor_has_meds', 'soldier_altruism',
       'soldier_likes_doctor', 'soldier_likes_actor', 'soldier_has_food',
       'soldier_has_meds', 'actor_altruism', 'actor_likes_doctor',
       'actor_likes_soldier', 'actor_has_food', 'actor_has_meds',
       'food_quantity', 'meds_quantity', 'active'],
      dtype='object')
Index(['wants_meds', 'wants_food', 'attacks_doctor', 'attacks_soldier',
       'attacks_actor'],
      dtype='object')
[1 1 0 1 1 0 1 0 0 0 1 0 0 1 0 0 0 3 1 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0
 0 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 1 0 0 0 1 0 1 0 1 1 1 1 0 1 1 1 1 0 1 1 0
 0 1 0 1 0 1 0 1 1 0 0 0 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1
 0 0 0 4 1 0 0 1 1 0 1 1 4 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 0 0 1 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 0 0 1 1 0 0 0
 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 1 1 1 1 1 0 0
 1 1 0 1 1 0 

In [13]:
from numpy import asarray
n_inputs, n_outputs = X.shape[1], y.shape[1]
model = get_model(n_inputs, n_outputs)
# fit the model on all data
model.fit(X, y, verbose=0, epochs=100)
# make a prediction for new data


<keras.callbacks.History at 0x22836f19960>

['doctor_altruism', 'doctor_likes_soldier', 'doctor_likes_actor',
       'doctor_has_food', 'doctor_has_meds', 'soldier_altruism',
       'soldier_likes_doctor', 'soldier_likes_actor', 'soldier_has_food',
       'soldier_has_meds', 'actor_altruism', 'actor_likes_doctor',
       'actor_likes_soldier', 'actor_has_food', 'actor_has_meds',
       'food_quantity', 'meds_quantity', 'active']
['wants_meds', 'wants_food', 'attacks_doctor', 'attacks_soldier',
       'attacks_actor']

In [17]:
np.set_printoptions(suppress=True)
row = [-2, #'doctor_altruism'
        -2, #'doctor_likes_soldier'
        -2, #'doctor_likes_actor'
        0, #'doctor_has_food'
        0, #'doctor_has_meds'
        0, #soldier_altruism
        2, #'soldier_likes_doctor'
        0, #'soldier_likes_actor'
        0, #'soldier_has_food'
        0, # 'soldier_has_meds'
        1, # 'actor_altruism'
        0, #'actor_likes_doctor'
        -1, # 'actor_likes_soldier'
        0, #'actor_has_food'
        0, #'actor_has_meds'
        1, #'food_quantity'
        1, #'meds_quantity'
        0 #'active'
        ]
newX = asarray([row])
yhat = model.predict(newX)
print(np.argmax(yhat, axis=-1))
print('Predicted: %s' % yhat[0])

[4]
Predicted: [0.         0.00009669 0.00000012 0.         0.09384069]


In [16]:
np.set_printoptions(suppress=True)
row = [-2, #'doctor_altruism'
        -2, #'doctor_likes_soldier'
        -2, #'doctor_likes_actor'
        0, #'doctor_has_food'
        0, #'doctor_has_meds'
        0, #soldier_altruism
        2, #'soldier_likes_doctor'
        0, #'soldier_likes_actor'
        0, #'soldier_has_food'
        0, # 'soldier_has_meds'
        1, # 'actor_altruism'
        0, #'actor_likes_doctor'
        1, # 'actor_likes_soldier'
        0, #'actor_has_food'
        1, #'actor_has_meds'
        1, #'food_quantity'
        0, #'meds_quantity'
        0 #'active'
        ]
newX = asarray([row])
yhat = model.predict(newX)
print(np.argmax(yhat, axis=-1))
print('Predicted: %s' % yhat[0])

[1]
Predicted: [0.         0.99981415 0.         0.00000002 0.00000307]
