# Single instance explanations

In [2]:
import pandas as pd
import numpy as np
from SVM_model import SVM_model
from Functions import separate_bins_feature

# Assuming a model that gives % of 'goodness' for any sample

# Assuming that for categorical features there will be only values in range 0-7. 

# Hardcodes the constraints for the direction in which to move
# 1: Move up to to improve
# 0: Move down to improve
# -1: Needs check
monotonicity_arr = [1,1,1,1,1,0,0,1,1,1,1,-1,0,-1,1,0,0,0,0,-1,-1,0,-1]

In [7]:
# Create arrays with mean values for each bin and the position in the bin of each value

# X = original data set (9871x23)
df = pd.read_csv("working_data_full.csv")
vals = df.values
X = vals[:,2:]
y_original= vals[:,1]

samples = X.shape[0]
features = X.shape[1]

mean_values_bins = []
X_bined_positions = []

for i in range (features):
    if i in [9,10]:
        result_bins = np.array([0,1,2,3,4,5,6,7,-1,-1])
        result_new_col = X[:,i].flatten()
    else:
        result_bins, result_new_col = separate_bins_feature(X[:,i].flatten())[:2]
    mean_values_bins.append(result_bins)
    X_bined_positions.append(result_new_col)
    
mean_values_bins = np.array(mean_values_bins)
X_bined_positions = np.array(X_bined_positions)  
X_bined_positions = np.transpose(X_bined_positions)

print(mean_values_bins, mean_values_bins.shape)
print(X_bined_positions, X_bined_positions.shape)

[[  54.   58.   62.   66.   70.   74.   78.   82.   86.   90.]
 [  25.   63.  101.  139.  177.  215.  253.  291.  329.  367.]
 [   1.    4.    7.   10.   13.   16.   19.   22.   25.   28.]
 [  17.   30.   43.   56.   69.   82.   95.  108.  121.  134.]
 [   2.    6.   10.   14.   18.   22.   26.   30.   34.   38.]
 [   0.    1.    2.    3.   -1.   -1.   -1.   -1.   -1.   -1.]
 [   0.    1.    2.   -1.   -1.   -1.   -1.   -1.   -1.   -1.]
 [  71.   75.   79.   83.   87.   91.   95.   99.  103.  107.]
 [  10.   31.   52.   73.   94.  115.  136.  157.  178.  199.]
 [   0.    1.    2.    3.    4.    5.    6.    7.   -1.   -1.]
 [   0.    1.    2.    3.    4.    5.    6.    7.   -1.   -1.]
 [   2.    6.   10.   14.   18.   22.   26.   30.   34.   38.]
 [   0.    1.    2.    3.    4.    5.    6.   -1.   -1.   -1.]
 [   3.   10.   17.   24.   31.   38.   45.   52.   59.   66.]
 [   7.   21.   35.   49.   63.   77.   91.  105.  119.  133.]
 [   0.    1.    2.    3.    4.    5.    6.   -1.   -1.

In [13]:
svm_model = SVM_model(None, "working_data_full.csv")

svm_model.train_model(0.001)
svm_model.test_model()

Training Accuracy: 72.81 %
Test Accuracy: 74.58 %


In [16]:
def perturb_row_feature(model, row, row_idx, feat_idx, current_bins):
    
    c_current_bins = np.copy(current_bins)
    direction = monotonicity_arr[feat_idx]
    current_bin = np.copy(c_current_bins[feat_idx])
    
    if current_bin != 9:
        next_value = mean_values_bins[feat_idx][int(current_bin+1)]
    if current_bin != 0:
        prev_value = mean_values_bins[feat_idx][int(X_bined_positions[row_idx][feat_idx]-1)]
    
    # Check if in boundary and return the same row
    if direction == -1:
        if current_bin == 0:
            direction = 1
        elif current_bin == 9 or next_value == -1:
            direction = 0
    if direction == 1:
        if current_bin == 9 or next_value == -1:
            return (row, c_current_bins)
    elif direction == 0 and current_bin ==  0:
            return (row, c_current_bins)
    
    # Decide direction in special case
    if direction == -1:
        row_up = np.copy(row)
        row_down = np.copy(row)
        row_up[feat_idx] = next_value
        row_down[feat_idx] = prev_value
        percent_1 = model.run_model(row_up)
        percent_0 = model.run_model(row_down)
        if percent_1 >= percent_0:
            c_current_bins[feat_idx] += 1
            return (row_up, c_current_bins)
        else:
            c_current_bins[feat_idx] -= 1
            return (row_down, c_current_bins)
        
    else:
        p_row = np.copy(row)
        if direction == 1:
            c_current_bins[feat_idx] += 1
            p_row[feat_idx] = next_value
        elif direction == 0:
            c_current_bins[feat_idx] -= 1
            p_row[feat_idx] = prev_value
        
        return (p_row, c_current_bins)

def instance_explanation(model, k_row, row_idx):
    
    row = np.copy(k_row)
    percent = model.run_model(row)
    features_moved = np.zeros(23)
    times_moved = np.zeros(23)
    change_vector = np.zeros(23)
    
    original_bins = np.copy(X_bined_positions[row_idx])
    current_bins = np.copy(X_bined_positions[row_idx])
       
    while percent <= 0.5 and (features_moved == 1).sum() < 5 and max(times_moved) < 5:
    
        new_percents = []
        pert_rows = []
        new_curr_bins = []
        
        for i in range(len(row)):
            t_row, t_current_bins = perturb_row_feature(model, row, row_idx, i, current_bins)
            pert_rows.append(t_row)
            new_curr_bins.append(t_current_bins)
            new_percents.append(model.run_model(t_row))

        new_percents = np.array(new_percents)
        idx = np.argmax(new_percents)
        
        row = pert_rows[idx]
        percent = new_percents[idx]
        current_bins = new_curr_bins[idx]

        features_moved[idx] = 1
        times_moved[idx] += 1
    
    for l in range(23):
        change_vector[l] = current_bins[l] - original_bins[l]
        
    if percent >= 0.5:
        return change_vector, row
    else:
        return ("Decision can't be moved within thresholds.")
    
    


In [17]:
for i in range(5):
    print(i, instance_explanation(svm_model, X[i], i)[0])

0 [ 4.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
1 D
2 D
3 D
4 [ 3.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
