# Anchors

In [115]:
import pandas as pd
import numpy as np
from SVM_model import SVM_model
from Functions import separate_bins_feature

# Assumes a model that outputs a 'goodness' score (as a fraction) for any sample.

# Assumes categorical features only take values in the range 0-7.

# Hard-coded monotonicity constraints: the direction in which a feature value
# should move in order to improve the outcome.
#  1: move up to improve
#  0: move down to improve
# -1: needs check (direction not decided yet)
# The list has 23 entries -- presumably one per feature column of X; TODO confirm.
# NOTE(review): not referenced by any code visible in this section.

monotonicity_arr = [1,1,1,1,1,0,0,1,1,1,1,-1,0,-1,1,0,0,0,0,-1,-1,0,-1]

In [116]:
# Load the working data set, split it into features/labels, and train + test
# the SVM wrapper that the anchor search below queries.

# X = original data set (9871x23 according to the original author)
df = pd.read_csv("working_data_full.csv")
vals = df.values
# Column 1 is used as the target; columns 2 onward are the features.
# Column 0 is dropped -- presumably an id/index column; verify against the CSV.
X = vals[:,2:]
y = vals[:,1]

samples, features = X.shape

# SVM_model is a project-local wrapper; it is handed the same CSV path and
# appears to load the data itself (X and y above are not passed in).
# NOTE(review): the meaning of the 0.01 argument to train_model is not visible
# here -- confirm in SVM_model.
svm_model = SVM_model(None,"working_data_full.csv")
svm_model.train_model(0.01)
svm_model.test_model()

Training Accuracy: 72.81 %
Test Accuracy: 74.58 %


In [137]:
def evaluate_data_set(data):
    """Compute per-column statistics of a 2-D data array.

    Parameters
    ----------
    data : 2-D numpy array, indexed as data[row, column].

    Returns
    -------
    (avg_list, std_list) : two Python lists with one entry per column, holding
        the column means and the column standard deviations (np.std, i.e. the
        population standard deviation) respectively.
    """
    # Pull every column out as its own flat 1-D array first.
    columns = [data[:, col_idx].flatten() for col_idx in range(data.shape[1])]

    column_stds = [np.std(column) for column in columns]
    column_means = [np.mean(column) for column in columns]

    # Note the order: means first, standard deviations second.
    return column_means, column_stds

def perturb_special(min_val,max_val,avg,std,no_val):
    """Sample perturbed values for a bounded, integer-valued column.

    Draws `no_val` values from a normal distribution N(avg, std), clamps every
    draw into [min_val, max_val] and rounds it to the nearest whole number.

    Parameters
    ----------
    min_val, max_val : lower / upper bound applied to the draws.
    avg, std         : mean and standard deviation of the normal distribution.
    no_val           : number of values to draw.

    Returns
    -------
    numpy float array of length `no_val` holding whole-number values.
    """
    draws = np.random.normal(avg, std, no_val)

    # Clamp out-of-range draws onto the nearest bound: lower bound first, then
    # upper bound (same order as the two in-place replacements it stands for).
    draws[draws < min_val] = min_val
    draws[draws > max_val] = max_val

    return draws.round(0)
    

def find_anchors(model, data_set, sample, no_val, threshold=0.95,
                 max_iterations=4, special_cols=(9, 10)):
    """Greedily search for an anchor of `sample`.

    An anchor here is a set of features that, when held at the sample's own
    values while every other feature is randomly perturbed, keeps the model's
    prediction equal to the sample's original decision with precision of at
    least `threshold`.

    Each round tries every feature that is not locked yet as the next
    candidate, measures the precision of "locked features + candidate", and
    locks the candidate with the highest precision. Features that are already
    locked are never re-tested, so every round adds exactly one new feature.

    Parameters
    ----------
    model : object providing `run_model(sample)` and `run_model_data(data)`.
        `run_model` is rounded to obtain the reference decision;
        `run_model_data` receives an array of shape (no_val, n_features) and
        its result is compared element-wise with that decision.
        # assumes run_model_data returns one hard prediction per row -- TODO confirm
    data_set : 2-D array used to estimate the per-feature mean / std that
        drive the perturbations.
    sample : 1-D array with the feature values of the instance to explain.
    no_val : number of perturbed samples generated per candidate.
    threshold : precision required to accept the anchor (default 0.95).
    max_iterations : maximum number of features that may be locked (default 4).
    special_cols : indices of the bounded categorical columns, perturbed via
        `perturb_special` with the range 0-7 (default (9, 10)).

    Returns
    -------
    numpy array of length n_features with 1 at the anchor features and 0
    elsewhere, or None if no anchor reached `threshold` within
    `max_iterations` rounds.
    """
    features = sample.shape[0]
    avg_list, std_list = evaluate_data_set(data_set)

    # Identify original result from sample
    initial_percentage = model.run_model(sample)
    print("Initial %",initial_percentage)
    decision = np.round(initial_percentage,0)

    print("-------------------------------")
    # Empty mask; entries flip to 1 as features get locked
    mask = np.zeros(features)

    # Features locked so far, in the order they were chosen
    locked = []

    for _ in range(max_iterations):
        # Best precision seen this round and the candidate that produced it.
        # Starting below any valid precision means a candidate is always
        # chosen, even when every candidate scores exactly 0.
        best_acc = -1.0
        best_col = None

        for test_col in range(features):
            # Re-testing a locked feature only re-measures the current anchor
            # and could "win" by sampling noise, wasting the iteration.
            if test_col in locked:
                continue

            new_data = np.empty([features, no_val])

            # Perturb data: keep locked + candidate features at the sample's
            # value, resample everything else.
            for ind in range(features):
                if (ind == test_col) or (ind in locked):
                    new_data[ind] = sample[ind]
                elif ind in special_cols:
                    new_data[ind] = perturb_special(0,7,avg_list[ind],std_list[ind],no_val)
                else:
                    new_data[ind] = np.random.normal(avg_list[ind], std_list[ind], no_val)

            # The model expects one perturbed sample per row
            pred = model.run_model_data(new_data.transpose())
            acc = np.mean(pred == decision)

            if acc > best_acc:
                best_acc = acc
                best_col = test_col

        # Every feature is already locked (or nothing could be scored)
        if best_col is None:
            break

        locked.append(best_col)
        mask[best_col] = 1

        print("Precision:",best_acc)
        print(mask)

        if best_acc >= threshold:
            return mask

    print("!!! No anchors found !!!")
    return None


In [138]:
# Search for an anchor of the sample in row 8 of X, using 100 perturbed
# samples per candidate feature. The search is stochastic (np.random), so the
# printed precisions vary between runs.
find_anchors(svm_model, X, X[8], 100)

Initial % 0.648716478078
-------------------------------
Precision: 0.62
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
Precision: 0.79
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.
  0.  0.  0.  0.  0.]
Precision: 0.87
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.
  0.  0.  0.  0.  0.]
Precision: 0.92
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.
  0.  1.  0.  0.  0.]
!!! No anchors found !!!


In [142]:
# Run the anchor search for the samples in rows 80-99 of X (100 perturbed
# samples per candidate), printing a separator line between samples.
for i in range(80,100):
    find_anchors(svm_model, X, X[i], 100)
    print("\n ==========================================================  \n")
    

Initial % 0.669944092338
-------------------------------
Precision: 0.65
[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
Precision: 0.86
[ 1.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
Precision: 0.95
[ 1.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
  0.  0.  0.  0.  0.]


Initial % 0.208767669798
-------------------------------
Precision: 0.81
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
Precision: 0.89
[ 0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
Precision: 0.92
[ 1.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.]
Precision: 1.0
[ 1.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.
  0.  0.  0.  0.  0.]


Initial % 0.379412127897
-------------------------------
Precision: 0.74
[ 0.  0.  0.  0.  0.  0.  0.  0. 