In [None]:
import sys
sys.path.append('../shap')
import shap

import os
sys.path.append('../..')
import numpy as np
import deepbayesHF
import deepbayesHF.optimizers as optimizers
from deepbayesHF import PosteriorModel
from deepbayesHF.analyzers import FGSM
from deepbayesHF.analyzers import eps_LRP
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
import cv2
import random
import matplotlib.pyplot as plt
from collections import namedtuple

from joblib import Parallel, delayed
import subprocess
from statistics import mode
import json
import tensorflow as tf
import imageio

In [None]:
# Read every training image from disk together with its class id.
# Each sub-directory of `train_dir` is named after the integer class it holds.
train_dir = '/home/rhiba/bayesian-ores/training_data/gtsrb/Final_Training/Images/'

X_train, y_train = [], []
widths, heights = [], []

for class_dir in os.listdir(train_dir):
    classid = int(class_dir)
    full_path = os.path.join(train_dir, class_dir)
    for file_name in os.listdir(full_path):
        # Only the .ppm files are loaded; anything else in the folder is skipped.
        if not file_name.endswith('.ppm'):
            continue
        actual_image = imageio.imread(os.path.join(full_path, file_name))
        # Keep the raw dimensions so the size spread can be reported afterwards.
        heights.append(len(actual_image))
        widths.append(len(actual_image[0]))
        X_train.append(actual_image)
        y_train.append(classid)

In [None]:
# Report the spread of raw image sizes, then bring every image to one common
# resolution and divide the pixel values by 255.
print('Min and max widths:',min(widths),max(widths))
print('Min and max heights:',min(heights),max(heights))
rescale_size = (30,30)
print('Rescaling to:',rescale_size)

# Always resize. Guarding this on `rescale_size == X_train[0].shape` is dead
# code: a 2-tuple never equals a (height, width, channels) shape, and only the
# first image would be inspected anyway. Had the resize ever been skipped,
# X_train would have stayed a plain list and the division below would raise.
X_train = np.array([
    cv2.resize(image, rescale_size, interpolation=cv2.INTER_CUBIC)
    for image in X_train
])
X_train = X_train/255.

In [None]:
# Hold out the last 20% of the (image, label) pairs as the test set.
# Shuffling (random.shuffle(zipped)) is currently disabled, so the split
# follows the order in which the samples were loaded.
# NOTE(review): without shuffling the held-out part is simply the tail of the
# loading order -- confirm this is intended.
zipped = list(zip(X_train, y_train))
split_point = int(0.8*len(zipped))
train_pairs = zipped[:split_point]
test_pairs = zipped[split_point:]

# Images become arrays again; labels stay plain Python lists.
X_train = np.array([image for image, _label in train_pairs])
y_train = [label for _image, label in train_pairs]
X_test = np.array([image for image, _label in test_pairs])
y_test = [label for _image, label in test_pairs]

In [None]:
# Build a small CNN, train it with the deepbayesHF optimizer and save the
# resulting posterior under `model_name`.
model_name = 'GTSRB_SHAP_30x30'

likelihood = tf.keras.losses.SparseCategoricalCrossentropy()
opt = optimizers.VariationalOnlineGuassNewton()

# Hidden stack, applied in order: two conv + pool stages, then one dense layer.
hidden_layers = [
    Conv2D(4, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Conv2D(8, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(128, activation='relu'),
]

inputs = Input(shape=X_train[0].shape)
tmp = inputs
for layer in hidden_layers:
    tmp = layer(tmp)
# 43 softmax outputs, one per class.
predictions = Dense(43, activation='softmax')(tmp)
model = Model(inputs=inputs, outputs=predictions)

bayes_model = opt.compile(
    model,
    loss_fn=likelihood,
    epochs=25,
    learning_rate=0.25,
    inflate_prior=2.0,
    log_file='tmp/log.txt',
)
bayes_model.train(X_train, y_train, X_test, y_test)
bayes_model.save(model_name)

In [None]:
# Reload the saved posterior and sanity-check it on the first 50 test samples.
model_name = 'GTSRB_SHAP_30x30'
bayes_model = PosteriorModel(model_name)

# `n=50` is forwarded to predict -- presumably the number of posterior samples
# to average over; confirm against deepbayesHF.
y_pred = bayes_model.predict(X_test[:50], n=50)
predicted_classes = np.argmax(y_pred, axis=1)

check_accuracy = tf.keras.metrics.Accuracy(name="train_acc")
check_accuracy(y_test[:50], predicted_classes)
accuracy_percent = check_accuracy.result().numpy()*100

print()
print('Loaded model accuracy:',f'{accuracy_percent:.2f}%')

In [None]:
# Select the training sample to explain and make sure the model classifies it
# correctly before any explanations are generated.
N = 50                # forwarded as `n` to bayes_model.predict
PINNED_INDEX = 4137   # sample to explain; set to None to draw a random one
MAX_ATTEMPTS = 100    # upper bound on retries so the cell cannot spin forever

for attempt in range(MAX_ATTEMPTS):
    # A random draw is only used when no index is pinned (previously the draw
    # was made and then immediately overwritten by the hard-coded index).
    if PINNED_INDEX is not None:
        n = PINNED_INDEX
    else:
        n = np.random.randint(0,len(y_train))
    # Add a leading batch axis of size 1 for predict.
    X = X_train[n].reshape(1,*X_train[n].shape).astype(float)

    #### GET CORRECTLY CLASSIFIED INPUT
    y_hat = np.argmax(bayes_model.predict(X,n=N))
    if y_train[n] == y_hat:
        break
else:
    raise RuntimeError(
        f'No correctly classified input found after {MAX_ATTEMPTS} attempts '
        f'(last index tried: {n})'
    )

plt.imshow(X_train[n],vmin=0,vmax=1)
print('Prediction:',y_hat)
print('n:',n)

In [None]:
# get shap explanations from model samples
# One `get_SHAP_exp.py` process is started per iteration; the cell blocks until
# all of them have exited.

# 200 distinct training images, passed to the workers as the background set.
background = X_train[np.random.choice(X_train.shape[0],200,replace=False)]

exp_dir = f'exps/exp{n}'
# makedirs also creates the parent 'exps' folder and tolerates re-runs.
os.makedirs(exp_dir, exist_ok=True)

# Inputs shared with the worker processes.
input_path = f'exps/exp{n}/X.npy'
background_path = f'exps/exp{n}/bg.npy'
y_path = f'exps/exp{n}/y.npy'
np.save(y_path,y_hat,allow_pickle=False)
np.save(input_path,X,allow_pickle=False)
np.save(background_path,background,allow_pickle=False)

iterations = 50
workers = [
    subprocess.Popen(['python3','get_SHAP_exp.py',str(i),model_name,input_path,exp_dir,background_path])
    for i in range(iterations)
]

# Wait on the process handles instead of polling the directory for a file
# count: counting files burns a CPU core, depends on how many input files were
# saved, and never terminates if the folder already holds results from an
# earlier run.
exit_codes = [worker.wait() for worker in workers]
failed = [i for i, code in enumerate(exit_codes) if code != 0]
if failed:
    print('WARNING: get_SHAP_exp.py exited with an error for iterations:',failed)

In [None]:
# read in and process the results from the above, naive bayesian explanation method

exp_dir = f'exps/exp{n}'

all_exps = []
for f in os.listdir(exp_dir):
    file_path = os.path.join(exp_dir, f)
    # Files starting with X, bg or y are skipped, as is anything that is not a
    # regular file.
    if f.startswith(('X', 'bg', 'y')) or not os.path.isfile(file_path):
        continue
    tmp = np.load(file_path)
    # Only arrays of the expected (43,1,30,30,3) shape are used.
    if tmp.shape != (43,1,30,30,3):
        continue
    # Take the entry for the label of sample n and keep index 0 of the last axis.
    all_exps.append(tmp[y_train[n]].reshape(30,30,3)[:,:,0])

coverage_map = {}
# Threshold relative to the largest value over all loaded maps.
max_rel = np.max(all_exps)
limit = 0.004*max_rel
for exp in all_exps:
    # Binarise in place: values below the limit become 0, remaining positives become 1.
    below_limit = exp < limit
    exp[below_limit] = 0
    exp[exp > 0] = 1


In [None]:
# visualise results
# Identical maps are grouped by their string form; the most frequent one is
# shown together with its share of the 50 runs.
names = []   # string form of each distinct map, in first-seen order
es = []      # the distinct maps themselves, aligned with `names`
cmap = {}    # position in `names` -> number of occurrences
for e in all_exps:
    key = str(e)
    if key not in names:
        names.append(key)
        es.append(e)
    slot = names.index(key)
    cmap[slot] = cmap.get(slot, 0) + 1

res = max(cmap, key=cmap.get)
res_image = es[res]
plt.imshow(res_image)
plt.axis('off')
plt.show()
cov = (cmap[res]/50)*100

print("P_cover:",cov)

In [None]:
from memo import memo

@memo
def generate_min_exps(expl,threshold):
    """Recursively collect reduced versions of `expl` that sit at the threshold.

    The non-zero entries of `expl` are visited in order. For each one, the sum
    of all *other* entries is computed:

    * if that sum is below `threshold`, `expl` itself is recorded and the scan
      stops;
    * otherwise the entry is replaced by 0 and the search continues
      recursively on the reduced tuple.

    Args:
        expl: sequence of numbers (a tuple in this notebook, so that the
            memoised call can use it as a key).
        threshold: minimum total that the remaining entries must keep.

    Returns:
        A list of tuples found by the search; the same tuple may appear more
        than once.
    """
    found = []
    for position, value in enumerate(expl):
        if value == 0:
            continue
        # Total of every entry except the one at `position`.
        remaining = sum(expl[:position]) + sum(expl[position+1:])
        if remaining < threshold:
            found.append(expl)
            break
        reduced = tuple(expl[:position]) + (0,) + tuple(expl[position+1:])
        found += generate_min_exps(reduced, threshold)
    return found

In [None]:
# better method of generating bayesian cover exp
import ast

exp_dir = f'exps/exp{n}'

all_exps = []
for f in os.listdir(exp_dir):
    file_path = os.path.join(exp_dir, f)
    # Files starting with X, bg or y are skipped, as is anything that is not a
    # regular file.
    if f.startswith(('X', 'bg', 'y')) or not os.path.isfile(file_path):
        continue
    tmp = np.load(file_path)
    # Only arrays of the expected (43,1,30,30,3) shape are used.
    if tmp.shape != (43,1,30,30,3):
        continue
    # Take the entry for the label of sample n and keep index 0 of the last axis.
    all_exps.append(tmp[y_train[n]].reshape(30,30,3)[:,:,0])

net_count = len(all_exps)
coverage_map = {}
# Threshold relative to the largest value over all loaded maps.
max_rel = np.max(all_exps)
limit = 0.004*max_rel
all_new_exps = []
# Only the maps at positions 0, 1, 2 and 4 are processed here.
for exp in all_exps[:3]+all_exps[4:5]:
    print(exp.shape)
    # Binarise in place: values below the limit become 0, remaining positives become 1.
    below_limit = exp < limit
    exp[below_limit] = 0
    exp[exp > 0] = 1
    # Search for reduced maps that keep at least 98% of the binarised total;
    # duplicates coming from a single map are collapsed via the set.
    flat_exp = tuple(exp.flatten())
    min_total = 0.98*np.sum(exp)
    all_new_exps.extend(set(generate_min_exps(flat_exp, min_total)))

In [None]:
# visualise results
# Count how often each generated explanation occurs, show the most frequent
# one and report its share.
cmap = dict()      # string form of an explanation -> number of occurrences
pic_map = dict()   # string form of an explanation -> 30x30 image
# Report how many explanations were generated. (`exps` only exists inside
# generate_min_exps, so printing its length here raised a NameError.)
print(len(all_new_exps))
for e in all_new_exps:
    e = np.array(e)
    # str() is used as the grouping key; numpy abbreviates arrays with more
    # than 1000 elements by default, so this is only a safe key for small maps.
    key = str(e)
    if key not in cmap:
        cmap[key] = 0
        pic_map[key] = e.reshape(30,30)
    cmap[key] += 1

res = max(cmap,key=cmap.get)
res_image = pic_map[res]
plt.imshow(res_image)
plt.axis('off')
plt.show()
# NOTE(review): 50 is hard-coded here; it presumably stands for the number of
# sampled networks -- confirm it still matches the explanations processed.
cov = (cmap[res]/50)*100

print("P_cover:",cov)