In [1]:
import keras
import h5py
import cv2
import numpy as np
from scipy.stats import norm
from matplotlib import pyplot as plt

In [2]:
"""
DataLoader class is inspired by https://github.com/csaw-hackml/CSAW-HackML-2020/blob/master/eval.py
"""

class DataLoader:
  """Loads an image/label dataset from an HDF5 file and rescales the images.

  After `load()` the instance exposes:
    x -- the "data" dataset with its axes reordered by (0, 2, 3, 1)
    y -- the "label" dataset
  """

  def __init__(self, file_path):
    """Remember the path of the HDF5 file; nothing is read until `load()`."""
    self.file_path = file_path

  def load(self):
    """Read the "data" and "label" datasets into memory as NumPy arrays.

    The file is opened read-only and closed again before returning, so no
    HDF5 handle is left open; both datasets are fully materialised inside the
    `with` block so they stay valid afterwards.
    """
    with h5py.File(self.file_path, "r") as data:
      x_data = np.asarray(data["data"])
      labels = np.asarray(data["label"])
    # Move axis 1 to the end (presumably channels-first -> channels-last;
    # confirm against the dataset layout).
    self.x = x_data.transpose((0,2,3,1))
    self.y = labels

  def preprocess(self):
    """Divide `self.x` by 255 and store it as float64.

    Requires `load()` to have been called first. Not idempotent: each call
    divides by 255 again.
    """
    # NOTE(review): assumes pixel values are in the 0-255 range -- confirm.
    self.x = np.asarray(self.x/255, np.float64)

In [3]:
class STRIP:
  """Entropy-based filter that flags inputs whose predictions stay confident
  after being blended with other images.

  Each input is blended with `N` randomly chosen overlay images, the model is
  run on the blends, and the summed entropy of the predictions is compared
  with a threshold derived from entropies of known-clean inputs. Low entropy
  is treated as the sign of a poisoned input.

  Parameters
  ----------
  N : int
      Number of overlay images blended with every input.
  alpha, beta, gamma : float
      Weights passed to `cv2.addWeighted`: the blend is
      `alpha * input + beta * overlay + gamma`.
  """

  def __init__(self, N, alpha, beta, gamma):
    self.N = N
    self.alpha = alpha
    self.beta = beta
    self.gamma = gamma

  def blend_image(self, img1, img2):
    """Return the weighted blend of two same-shaped images.

    The result has the shape of `img1` (previously hard-coded to 55x47x3).
    """
    blended = cv2.addWeighted(img1,self.alpha,img2,self.beta,self.gamma)
    return blended.reshape(np.shape(img1))

  def calculate_entropy(self, target_img, model, input_img):
    """Summed prediction entropy of `target_img` under `N` random blends.

    Parameters
    ----------
    target_img : array
        The single image being examined.
    model : object with `predict(batch)`
        Returns one row of class probabilities per image in the batch.
    input_img : sequence of images
        Pool the overlays are drawn from, without replacement, so it must
        contain at least `N` images (NumPy raises ValueError otherwise).

    Returns
    -------
    float
        -sum(p * log2(p)) over all `N` blends and all classes. The value is
        not normalised by `N`, so entropies are only comparable between runs
        that use the same `N`.
    """
    # float64 batch shaped (N, *image_shape); the shape follows the target
    # image instead of being fixed to 55x47x3.
    blended_img = np.zeros((self.N,) + tuple(np.shape(target_img)))
    random_index = np.random.choice(np.arange(len(input_img)), self.N, replace=False)
    for slot, pool_idx in enumerate(random_index):
      blended_img[slot] = self.blend_image(target_img, input_img[pool_idx])
    pred_label = model.predict(blended_img)
    # p == 0 gives 0 * log2(0) = nan, which nansum drops (contributing zero
    # entropy); silence the warnings that the intermediate would emit.
    with np.errstate(divide="ignore", invalid="ignore"):
      entropy = -np.nansum(pred_label*np.log2(pred_label))
    return entropy

  def generate_entropy_distribution(self, input_img, clean_img, model):
    """Entropy of every image in `input_img`, using `clean_img` as overlay pool.

    Returns a 1-D float array with one entropy per input image, in order.
    """
    count = len(input_img)
    entropy_distribution = np.zeros(count)
    for i in range(count):
      entropy_distribution[i] = self.calculate_entropy(input_img[i], model, clean_img)
    return entropy_distribution

  def predict(self, entropy_clean, input_img, model, clean_img=None, frr=0.05):
    """Flag each image in `input_img` as poisoned (1.0) or not (0.0).

    Parameters
    ----------
    entropy_clean : array-like
        Entropies of known-clean inputs; a normal distribution is fitted to
        them to derive the detection threshold.
    input_img : sequence of images
        Images to classify.
    model : object with `predict(batch)`
        Model under inspection.
    clean_img : sequence of images, optional
        Overlay pool used for blending. When omitted, overlays are drawn from
        `input_img` itself (the historical behaviour). That pool can contain
        poisoned images, which lowers the entropy of clean inputs blended
        with them and can produce false positives, so passing a clean
        held-out set is recommended.
    frr : float, optional
        Quantile of the fitted clean-entropy distribution used as threshold,
        i.e. the fraction of clean inputs expected to be flagged. Must lie
        strictly between 0 and 1. Defaults to 0.05.

    Returns
    -------
    numpy.ndarray
        float64 array of 0.0/1.0 labels, one per input image.
    """
    if not 0 < frr < 1:
      raise ValueError("frr must be strictly between 0 and 1")
    mu, sigma = norm.fit(entropy_clean)
    threshold = norm.ppf(frr, loc=mu, scale=sigma)
    overlay_pool = input_img if clean_img is None else clean_img
    entropies = self.generate_entropy_distribution(input_img, overlay_pool, model)
    # Entropy strictly below the threshold marks the input as poisoned.
    return (entropies < threshold).astype(np.float64)

In [11]:
# Input locations. The "drive/MyDrive/..." paths are relative to the working
# directory (presumably a mounted Google Drive -- adjust for other setups).
clean_data_test_filename = "drive/MyDrive/MlForCyberProject/clean_test_data.h5"
poisoned_data_sunglasses_filename = "drive/MyDrive/MlForCyberProject/sunglasses_poisoned_data.h5"
sunglasses_bd_model_filename = "drive/MyDrive/MlForCyberProject/sunglasses_bd_net.h5"
# Precomputed entropies, read from its "data" dataset below.
# NOTE(review): presumably produced by STRIP.generate_entropy_distribution on
# clean data with the same N/alpha/beta/gamma used later -- confirm.
entropy_sunglasses_filename = "entropy_clean_sunglasses.h5"

test_clean = DataLoader(clean_data_test_filename)
test_poisoned_sunglasses = DataLoader(poisoned_data_sunglasses_filename)

# Read each dataset, then rescale its images (divide by 255).
test_clean.load()
test_clean.preprocess()
test_poisoned_sunglasses.load()
test_poisoned_sunglasses.preprocess()

sunglasses_bd_model = keras.models.load_model(sunglasses_bd_model_filename)

# Copy the entropies into memory inside the `with` block so the HDF5 handle
# is closed afterwards instead of staying open for the rest of the session.
with h5py.File(entropy_sunglasses_filename, "r") as entropy_clean_sunglasses_data:
  entropy_clean_sunglasses = np.asarray(entropy_clean_sunglasses_data["data"])

In [16]:
# Filter settings: 50 overlays per input, blend = 1.5 * input + 1 * overlay + 0.
STRIP_filter = STRIP(N=50, alpha=1.5, beta=1, gamma=0)

# Evaluation batch: the first 200 poisoned images followed by the first 800
# clean images, stacked along the sample axis.
mixed_test_images = np.vstack(
    (test_poisoned_sunglasses.x[:200], test_clean.x[:800])
)

pred_poisoned = STRIP_filter.predict(
    entropy_clean_sunglasses,
    mixed_test_images,
    sunglasses_bd_model,
)



In [17]:
# Labels are 0.0/1.0, so their sum is the number of inputs flagged as poisoned.
print(pred_poisoned.sum())

242.0
