In [1]:
import numpy as np
import pickle
import pandas as pd
import spacy
nlp = spacy.load("en_core_web_lg")

In [2]:
directory = 'vers33/MLP_Classifiers_480k_training_15_iter_NN_size_200_50_30'
#use_z_values = tuple(range(25))
#use_z_values = (0,6,12,18,24)
use_z_values = (0,3,4,6,8,9,12,13,14,15,16,17,18,19,20,21,22,23,24)
model_names = ["S1-S2",
               "S1-V2",
               "S1-O2",
               "S1-L2",
               "S1-C2",
               "V1-S2",
               "V1-V2",
               "V1-O2",
               "V1-L2",
               "V1-C2",
               "O1-S2",
               "O1-V2",
               "O1-O2",
               "O1-L2",
               "O1-C2",
               "L1-S2",
               "L1-V2",
               "L1-O2",
               "L1-L2",
               "L1-C2",
               "C1-S2",
               "C1-V2",
               "C1-O2",
               "C1-L2",
               "C1-C2",]
model_names = [name for i,name in enumerate(model_names) if i in use_z_values]

clf = list()
for i in range(len(use_z_values)):
    with open("../03_Bayesian_Network/" + directory + "/MLP_Classifier" + str(i) + ".pkl", "rb") as f:
        clf += [pickle.load(f), ]

In [3]:
train1 = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_train_1.csv')
train2 = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_train_2.csv')
train = pd.concat([train1, train2])
train = train[train.notnull().apply(all, axis=1)]
dev = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_dev.csv')
dev = dev[dev.notnull().apply(all, axis=1)]
test = pd.read_csv('../Input_Data/e-SNLI/dataset/esnli_test.csv')
test = test[test.notnull().apply(all, axis=1)]

dev_prepared = pd.read_csv('../02_Extract_Subphrases/prepared_data/subphrase_vectors_dev.csv', sep=';')
dev_prepared = dev_prepared.drop(columns='Unnamed: 0')
dev = dev.set_index('pairID')
rel_pairIDs = dev_prepared.iloc[:,0]
y_hat = dev.loc[rel_pairIDs].gold_label
dev_prepared = dev_prepared.iloc[:,1:].to_numpy()

dev_subphrases = pd.read_csv('../02_Extract_Subphrases/prepared_data/subphrases_dev.csv', sep=',')
dev_subphrases = dev_subphrases.set_index('pairID')
dev_subphrases = dev_subphrases.loc[rel_pairIDs]

In [4]:
# Prepare colum indices
indices = np.array([[0,1500], [0,1800], [0,2100], [0,2400], [0,2700],
                    [300,1500], [300,1800], [300,2100], [300,2400], [300,2700],
                    [600,1500], [600,1800], [600,2100], [600,2400], [600,2700],
                    [900,1500], [900,1800], [900,2100], [900,2400], [900,2700],
                    [1200,1500], [1200,1800], [1200,2100], [1200,2400], [1200,2700]])
indices = indices[use_z_values,:].tolist()
# Initialise colulmn indices and "nan" values if information (e.g. location of sentence) is not detected
not_nan = [None, ] * len(use_z_values)
cols = [None, ] * len(use_z_values)
for i in range(len(use_z_values)):
    cols[i] = list(range(indices[i][0], indices[i][0]+300)) + list(range(indices[i][1],indices[i][1]+300))
    not_nan[i] = pd.Series([not x for x in pd.DataFrame(np.isnan(dev_prepared[:,cols[i]])).apply(any, axis=1)])
not_nan = np.array(not_nan).T

In [5]:
def softmax(x):
    """
    Compute softmax values for each set of scores in x.
    """
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

def relu(x):
    return np.maximum(x, 0)

def custom_sign(x):
    if len(x.shape) == 2 and x.shape[1] == 1:
        x = x.reshape((-1,))
    else:
        raise ValueError(f"x must be of shape (n,1), not {x.shape}")
    return np.diag((np.sign(x) + 1) / 2) # np.sign returns 1 and -1, but we want 1 and 0

def get_gradient(x1, x2, net, j):
    x = np.vstack((x1, x2))
    n_hidden = net.n_layers_ - 2 # number of hidden layers (subtract output and input layer)
    n = x.shape[0]

    # Get weights and biases from net
    W = net.coefs_
    W = [w.T for w in W]
    b = net.intercepts_
    b = [bt.reshape((-1,1)) for bt in b]

    # Calculate the partial derivatives of each factor in the chain for the final gradient
    # Forward propagation
    h = [relu(W[0] @ x + b[0])]
    for i in range(1, n_hidden):
        h += [relu(W[i] @ h[i-1] + b[i])]
    h += [softmax(W[n_hidden] @ h[-1] + b[-1])]

    # Backward propagation
    delta = np.zeros((net.n_outputs_, 1))
    delta[j] = 1
    gradients = [custom_sign(W[0] @ x + b[0]) @ W[0][:,int(n/2):], ]
    for i in range(1, n_hidden):
        gradients += [custom_sign(W[i] @ h[i-1] + b[i]) @ W[i], ]
    gradients += [(softmax(W[-1] @ h[-2] + b[-1]) * (delta - softmax(W[-1] @ h[-2] + b[-1]))).T @ W[-1], ]

    gradient = gradients[-1]
    for i in range(n_hidden-1, -1, -1):
        gradient = gradient @ gradients[i]
    return gradient.T

In [6]:
def pertubate_input(x1, x2, net, learning_rate=0.5):
    x2_star = x2
    z_hat_original = np.argmax(net.predict_proba(np.vstack((x1, x2)).T))
    z_hat = z_hat_original
    epsilon = 0
    iterations = 0
    while z_hat == z_hat_original:
        iterations += 1
        gradient = get_gradient(x1, x2_star, net, z_hat_original)
        epsilon += learning_rate * -gradient
        x2_star = x2 + epsilon
        z_hat = np.argmax(net.predict_proba(np.vstack((x1, x2_star)).T))
    return x2_star, epsilon, iterations

In [19]:
model_index = 3
cur_x1 = dev_prepared[2,cols[model_index][:300]].reshape((-1,1))
cur_x2 = dev_prepared[2,cols[model_index][300:]].reshape((-1,1))

original_pred = clf[model_index].predict(np.vstack((cur_x1, cur_x2)).T)
x2_star, epsilon, iterations = pertubate_input(cur_x1, cur_x2, clf[model_index])
pert_pred = clf[model_index].predict(np.vstack((cur_x1, x2_star)).T)
print(iterations)
print(np.sum(epsilon * epsilon))
print(original_pred)
print(pert_pred)

ms_x1 = nlp.vocab.strings[nlp.vocab.vectors.most_similar(cur_x1.T, n=1)[0][0]]
ms_x2 = nlp.vocab.strings[nlp.vocab.vectors.most_similar(cur_x2.T, n=1)[0][0]]
pos_x2 = nlp(ms_x2)[0].pos_
print(ms_x1, ms_x2)
ms_x2_star = [nlp.vocab.strings[id] for id in nlp.vocab.vectors.most_similar(x2_star.T, n=10)[0][0,:]]

for term in ms_x2_star:
    nlp_x2_star = nlp(term)[0]
    pos_x2_star = nlp_x2_star.pos_
    if pos_x2_star == pos_x2 and term != ms_x2:
        ms_x2_star = term
        x2_star = nlp_x2_star.vector.reshape((-1,1))
        break
pert_pred = clf[model_index].predict(np.vstack((cur_x1, x2_star)).T)


while original_pred == pert_pred:
    epsilon = 1.1 * epsilon
    x2_star = cur_x2 + epsilon
    ms_x2_star = [nlp.vocab.strings[id] for id in nlp.vocab.vectors.most_similar(x2_star.T, n=10)[0][0,:]]
    for term in ms_x2_star:
        nlp_x2_star = nlp(term)[0]
        pos_x2_star = nlp_x2_star.pos_
        if pos_x2_star == pos_x2 and term != ms_x2:
            ms_x2_star = term
            x2_star = nlp_x2_star.vector.reshape((-1,1))
            break
    pert_pred = clf[model_index].predict(np.vstack((cur_x1, x2_star)).T)
epsilon = x2_star - cur_x2
print(pert_pred)
print(model_names[model_index])
print(np.sum(epsilon * epsilon))
print(f"Changed from {ms_x1, ms_x2} to {ms_x1, ms_x2_star}")

62
4.160986167201184
['contradiction']
['entailment']
embracing fighting
['entailment']
V1-V2
674.6572885480628
Changed from ('embracing', 'fighting') to ('embracing', 'dogfighting')


In [14]:
x2_star.reshape((-1,1))

array([[ 6.0342e-02],
       [-6.9463e-01],
       [-1.9652e+00],
       [ 2.3819e-01],
       [ 1.5806e+00],
       [ 1.1793e+00],
       [ 1.6456e+00],
       [ 2.2771e+00],
       [-4.2130e-01],
       [-7.6174e-01],
       [ 1.9682e+00],
       [ 7.3408e-01],
       [-1.1922e+00],
       [ 7.4786e-01],
       [ 3.0677e+00],
       [ 3.2473e+00],
       [ 2.5065e+00],
       [-1.6465e-01],
       [-8.9224e-01],
       [ 7.7612e-01],
       [ 8.2401e-01],
       [ 1.5025e+00],
       [-1.9849e-01],
       [ 6.5685e-01],
       [ 1.7082e+00],
       [-2.2786e-01],
       [-2.5034e+00],
       [-2.3373e+00],
       [-1.6716e-01],
       [-2.9230e-01],
       [ 3.2595e+00],
       [ 1.1605e+00],
       [ 9.1680e-01],
       [-1.1685e+00],
       [ 1.4924e-02],
       [-1.3162e+00],
       [-3.0498e-01],
       [ 2.4476e+00],
       [-1.3953e-02],
       [-8.1036e-01],
       [ 7.5154e-01],
       [-1.3571e+00],
       [ 1.6972e-01],
       [-5.6768e-01],
       [-2.8128e+00],
       [ 8

In [10]:
np.vstack((cur_x1, x2_star)).T

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 1 and the array at index 1 has size 300

In [67]:
[nlp.vocab.strings[id] for id in nlp.vocab.vectors.most_similar(x2_star.T, n=10)[0][0,:]]

['activewear',
 'promoting',
 'responsorial',
 'promote',
 'broadminded',
 'dedicating',
 'promotees',
 'responsory',
 'respons',
 'mobilizing']

In [65]:
nlp.vocab.vectors.most_similar(x2_star.T, n=10)[0][0,:]

array([10844699138153907478,  1027099120411152061,  2080108370443471494,
       11790474613361244676,  6892858849450375015,  6553990832704373837,
         804403426522879558, 16942184799860767510, 15444821592234117941,
       17462773503239410202], dtype=uint64)

In [434]:
np.sum(cur_x1 * cur_x1)

1749.379310202855

In [190]:
for i in range(10):
    cur_inp = np.random.rand(600).reshape((1,-1)) * 4
    pred = clf.predict(cur_inp)[0]
    pert_pred = clf.predict(deepfool(cur_inp, clf, num_classes=3, overshoot=0.02, max_iter=50)[-1])[0]
    if pert_pred == pred:
        print(i)
        print(pred)
        print(pert_pred)

0
contradiction
contradiction
1
neutral
neutral
2
contradiction
contradiction
3
neutral
neutral
4
neutral
neutral
5
contradiction
contradiction
6
neutral
neutral
7
contradiction
contradiction
8
neutral
neutral
9
neutral
neutral


array(['entailment'], dtype='<U13')

In [83]:
np.array([5, ] * 600).shape

(600,)

In [82]:
get_gradient(np.array([5, ] * 600), clf).shape

(600,)