In [1]:
import numpy as np
from tensorflow import set_random_seed
seed = 1
np.random.seed(seed)
set_random_seed(seed)

from shift_detector import *
from shift_locator import *
from shift_applicator import *
from data_utils import *
import os
import sys
from exp_utils import *

# -------------------------------------------------
# PLOTTING HELPERS
# -------------------------------------------------

import matplotlib.pyplot as plt
from matplotlib import rc
rc('font',**{'family':'serif','serif':['Times']})
rc('text', usetex=True)
rc('axes', labelsize=20)
rc('xtick', labelsize=20)
rc('ytick', labelsize=20)
rc('legend', fontsize=12)

def clamp(val, minimum=0, maximum=255):
    if val < minimum:
        return minimum
    if val > maximum:
        return maximum
    return val

def colorscale(hexstr, scalefactor):
    hexstr = hexstr.strip('#')

    if scalefactor < 0 or len(hexstr) != 6:
        return hexstr

    r, g, b = int(hexstr[:2], 16), int(hexstr[2:4], 16), int(hexstr[4:], 16)

    r = clamp(r * scalefactor)
    g = clamp(g * scalefactor)
    b = clamp(b * scalefactor)

    return "#%02x%02x%02x" % (int(r), int(g), int(b))

linestyles = ['-', '-.', '--', ':']
brightness = [1.25, 1.0, 0.75, 0.5]
format = ['-o', '-h', '-p', '-s', '-D', '-<', '->', '-X']
markers = ['o', 'h', 'p', 's', 'D', '<', '>', 'X']
colors_old = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728',
              '#9467bd', '#8c564b', '#e377c2', '#7f7f7f',
              '#bcbd22', '#17becf']
colors = ['#2196f3', '#f44336', '#9c27b0', '#64dd17', '#009688', '#ff9800', '#795548', '#607d8b']

def errorfill(x, y, yerr, color=None, alpha_fill=0.2, ax=None, fmt='-o', label=None):
    ax = ax if ax is not None else plt.gca()
    if color is None:
        color = next(ax._get_lines.prop_cycler)['color']
    if np.isscalar(yerr) or len(yerr) == len(y):
        ymin = y - yerr
        ymax = y + yerr
    elif len(yerr) == 2:
        ymin, ymax = yerr
    ax.semilogx(x, y, fmt, color=color, label=label)
    ax.fill_between(x, np.clip(ymax, 0, 1), np.clip(ymin, 0, 1), color=color, alpha=alpha_fill)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
# -------------------------------------------------
# CONFIG
# -------------------------------------------------

make_keras_picklable()
np.set_printoptions(threshold=sys.maxsize)

sys.argv = ['pipeline.py', 'mnist', 'orig', 'univ']

datset = sys.argv[1]
test_type = sys.argv[3]

path = './paper_results/'
path += test_type + '/'
path += datset + '_'
path += sys.argv[2] + '/'

if not os.path.exists(path):
    os.makedirs(path)

# Define DR methods
dr_techniques = [DimensionalityReduction.NoRed.value, DimensionalityReduction.PCA.value, DimensionalityReduction.SRP.value, DimensionalityReduction.UAE.value, DimensionalityReduction.TAE.value, DimensionalityReduction.BBSDs.value, DimensionalityReduction.BBSDh.value]
if test_type == 'multiv':
    dr_techniques = [DimensionalityReduction.NoRed.value, DimensionalityReduction.PCA.value, DimensionalityReduction.SRP.value, DimensionalityReduction.UAE.value, DimensionalityReduction.TAE.value, DimensionalityReduction.BBSDs.value]
if test_type == 'univ':
    dr_techniques_plot = dr_techniques.copy()
    dr_techniques_plot.append(DimensionalityReduction.Classif.value)
else:
    dr_techniques_plot = dr_techniques.copy()

# Define test types and general test sample sizes
test_types = [td.value for td in TestDimensionality]
if test_type == 'multiv':
    od_tests = []
    md_tests = [MultidimensionalTest.MMD.value]
    samples = [10, 20, 50, 100, 200, 500, 1000]
else:
#     od_tests = [od.value for od in OnedimensionalTest]
    od_tests = [OnedimensionalTest.KS.value]
    md_tests = []
    samples = [10, 20, 50, 100, 200, 500, 1000, 9000]
difference_samples = 10

# Number of random runs to average results over    
random_runs = 2

# Signifiance level
sign_level = 0.05

# Define shift types
if sys.argv[2] == 'small_gn_shift':
    shifts = ['rand','small_gn_shift_0.1', 'small_gn_shift_0.5', 'small_gn_shift_1.0']
elif sys.argv[2] == 'medium_gn_shift':
    shifts = ['rand','medium_gn_shift_0.1', 'medium_gn_shift_0.5', 'medium_gn_shift_1.0']
elif sys.argv[2] == 'large_gn_shift':
    shifts = ['rand', 'large_gn_shift_0.1', 'large_gn_shift_0.5', 'large_gn_shift_1.0']
elif sys.argv[2] == 'adversarial_shift':
    shifts = ['rand','adversarial_shift_0.1', 'adversarial_shift_0.5', 'adversarial_shift_1.0']
elif sys.argv[2] == 'ko_shift':
    shifts = ['rand','ko_shift_0.1', 'ko_shift_0.5', 'ko_shift_1.0']
    if test_type == 'univ':
        samples = [10, 20, 50, 100, 200, 500, 1000, 9000]
elif sys.argv[2] == 'orig':
    shifts = ['rand', 'orig']
    brightness = [1.25, 0.75]
elif sys.argv[2] == 'small_image_shift':
    shifts = ['rand', 'small_img_shift_0.1', 'small_img_shift_0.5', 'small_img_shift_1.0']
elif sys.argv[2] == 'medium_image_shift':
    shifts = ['rand','medium_img_shift_0.1', 'medium_img_shift_0.5', 'medium_img_shift_1.0']
elif sys.argv[2] == 'large_image_shift':
    shifts = ['rand','large_img_shift_0.1', 'large_img_shift_0.5', 'large_img_shift_1.0']
elif sys.argv[2] == 'medium_img_shift+ko_shift':
    shifts = ['rand', 'medium_img_shift_0.5+ko_shift_0.1', 'medium_img_shift_0.5+ko_shift_0.5', 'medium_img_shift_0.5+ko_shift_1.0']
    if test_type == 'univ':
        samples = [10, 20, 50, 100, 200, 500, 1000, 9000]
elif sys.argv[2] == 'only_zero_shift+medium_img_shift':
    shifts = ['rand', 'only_zero_shift+medium_img_shift_0.1', 'only_zero_shift+medium_img_shift_0.5', 'only_zero_shift+medium_img_shift_1.0']
    samples = [10, 20, 50, 100, 200, 500, 1000]
else:
    shifts = []

In [3]:
samples_shifts_rands_dr_tech = np.ones((len(samples), len(shifts), random_runs, len(dr_techniques) + 1)) * (-1)

red_dim = -1
red_models = [None] * len(DimensionalityReduction)

In [4]:
shift_idx, shift = 0, 'rand'
shift_path = path + shift + '/'
if not os.path.exists(shift_path):
    os.makedirs(shift_path)

rand_run_p_vals = np.ones((len(samples), len(dr_techniques) + 1, random_runs)) * (-1)

In [5]:
rand_run = 0

print("Random run %s" % rand_run)

rand_run_path = shift_path + str(rand_run) + '/'
if not os.path.exists(rand_run_path):
    os.makedirs(rand_run_path)

np.random.seed(rand_run)
set_random_seed(rand_run)

# Load data
(X_tr_orig, y_tr_orig), (X_val_orig, y_val_orig), (X_te_orig, y_te_orig), orig_dims, nb_classes = import_dataset(datset, shuffle=True)
X_tr_orig = normalize_datapoints(X_tr_orig, 255.)
X_te_orig = normalize_datapoints(X_te_orig, 255.)
X_val_orig = normalize_datapoints(X_val_orig, 255.)

Random run 0
(28, 28, 1)


In [6]:
if shift == 'orig':
    print('Original')
    (X_tr_orig, y_tr_orig), (X_val_orig, y_val_orig), (X_te_orig, y_te_orig), orig_dims, nb_classes = import_dataset(datset)
    X_tr_orig = normalize_datapoints(X_tr_orig, 255.)
    X_te_orig = normalize_datapoints(X_te_orig, 255.)
    X_val_orig = normalize_datapoints(X_val_orig, 255.)
    X_te_1 = X_te_orig.copy()
    y_te_1 = y_te_orig.copy()
else:
    (X_te_1, y_te_1) = apply_shift(X_te_orig, y_te_orig, shift, orig_dims, datset)

X_te_2 , y_te_2 = random_shuffle(X_te_1, y_te_1)

Randomized


In [7]:
si, sample = 0, 10

print("Sample %s" % sample)

sample_path = rand_run_path + str(sample) + '/'
if not os.path.exists(sample_path):
    os.makedirs(sample_path)

X_te_3 = X_te_2[:sample,:]
x_te_3_samp = X_te_3[0]
y_te_3 = y_te_2[:sample]

if test_type == 'multiv':
    X_val_3 = X_val_orig[:1000,:]
    y_val_3 = y_val_orig[:1000]
else:
    X_val_3 = np.copy(X_val_orig)
    y_val_3 = np.copy(y_val_orig)

X_tr_3 = np.copy(X_tr_orig)
y_tr_3 = np.copy(y_tr_orig)

Sample 10


In [104]:
dr_ind, dr_technique = 6, 6
shift_reductor = ShiftReductor(X_tr_3, y_tr_3, X_val_3, y_val_3, DimensionalityReduction(dr_technique), orig_dims, datset, dr_amount=32)
shift_reductor_model = shift_reductor.fit_reductor()

Train on 50000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


In [108]:
shift_reductor_model.save('./saved_models/test.h5')

In [129]:
from keras.utils.io_utils import H5Dict

h5dict = H5Dict('./saved_models/test.h5', mode='r')
# model_from_config(model_config, custom_objects=custom_objects)

In [141]:
import json

model_config = h5dict['model_config']
model_config = json.loads(model_config.decode('utf-8'))

In [176]:
from keras.engine.saving import model_from_config
from keras.layers import deserialize
from keras import models
from keras.utils.generic_utils import deserialize_keras_object, has_arg, _GLOBAL_CUSTOM_OBJECTS

globs = globals()  # All layers.
globs['Model'] = models.Model
globs['Sequential'] = models.Sequential

class_name = model_config['class_name']

# d = deserialize_keras_object(model_config,
#                             module_objects=globs,
#                             custom_objects=keras_resnet.custom_objects,
#                             printable_module_name='layer')
# d = deserialize(model_config, custom_objects=keras_resnet.custom_objects)
# model = model_from_config(model_config, custom_objects=keras_resnet.custom_objects)

In [170]:
model_config

{'class_name': 'ResNet2D18',
 'config': {'name': 'resnet2d18_3',
  'layers': [{'name': 'input_8',
    'class_name': 'InputLayer',
    'config': {'batch_input_shape': [None, 28, 28, 1],
     'dtype': 'float32',
     'sparse': False,
     'name': 'input_8'},
    'inbound_nodes': []},
   {'name': 'conv1',
    'class_name': 'Conv2D',
    'config': {'name': 'conv1',
     'trainable': True,
     'dtype': 'float32',
     'filters': 64,
     'kernel_size': [7, 7],
     'strides': [2, 2],
     'padding': 'same',
     'data_format': 'channels_last',
     'dilation_rate': [1, 1],
     'activation': 'linear',
     'use_bias': False,
     'kernel_initializer': {'class_name': 'VarianceScaling',
      'config': {'scale': 1.0,
       'mode': 'fan_avg',
       'distribution': 'uniform',
       'seed': None}},
     'bias_initializer': {'class_name': 'Zeros', 'config': {}},
     'kernel_regularizer': None,
     'bias_regularizer': None,
     'activity_regularizer': None,
     'kernel_constraint': None,
 

In [177]:
cls = globs.get(class_name)

In [179]:
cls.from_config(
            model_config['config'],
            custom_objects=dict(list(_GLOBAL_CUSTOM_OBJECTS.items()) +
                                list(keras_resnet.custom_objects.items())))

AttributeError: 'list' object has no attribute 'dtype'

In [191]:
from keras.engine.network import Network

config = model_config['config']

for layer_data in config['layers']:
    process_layer(layer_data)

NameError: name 'process_layer' is not defined

In [230]:
from keras.utils.generic_utils import unpack_singleton

# Layer instances created during
# the graph reconstruction process
created_layers = {}

# Dictionary mapping layer instances to
# node data that specifies a layer call.
# It acts as a queue that maintains any unprocessed
# layer call until it becomes possible to process it
# (i.e. until the input tensors to the call all exist).
unprocessed_nodes = {}

def add_unprocessed_node(layer, node_data):
    """Add node to layer list
    # Arguments
        layer: layer object
        node_data: Node data specifying layer call
    """
    if layer not in unprocessed_nodes:
        unprocessed_nodes[layer] = [node_data]
    else:
        unprocessed_nodes[layer].append(node_data)

def process_node(layer, node_data):
    """Reconstruct node by linking to inbound layers
    # Arguments
        layer: Layer to process
        node_data: List of layer configs
    # Raises
        ValueError: For incorrect layer config
        LookupError: If layer required is not found
    """
    input_tensors = []
    for input_data in node_data:
        inbound_layer_name = input_data[0]
        inbound_node_index = input_data[1]
        inbound_tensor_index = input_data[2]
        if len(input_data) == 3:
            kwargs = {}
        elif len(input_data) == 4:
            kwargs = input_data[3]
        else:
            raise ValueError('Improperly formatted model config.')
        inbound_layer = created_layers[inbound_layer_name]
        # Raise an error if the corresponding layer node
        # has not yet been created
        if len(inbound_layer._inbound_nodes) <= inbound_node_index:
            raise LookupError
        inbound_node = inbound_layer._inbound_nodes[inbound_node_index]
        input_tensors.append(
            inbound_node.output_tensors[inbound_tensor_index])

    # Call layer on its inputs, thus creating the node
    # and building the layer if needed.
    if input_tensors:
        layer(unpack_singleton(input_tensors), **kwargs)

def process_layer(layer_data):
    """Deserializes a layer, then call it on appropriate inputs.
    # Arguments
        layer_data: layer config dict.
    # Raises
        ValueError: In case of improperly formatted `layer_data` dict.
    """
    layer_name = layer_data['name']

    # Instantiate layer.
    from keras.layers import deserialize as deserialize_layer

    layer = deserialize_layer(layer_data,
                              custom_objects=custom_objects)
    created_layers[layer_name] = layer

    # Gather layer inputs.
    inbound_nodes_data = layer_data['inbound_nodes']
    for node_data in inbound_nodes_data:
        # We don't process nodes (i.e. make layer calls)
        # on the fly because the inbound node may not yet exist,
        # in case of layer shared at different topological depths
        # (e.g. a model such as A(B(A(B(x)))))
        add_unprocessed_node(layer, node_data)

# First, we create all layers and enqueue nodes to be processed
for layer_data in config['layers']:
    process_layer(layer_data)

# Then we process nodes in order of layer depth.
# Nodes that cannot yet be processed (if the inbound node
# does not yet exist) are re-enqueued, and the process
# is repeated until all nodes are processed.
while unprocessed_nodes:
    for layer_data in config['layers']:
        layer = created_layers[layer_data['name']]

        # Process all nodes in layer, if not yet processed
        if layer in unprocessed_nodes:
            node_data_list = unprocessed_nodes[layer]

            # Process nodes in order
            node_index = 0
            while node_index < len(node_data_list):
                node_data = node_data_list[node_index]
                try:
                    process_node(layer, node_data)

                # If the node does not have all inbound layers
                # available, stop processing and continue later
                except LookupError:
                    break

                node_index += 1

            # If not all nodes processed then store unprocessed nodes
            if node_index < len(node_data_list):
                unprocessed_nodes[layer] = node_data_list[node_index:]
            # If all nodes processed remove the layer
            else:
                del unprocessed_nodes[layer]

In [231]:
name = config.get('name')
input_tensors = []
output_tensors = []
for layer_data in config['input_layers']:
    layer_name, node_index, tensor_index = layer_data
    assert layer_name in created_layers
    layer = created_layers[layer_name]
    layer_output_tensors = layer._inbound_nodes[node_index].output_tensors
    input_tensors.append(layer_output_tensors[tensor_index])
for layer_data in config['output_layers']:
    layer_name, node_index, tensor_index = layer_data
    assert layer_name in created_layers
    layer = created_layers[layer_name]
    layer_output_tensors = layer._inbound_nodes[node_index].output_tensors
    output_tensors.append(layer_output_tensors[tensor_index])

In [233]:
blocks = [2, 2, 2, 2]
numerical_names = [True] * len(blocks)
axis = 3
freeze_bn = False
block = keras_resnet.blocks.basic_2d
classes=1000

# cls(inputs=input_tensors[0], outputs=output_tensors, name=name)
x = keras.layers.Conv2D(64, (7, 7), strides=(2, 2), use_bias=False, name="conv1", padding="same")(input_tensors[0])
x = keras_resnet.layers.BatchNormalization(axis=axis, epsilon=1e-5, freeze=freeze_bn, name="bn_conv1")(x)
x = keras.layers.Activation("relu", name="conv1_relu")(x)
x = keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding="same", name="pool1")(x)

features = 64

outputs = []

for stage_id, iterations in enumerate(blocks):
    for block_id in range(iterations):
        x = block(
            features,
            stage_id,
            block_id,
            numerical_name=(block_id > 0 and numerical_names[stage_id]),
            freeze_bn=freeze_bn
        )(x)

    features *= 2

    outputs.append(x)
    
x = keras.layers.GlobalAveragePooling2D(name="pool5")(x)
x = keras.layers.Dense(classes, activation="softmax", name="fc1000")(x)

keras.Model.__init__(inputs=input_tensors, outputs=outputs, name=name)

IndexError: tuple index out of range

In [224]:
x

<tf.Tensor 'fc1000_28/Softmax:0' shape=(?, 1000) dtype=float32>

In [199]:
input_tensors

[<tf.Tensor 'input_8_9:0' shape=(?, 28, 28, 1) dtype=float32>]

In [196]:
output_tensors

[<tf.Tensor 'fc1000_21/Softmax:0' shape=(?, 10) dtype=float32>]

In [184]:
model_config['config']

{'name': 'resnet2d18_3',
 'layers': [{'name': 'input_8',
   'class_name': 'InputLayer',
   'config': {'batch_input_shape': [None, 28, 28, 1],
    'dtype': 'float32',
    'sparse': False,
    'name': 'input_8'},
   'inbound_nodes': []},
  {'name': 'conv1',
   'class_name': 'Conv2D',
   'config': {'name': 'conv1',
    'trainable': True,
    'dtype': 'float32',
    'filters': 64,
    'kernel_size': [7, 7],
    'strides': [2, 2],
    'padding': 'same',
    'data_format': 'channels_last',
    'dilation_rate': [1, 1],
    'activation': 'linear',
    'use_bias': False,
    'kernel_initializer': {'class_name': 'VarianceScaling',
     'config': {'scale': 1.0,
      'mode': 'fan_avg',
      'distribution': 'uniform',
      'seed': None}},
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'kernel_regularizer': None,
    'bias_regularizer': None,
    'activity_regularizer': None,
    'kernel_constraint': None,
    'bias_constraint': None},
   'inbound_nodes': [[['input_8', 0, 0, {

In [127]:
from keras.utils import CustomObjectScope
from keras_resnet.models import ResNet2D18

with CustomObjectScope({'BatchNormalization': keras_resnet.layers._batch_normalization.BatchNormalization}):
#     model = load_model('./saved_models/test.h5', custom_objects=keras_resnet.custom_objects)
    model = load_model('./saved_models/test.h5')

ValueError: Unknown layer: ResNet2D18

In [106]:
shift_reductor_model.summary()

Model: "resnet2d18_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_8 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 14, 14, 64)   3136        input_8[0][0]                    
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 14, 14, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
conv1_relu (Activation)         (None, 14, 14, 64)   0           bn_conv1[0][0]                   
_______________________________________________________________________________________

In [8]:
# Detect shift
shift_detector = ShiftDetector(dr_techniques, test_types, od_tests, md_tests, sign_level, red_models, sample, datset)
(od_decs, ind_od_decs, ind_od_p_vals), (md_decs, ind_md_decs, ind_md_p_vals), red_dim, red_models, val_acc, te_acc = shift_detector.detect_data_shift(X_tr_3, y_tr_3, X_val_3, y_val_3, X_te_3, y_te_3, orig_dims, nb_classes)

NoRed
PCA
SRP
UAE






TAE
BBSDs
Train on 50000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
BBSDh


ValueError: Unknown layer: ResNet2D18

In [111]:
import importlib
importlib.reload(keras_resnet)

<module 'keras_resnet' from '/anaconda3/lib/python3.6/site-packages/keras_resnet/__init__.py'>

In [90]:
keras_resnet.custom_objects

{'BatchNormalization': keras_resnet.layers._batch_normalization.BatchNormalization,
 'ResNet2D18': keras_resnet.models._2d.ResNet2D18}

In [89]:
a=keras_resnet.custom_objects
a['ResNet2D18'] = ResNet2D18

In [79]:
keras.layers.Input(orig_dims).dtype

tf.float32

In [88]:
from keras_resnet.models import ResNet2D18

In [92]:
mod_path='./saved_models/mnist_standard_class_model.h5'
load_model(mod_path, custom_objects=keras_resnet.custom_objects)

AttributeError: 'list' object has no attribute 'dtype'

In [94]:
X_te_orig.shape

(10000, 784)

In [None]:
# -------------------------------------------------
# PIPELINE START
# -------------------------------------------------

samples_shifts_rands_dr_tech = np.ones((len(samples), len(shifts), random_runs, len(dr_techniques) + 1)) * (-1)

red_dim = -1
red_models = [None] * len(DimensionalityReduction)

for shift_idx, shift in enumerate(shifts):

    shift_path = path + shift + '/'
    if not os.path.exists(shift_path):
        os.makedirs(shift_path)

    rand_run_p_vals = np.ones((len(samples), len(dr_techniques) + 1, random_runs)) * (-1)

    for rand_run in range(random_runs):

        print("Random run %s" % rand_run)

        rand_run_path = shift_path + str(rand_run) + '/'
        if not os.path.exists(rand_run_path):
            os.makedirs(rand_run_path)

        np.random.seed(rand_run)
        set_random_seed(rand_run)

        # Load data
        (X_tr_orig, y_tr_orig), (X_val_orig, y_val_orig), (X_te_orig, y_te_orig), orig_dims, nb_classes = import_dataset(datset, shuffle=True)
        X_tr_orig = normalize_datapoints(X_tr_orig, 255.)
        X_te_orig = normalize_datapoints(X_te_orig, 255.)
        X_val_orig = normalize_datapoints(X_val_orig, 255.)

        # Apply shift
        if shift == 'orig':
            print('Original')
            (X_tr_orig, y_tr_orig), (X_val_orig, y_val_orig), (X_te_orig, y_te_orig), orig_dims, nb_classes = import_dataset(datset)
            X_tr_orig = normalize_datapoints(X_tr_orig, 255.)
            X_te_orig = normalize_datapoints(X_te_orig, 255.)
            X_val_orig = normalize_datapoints(X_val_orig, 255.)
            X_te_1 = X_te_orig.copy()
            y_te_1 = y_te_orig.copy()
        else:
            (X_te_1, y_te_1) = apply_shift(X_te_orig, y_te_orig, shift, orig_dims, datset)

        X_te_2 , y_te_2 = random_shuffle(X_te_1, y_te_1)

        # Check detection performance for different numbers of samples from test
        for si, sample in enumerate(samples):

            print("Sample %s" % sample)

            sample_path = rand_run_path + str(sample) + '/'
            if not os.path.exists(sample_path):
                os.makedirs(sample_path)

            X_te_3 = X_te_2[:sample,:]
            x_te_3_samp = X_te_3[0]
            y_te_3 = y_te_2[:sample]

            if test_type == 'multiv':
                X_val_3 = X_val_orig[:1000,:]
                y_val_3 = y_val_orig[:1000]
            else:
                X_val_3 = np.copy(X_val_orig)
                y_val_3 = np.copy(y_val_orig)

            X_tr_3 = np.copy(X_tr_orig)
            y_tr_3 = np.copy(y_tr_orig)

            # Detect shift
            shift_detector = ShiftDetector(dr_techniques, test_types, od_tests, md_tests, sign_level, red_models, sample, datset)
            (od_decs, ind_od_decs, ind_od_p_vals), (md_decs, ind_md_decs, ind_md_p_vals), red_dim, red_models, val_acc, te_acc = shift_detector.detect_data_shift(X_tr_3, y_tr_3, X_val_3, y_val_3, X_te_3, y_te_3, orig_dims, nb_classes)

            if test_type == 'multiv':
                print("Shift decision: ", ind_md_decs.flatten())
                print("Shift p-vals: ", ind_md_p_vals.flatten())

                rand_run_p_vals[si,:,rand_run] = ind_md_p_vals.flatten()
            else:
                print("Shift decision: ", ind_od_decs.flatten())
                print("Shift p-vals: ", ind_od_p_vals.flatten())

                # Characterize shift via difference classifier
                shift_locator = ShiftLocator(orig_dims, dc=DifferenceClassifier.FFNNDCL, sign_level=sign_level)
                model, score, (X_tr_dcl, y_tr_dcl, X_te_dcl, y_te_dcl) = shift_locator.build_model(X_tr_3, X_te_3)
                test_indices, test_perc, dec, p_val = shift_locator.most_likely_shifted_samples(model, X_te_dcl, y_te_dcl)

                rand_run_p_vals[si,:,rand_run] = np.append(ind_od_p_vals.flatten(), p_val)

                if datset == 'mnist' or datset == 'mnist_usps' or datset == 'mnist_usps':
                    samp_shape = (28,28)
                    cmap = 'gray'
                elif datset == 'cifar10' or datset == 'svhn':
                    samp_shape = (32,32,3)
                    cmap = None
                
                if dec:
                    most_conf_test_indices = test_indices[test_perc > 0.8]

                    top_same_samples_path = sample_path + 'top_same'
                    if not os.path.exists(top_same_samples_path):
                        os.makedirs(top_same_samples_path)

                    rev_top_test_ind = test_indices[::-1][:difference_samples]
                    least_conf_samples = X_te_dcl[rev_top_test_ind]
                    for j in range(len(rev_top_test_ind)):
                        samp = least_conf_samples[j, :]
                        fig = plt.imshow(samp.reshape(samp_shape), cmap=cmap)
                        plt.axis('off')
                        fig.axes.get_xaxis().set_visible(False)
                        fig.axes.get_yaxis().set_visible(False)
                        plt.savefig("%s/%s.pdf" % (top_same_samples_path, j), bbox_inches='tight', pad_inches=0)
                        plt.clf()

                        j = j + 1

                    top_different_samples_path = sample_path + 'top_diff'
                    if not os.path.exists(top_different_samples_path):
                        os.makedirs(top_different_samples_path)

                    most_conf_samples = X_te_dcl[most_conf_test_indices]
                    original_indices = []
                    j = 0
                    for i in range(len(most_conf_samples)):
                        samp = most_conf_samples[i,:]
                        ind = np.where(np.all(X_te_3==samp,axis=1))
                        if len(ind[0]) > 0:
                            original_indices.append(np.asscalar(ind[0]))

                            if j < difference_samples:
                                fig = plt.imshow(samp.reshape(samp_shape), cmap=cmap)
                                plt.axis('off')
                                fig.axes.get_xaxis().set_visible(False)
                                fig.axes.get_yaxis().set_visible(False)
                                plt.savefig("%s/%s.pdf" % (top_different_samples_path,j), bbox_inches='tight', pad_inches = 0)
                                plt.clf()

                                j = j + 1

        for dr_idx, dr in enumerate(dr_techniques_plot):
            plt.semilogx(np.array(samples), rand_run_p_vals[:,dr_idx,rand_run], format[dr], color=colors[dr], label="%s" % DimensionalityReduction(dr).name)
        plt.axhline(y=sign_level, color='k')
        plt.xlabel('Number of samples from test')
        plt.ylabel('$p$-value')
        plt.savefig("%s/dr_sample_comp_noleg.pdf" % rand_run_path, bbox_inches='tight')
        plt.legend()
        plt.savefig("%s/dr_sample_comp.pdf" % rand_run_path, bbox_inches='tight')
        plt.clf()

        np.savetxt("%s/dr_method_p_vals.csv" % rand_run_path, rand_run_p_vals[:,:,rand_run], delimiter=",")

        np.random.seed(seed)
        set_random_seed(seed)

    mean_p_vals = np.mean(rand_run_p_vals, axis=2)
    std_p_vals = np.std(rand_run_p_vals, axis=2)

    for dr_idx, dr in enumerate(dr_techniques_plot):
        errorfill(np.array(samples), mean_p_vals[:,dr_idx], std_p_vals[:,dr_idx], fmt=format[dr], color=colors[dr], label="%s" % DimensionalityReduction(dr).name)
    plt.axhline(y=sign_level, color='k')
    plt.xlabel('Number of samples from test')
    plt.ylabel('$p$-value')
    plt.savefig("%s/dr_sample_comp_noleg.pdf" % shift_path, bbox_inches='tight')
    plt.legend()
    plt.savefig("%s/dr_sample_comp.pdf" % shift_path, bbox_inches='tight')
    plt.clf()

    for dr_idx, dr in enumerate(dr_techniques_plot):
        errorfill(np.array(samples), mean_p_vals[:,dr_idx], std_p_vals[:,dr_idx], fmt=format[dr], color=colors[dr])
        plt.xlabel('Number of samples from test')
        plt.ylabel('$p$-value')
        plt.axhline(y=sign_level, color='k', label='sign_level')
        plt.savefig("%s/%s_conf.pdf" % (shift_path, DimensionalityReduction(dr).name), bbox_inches='tight')
        plt.clf()

    np.savetxt("%s/mean_p_vals.csv" % shift_path, mean_p_vals, delimiter=",")
    np.savetxt("%s/std_p_vals.csv" % shift_path, std_p_vals, delimiter=",")

    for dr_idx, dr in enumerate(dr_techniques_plot):
        samples_shifts_rands_dr_tech[:,shift_idx,:,dr_idx] = rand_run_p_vals[:,dr_idx,:]

    np.save("%s/samples_shifts_rands_dr_tech.npy" % (path), samples_shifts_rands_dr_tech)

for dr_idx, dr in enumerate(dr_techniques_plot):
    dr_method_results = samples_shifts_rands_dr_tech[:,:,:,dr_idx]

    mean_p_vals = np.mean(dr_method_results, axis=2)
    std_p_vals = np.std(dr_method_results, axis=2)

    for idx, shift in enumerate(shifts):
        errorfill(np.array(samples), mean_p_vals[:, idx], std_p_vals[:, idx], fmt=linestyles[idx]+markers[dr], color=colorscale(colors[dr],brightness[idx]), label="%s" % shift.replace('_', '\\_'))
    plt.xlabel('Number of samples from test')
    plt.ylabel('$p$-value')
    plt.axhline(y=sign_level, color='k')
    plt.savefig("%s/%s_conf_noleg.pdf" % (path, DimensionalityReduction(dr).name), bbox_inches='tight')
    plt.legend()
    plt.savefig("%s/%s_conf.pdf" % (path, DimensionalityReduction(dr).name), bbox_inches='tight')
    plt.clf()

np.save("%s/samples_shifts_rands_dr_tech.npy" % (path), samples_shifts_rands_dr_tech)