In [19]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import random

In [20]:
from tensorflow.keras import datasets, layers, models
import logging

In [24]:
def set_memory_growth():
    """Enable on-demand GPU memory allocation for every visible physical GPU.

    Does nothing when no GPU is visible. Memory growth is switched on for
    each physical GPU first; only afterwards are the logical devices listed
    and the device counts logged. A ``RuntimeError`` raised by TensorFlow
    (e.g. because the GPUs were already initialized) is logged, not raised.

    :return: None
    """
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if not gpus:
        return

    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

        # Listing logical devices initializes the runtime, after which physical
        # devices can no longer be reconfigured. It must therefore happen once,
        # after *all* GPUs are configured -- doing it inside the loop made the
        # call for the second GPU fail on multi-GPU machines.
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        logging.info(
            "Detect {} Physical GPUs, {} Logical GPUs.".format(
                len(gpus), len(logical_gpus)))
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        logging.info(e)

In [25]:
# Apply the memory-growth setting now; per the helper's own comment this has to
# happen before the GPUs are initialized.
set_memory_growth()

In [21]:
# Load MNIST as (images, labels) pairs for the train and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [34]:
# Scratch: start from an empty list.
a = list()

In [35]:
# Scratch: a list of two single-element lists.
b = [[value] for value in (1, 2)]

In [36]:
# In-place `+=`: with `a` being the list from the cell above, this appends b's
# sublists to it.
a+=b

In [37]:
# Display `a` to confirm it now holds b's sublists.
a

[[1], [2]]

In [40]:
# Scratch: two equal but independent (2, 1) integer column arrays.
# Rebinds `a` and `b`, which previously held plain Python lists.
a = np.array([[1], [2]])
b = a.copy()

In [44]:
# Collect both arrays in one list so they can be handed to tf.concat together.
c=[a,b]

In [45]:
# Concatenate the list's entries along axis 0; the recorded result has shape (4, 1).
tf.concat(c,axis=0)

<tf.Tensor: shape=(4, 1), dtype=int64, numpy=
array([[1],
       [2],
       [1],
       [2]])>

In [38]:
# Rebind the scratch name `a` to a TensorFlow variable initialized with five zeros.
# NOTE(review): `a` held a NumPy array in the cell above; the list `c` still
# references that earlier array, not this variable.
a=tf.Variable(tf.zeros(5))

In [39]:
# View the variable's current value as a NumPy array (recorded: five float32 zeros).
a.numpy()

array([0., 0., 0., 0., 0.], dtype=float32)

In [29]:
# --- Class roles -----------------------------------------------------------
# Digit(s) treated as "normal"; every other digit in 0..9 is an outlier class.
normal_class = (0,)
# Outlier digits for which labeled examples are made available.
known_outlier_classes = (1, 3)
n_classes = 2  # 0: normal, 1: outlier
normal_classes = tuple(normal_class)
outlier_classes = tuple(digit for digit in range(10) if digit not in normal_class)

# --- Sampling ratios ---------------------------------------------------------
# Fractions passed to create_semisupervised_setting: labeled normals, labeled
# outliers, and the share of unlabeled data that is anomalous.
ratio_known_normal = 0.0
ratio_known_outlier = 0.01
ratio_pollution = 0.1

In [30]:
# Show the derived outlier digits (every digit in 0..9 except the normal class).
outlier_classes

(1, 2, 3, 4, 5, 6, 7, 8, 9)

In [31]:
# Same digits as a list of plain Python ints.
list(map(int, outlier_classes))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [32]:
# Peek at the raw training labels (recorded as a uint8 array of digits).
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [33]:
def create_semisupervised_setting(labels, normal_classes, outlier_classes, known_outlier_classes,
                                  ratio_known_normal, ratio_known_outlier, ratio_pollution):
    """
    Create a semi-supervised data setting.

    All normal samples are used. The sizes of the four sample groups are
    obtained by solving a linear system built from the three ratios, then
    truncated to integers, and the groups are drawn at random via
    ``np.random.permutation`` (global NumPy random state).

    Semi-supervised labels are: +1 for known normal samples, -1 for known
    outlier samples and 0 for every unlabeled sample.

    If fewer samples are available than the solved count asks for (e.g. not
    enough known-outlier candidates), that group is simply smaller; the three
    returned lists always have the same length and stay aligned.

    :param labels: np.array (or array-like) with labels of all dataset samples
    :param normal_classes: tuple with normal class labels
    :param outlier_classes: tuple with anomaly class labels
    :param known_outlier_classes: tuple with known (labeled) anomaly class labels
    :param ratio_known_normal: the desired ratio of known (labeled) normal samples
    :param ratio_known_outlier: the desired ratio of known (labeled) anomalous samples
    :param ratio_pollution: the desired pollution ratio of the unlabeled data with unknown (unlabeled) anomalies.
    :return: tuple with list of sample indices, list of original labels, and list of semi-supervised labels
    :raises numpy.linalg.LinAlgError: if the ratios make the linear system singular
    """
    labels = np.asarray(labels)

    idx_normal = np.argwhere(np.isin(labels, normal_classes)).flatten()
    idx_outlier = np.argwhere(np.isin(labels, outlier_classes)).flatten()
    idx_known_outlier_candidates = np.argwhere(np.isin(labels, known_outlier_classes)).flatten()

    n_normal = len(idx_normal)

    # Solve system of linear equations to obtain respective number of samples.
    # Unknowns: x = [known normal, unlabeled normal, unlabeled outlier, known outlier]
    #   row 1: known normal + unlabeled normal       = n_normal
    #   row 2: known normal                          = ratio_known_normal  * sum(x)
    #   row 3: known outlier                         = ratio_known_outlier * sum(x)
    #   row 4: unlabeled outlier                     = ratio_pollution * (unlabeled normal + unlabeled outlier)
    a = np.array([[1, 1, 0, 0],
                  [(1-ratio_known_normal), -ratio_known_normal, -ratio_known_normal, -ratio_known_normal],
                  [-ratio_known_outlier, -ratio_known_outlier, -ratio_known_outlier, (1-ratio_known_outlier)],
                  [0, -ratio_pollution, (1-ratio_pollution), 0]])
    b = np.array([n_normal, 0, 0, 0])
    x = np.linalg.solve(a, b)

    # Get number of samples (fractional solutions are truncated)
    n_known_normal, n_unlabeled_normal, n_unlabeled_outlier, n_known_outlier = (int(v) for v in x)

    # Sample indices. The order of the three permutation calls is kept so a
    # seeded global NumPy RNG yields the same draw as before.
    perm_normal = np.random.permutation(n_normal)
    perm_outlier = np.random.permutation(len(idx_outlier))
    perm_known_outlier = np.random.permutation(len(idx_known_outlier_candidates))

    # (sampled indices, semi-supervised label) per group, in output order.
    # Known and unlabeled outliers are drawn independently, so when
    # known_outlier_classes overlaps outlier_classes the same index can appear
    # in both groups.
    groups = [
        (idx_normal[perm_normal[:n_known_normal]], 1),
        (idx_normal[perm_normal[n_known_normal:n_known_normal + n_unlabeled_normal]], 0),
        (idx_outlier[perm_outlier[:n_unlabeled_outlier]], 0),
        (idx_known_outlier_candidates[perm_known_outlier[:n_known_outlier]], -1),
    ]

    # Create final lists. Semi-supervised labels are sized from the indices
    # that were actually sampled, not from the requested counts, so all three
    # lists stay the same length even when a pool is too small.
    list_idx, list_labels, list_semi_labels = [], [], []
    for group_idx, semi_label in groups:
        list_idx += group_idx.tolist()
        list_labels += labels[group_idx].tolist()
        list_semi_labels += [semi_label] * len(group_idx)

    return list_idx, list_labels, list_semi_labels

In [34]:
# Sample the training subset: selected indices, their original digit labels,
# and the semi-supervised targets, using the class roles and ratios configured above.
idx, ll, semi_targets = create_semisupervised_setting(
    labels=y_train,
    normal_classes=normal_classes,
    outlier_classes=outlier_classes,
    known_outlier_classes=known_outlier_classes,
    ratio_known_normal=ratio_known_normal,
    ratio_known_outlier=ratio_known_outlier,
    ratio_pollution=ratio_pollution,
)

In [52]:
# Build a dataset of (image, original label, semi-supervised target) triples
# for the sampled indices and shuffle it with a 4096-element buffer.
train_data = (
    tf.data.Dataset
    .from_tensor_slices((x_train[idx], y_train[idx], semi_targets))
    .shuffle(4096)
)

In [55]:
def normalize_img(image, label, semi_label):
    """Cast the image to float32 and divide it by 255; labels pass through untouched.

    :param image: image tensor to rescale
    :param label: original class label, returned unchanged
    :param semi_label: semi-supervised label, returned unchanged
    :return: tuple (rescaled image, label, semi_label)
    """
    scaled_image = tf.cast(image, tf.float32) / 255.
    return scaled_image, label, semi_label

In [56]:
# Apply normalize_img to every element, letting tf.data pick the parallelism.
# NOTE(review): this rebinds `train_data` to its own mapped version, so running
# the cell a second time would divide the images by 255 again.
train_data = train_data.map(
    normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [57]:
# Group elements into batches of 8 and prefetch with an auto-tuned buffer size.
# NOTE(review): `train_data` is rebound in place; re-running this cell would
# batch the already-batched dataset again.
train_data = train_data.batch(8)
train_data = train_data.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [58]:
# Scratch: an all-zero array shaped and typed like the test labels (displayed only, not stored).
np.zeros_like(y_test)

array([0, 0, 0, ..., 0, 0, 0], dtype=uint8)

In [59]:
# Create an iterator over the batched dataset so a single batch can be inspected.
# NOTE(review): this rebinds the scratch name `a`. Cells further down
# (e.g. `tf.add(a, b)`) have recorded outputs that indicate `a` was tensor-like
# when they ran, so they rely on `a` having been reassigned out of file order.
a = iter(train_data)

In [60]:
# Pull the next batch from the iterator; the recorded output is a tuple whose
# first entry is the image tensor with shape (8, 28, 28) and dtype float32.
next(a)

(<tf.Tensor: shape=(8, 28, 28), dtype=float32, numpy=
 array([[[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]],
 
        [[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]],
 
        [[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.]],
 
        ...,
 
        [[0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         [0., 0., 0., ..., 0., 0., 0.],
         ...,
         [0., 0., 0., ...,

In [41]:
# Scratch: a 2x5 float constant used as an operand in the cells below.
# Rebinds `b`, which earlier held a list and then a NumPy array.
b=tf.constant([[1.,2.,3.,4.,5.],[1.,1.,1.,1.,1.]])

In [48]:
# Element-wise addition of `a` and `b`.
# NOTE(review): the recorded (2, 5) result equals `b`, so `a` was presumably the
# all-zero length-5 variable when this ran; in file order `a` is the dataset
# iterator -- confirm the intended cell order before re-running.
tf.add(a, b)

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[1., 2., 3., 4., 5.],
       [1., 1., 1., 1., 1.]], dtype=float32)>

In [53]:
# Sum `b` over axis 0 and add the result to `a`; the recorded output has shape (5,).
a+tf.reduce_sum(b,0)

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([2., 3., 4., 5., 6.], dtype=float32)>

In [83]:
# Display `a`.
# NOTE(review): the recorded value [2, 0, 0, 0, 0] is not produced by any cell
# visible here -- presumably `a` was modified in a cell that has since been removed.
a

<tf.Variable 'Variable:0' shape=(5,) dtype=float32, numpy=array([2., 0., 0., 0., 0.], dtype=float32)>

In [84]:
# Index `a` with `c`.
# NOTE(review): the recorded output has shape (4,), which the two-array list `c`
# defined earlier would not obviously produce -- `c` presumably held a different
# value when this ran; confirm before relying on this cell.
a[c]

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([2., 0., 0., 0.], dtype=float32)>

In [90]:
# Scratch: rebind `a` to the integers 1 through 5 as a NumPy array.
a = np.arange(1, 6)

In [101]:
# Keep the entries of `b` that are strictly between 3 and 5 and replace all
# others with 0; in the recorded output only the value 4 survives.
tf.where((b>3)&(b<5),b,0)

<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[0., 0., 0., 4., 0.],
       [0., 0., 0., 0., 0.]], dtype=float32)>