In [1]:
from sklearn import datasets
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.constraints import Constraint
from tensorflow.keras.layers import Layer, InputSpec, ReLU
from tensorflow.keras.initializers import Ones
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K
import csv
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib

In [2]:
class MAct(Layer):

    def __init__(self, **kwargs):
        super(MAct, self).__init__(**kwargs)
        self.supports_masking = True

    def build(self, input_shape):
        self.c = self.add_weight(name="c",
                                shape=(input_shape[1],),
                                initializer='zeros',
                                trainable=True) # Initialiseerida c ühtedeks / nullideks
        self.b = self.add_weight(name="b",
                                shape=(input_shape[1],),
                                initializer='zeros',
                                trainable=True) # Initialiseerida b nullideks
        super(MAct, self).build(input_shape)
        

    def call(self, inputs):
        first_exp = tf.exp(self.c - tf.abs(inputs))

        p = (first_exp + tf.exp(self.b)) / tf.reduce_sum(first_exp + tf.exp(self.b), axis=1, keepdims=True)
        
        #p = tf.exp(inputs) / tf.reduce_sum(tf.exp(inputs), axis=0, keepdims=True)
        return p

    def compute_output_shape(self, input_shape):
        return input_shape

In [3]:
def cross_ent(probs, y):
    #losses = tf.nn.softmax_cross_entropy_with_logits(logits=probs, labels=y) # Tavaline CE
    cce = CategoricalCrossentropy()
    losses = cce(probs, y)
    return tf.reduce_mean(losses)


def max_conf(probs, dim):
    y = tf.argmax(probs, 1)
    y = tf.one_hot(y, dim)
    #losses = -tf.nn.softmax_cross_entropy_with_logits(logits=probs, labels=y) # Tavaline CE
    cce = CategoricalCrossentropy()
    losses = -cce(probs, y)
    return tf.reduce_mean(losses)

In [4]:
def gen_adv(x, dim):
    eps = 0.025
    n_iters = 4
    step_size = 0.02

    unif = tf.random.uniform(minval=-eps, maxval=eps, shape=tf.shape(x))
    x_adv = tf.clip_by_value(x + unif, 0., 1.)
    
    for i in range(n_iters):
        x_adv = tf.Variable(x_adv)
        with tf.GradientTape() as tape:
            loss = max_conf(model(x_adv), dim)
            grad = tape.gradient(loss, x_adv)
            g = tf.sign(grad)

        x_adv_start = x_adv + step_size*g
        x_adv = tf.clip_by_value(x_adv, 0., 1.)
        delta = x_adv - x_adv_start
        delta = tf.clip_by_value(delta, -eps, eps)
        x_adv = x_adv_start + delta

    return x_adv

In [5]:
def plot(model, plot_min, plot_max, max_prob, name, n_iters, MAct):
    n_grid = 200
    x_plot = np.linspace(plot_min, plot_max, n_grid)
    y_plot = np.linspace(plot_min, plot_max, n_grid)
    
    points = []
    for xx in x_plot:
        for yy in y_plot:
            points.append((yy, xx))
    points = np.array(points)
    
    if MAct:
        probs = model(points).numpy()
    else:
        logits = model(points) # For not MAct
        probs = tf.nn.softmax(logits).numpy()
    if max_prob:
        z_plot = probs.max(1)
    else:
        z_plot = probs[:, 0]
    z_plot = z_plot.reshape(len(x_plot), len(y_plot)) * 100
    
    ax = plt.gca()
    
    vmax = 100
    vmin = 50 if max_prob else 0
    plt.contourf(x_plot, y_plot, z_plot, levels=np.linspace(50, 100, 50))
    cbar = plt.colorbar(ticks=np.linspace(vmin, vmax, 6))
    
    cbar.ax.set_title('confidence', fontsize=12, pad=12)
    cbar.set_ticklabels(['50%', '60%', '70%', '80%', '90%', '100%'])
    
    y_np = np.array(y)
    X0 = X[y_np.argmax(1)==0]
    X1 = X[y_np.argmax(1)==1]
    plt.scatter(X0[:, 0], X0[:, 1], s=20, edgecolors='red', facecolor='None',
                marker='o', linewidths=0.2)
    plt.scatter(X1[:, 0], X1[:, 1], s=20, edgecolors='green', facecolor='None',
                marker='s', linewidths=0.2)
    plt.xlim([plot_min, plot_max])
    plt.ylim([plot_min, plot_max])
    
    margin = 0.01
    #rect = matplotlib.patches.Rectangle((-margin, -margin), 1.0+2*margin, 1.0+2*margin, 
    #                                    linewidth=1.5, color='white', fill=False)
    #ax.add_patch(rect)
    
    plt.gca().set_aspect('equal', adjustable='box')
    plt.savefig('two_moons_four_paper/{}_{:.1f}_{:.1f}_iters={}_max_prob={}.pdf'.format(
        name, plot_min, plot_max, n_iters, max_prob), transparent=True)
    plt.clf()
    #plt.show()

In [31]:
model = Sequential([
    Dense(200,input_shape=(2,)),
    Activation('relu'),
    Dense(100),
    Activation('relu'),
    Dense(50),
    Activation('relu'),
    Dense(25),
    Activation('relu'),
    #Dense(10),
    #Activation('tanh'),
    #Dense(100),
    #Activation('selu'),
    Dense(2),
    #MAct(),
    Activation('softmax')
])

optimizer = Adam(learning_rate=0.01)

In [32]:
dim = 2
# More noise in the moons makes the task harder
X, y = datasets.make_moons(n_samples=2000, shuffle=True, noise=.02)
# Rescale and shift the dataset to better fit into zero-one box
X = (X + 1.6) / 4
X[:, 0] = X[:, 0] - 0.035
X[:, 1] = (X[:, 1] - 0.17) * 1.75

In [33]:
def extras_generator(X, y, x_lat_mult, x_long_mult):
    X_extra, y_extra = datasets.make_moons(n_samples=2000, shuffle=True, noise=.02)
    X_extra = (X_extra + 1.6) / 4
    X_extra[:, 0] = X_extra[:, 0] - 0.035
    X_extra[:, 1] = (X_extra[:, 1] - 0.17) * 1.75
    X_extra[:, 0] = X_extra[:,0] + x_lat_mult
    X_extra[:, 1] = X_extra[:, 1] + x_long_mult
    X = np.append(X, X_extra, axis=0)
    y = np.append(y, y_extra, axis=0)
    return X, y

In [34]:
X, y = extras_generator(X, y, 1, 0)
X, y = extras_generator(X, y, 0, 1)
X, y = extras_generator(X, y, 1, 1)

In [35]:
#X_test, y_test = datasets.make_moons(n_samples=400, shuffle=True, noise=.02)
#X_test = (X_test + 1.6) / 4
#X_test[:, 0] = X_test[:, 0] - 0.035
#X_test[:, 1] = (X_test[:, 1] - 0.17) * 1.75

In [36]:
print(y)
y = tf.one_hot(y, dim)
print(y)

[1 0 0 ... 0 0 1]
tf.Tensor(
[[0. 1.]
 [1. 0.]
 [1. 0.]
 ...
 [1. 0.]
 [1. 0.]
 [0. 1.]], shape=(8000, 2), dtype=float32)


In [37]:
acet = False

In [38]:
if acet:
    n_iter = 900
else:
    n_iter = 4000

In [39]:
weights_list = []
info_list = []

# Custom training cycle going through the entire dataset
for epoch in range(1, n_iter+1):
    X_noise = tf.random.uniform([2*X.shape[0], X.shape[1]])
    # If we use the ACET method, then adversarial noise will be generated
    if acet:
        X_noise = gen_adv(X_noise, dim)
    # Context used to calculate the gradients of the model
    with tf.GradientTape() as tape:
        logits = model(X)
        logits_noise = model(X_noise)
        loss_main = cross_ent(logits, y)
        loss_acet = acet * max_conf(logits_noise, dim)
        loss = loss_main + loss_acet
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    if epoch % 100  == 0:
        train_err = np.mean(logits.numpy().argmax(1) != y.numpy().argmax(1))
        print("Iter {:03d}: loss_main={:.10f} loss_acet={:.3f} err={:.2%}"
              .format(epoch, loss_main, loss_acet, train_err))
        
        weights = model.layers[-1].get_weights()
        info_list.append("Iter {:03d}: loss_main={:.10f} loss_acet={:.6f} err={:.2%}"
                         .format(epoch, loss_main, loss_acet, train_err))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Iter 100: loss_main=5.1294589043 loss_acet=-0.000 err=31.87%
Iter 200: loss_main=4.7337722778 loss_acet=-0.000 err=29.36%
Iter 300: loss_main=4.6478338242 loss_acet=-0.000 err=28.84%
Iter 400: loss_main=3.9584867954 loss_acet=-0.000 err=24.51%
Iter 500: loss_main=2.8115282059 loss_acet=-0.000 err=17.36%
Iter 600: loss_main=2.4040732384 loss_acet=-0.000 err=14.85%
Iter 700: loss_main=2.8179874420 loss_acet=-0.000 err=17.45%
Iter 800: loss_main=2.5036611557 loss_acet=-0.000 err=15.50%
Iter 900: loss_main=2.4397156239 loss_acet=-0.000 err=15.12%
Iter 1000: loss_main=2.0667972565 loss_acet=-0.000 err=12.81%
Iter 1100: loss_main=2.0148167610 loss_acet=-0.000 err=12.50%
Iter 1200: loss_main=2.014

In [40]:
name = "four_moons_paper_2_no_mact"

In [41]:
file_name = "two_moons_four_paper/{}_iters={}.csv".format(name, n_iter)
with open(file_name, 'w', newline='') as myfile:
    wr = csv.writer(myfile, delimiter="\n")
    wr.writerow(info_list)

In [42]:
plot(model, 0.0, 2.0, max_prob=True,name=name, n_iters=n_iter, MAct=True)
#plot(model, 0.3, 0.5, max_prob=True)
#plot(model, -2.0, 3.0, max_prob=True)
plot(model, -5.0, 6.0, max_prob=True,name=name, n_iters=n_iter, MAct=True)
plot(model, -10.0, 10.0, max_prob=True,name=name, n_iters=n_iter, MAct=True)

<Figure size 432x288 with 0 Axes>

Dense 200, dense 100, dense 50 selu MAct lrate 0.01 õppid tublisti, aga viimasel 100 iteratsioonil läks katki
Dense 200, dense 100, dense 50 tanh MAct lrate 0.01 õpib hästi
Dense 200, dense 100, dense 50 tanh MAct lrate 0.01 zeros ei õpi nii hästi, kui initialiseeritud ühtedeks õpib hästi
Dense 200, dense 100, dense 50, dense 25 tanh MAct lrate 0.01 zeros õpib paremini
Dense 200, dense 100, dense 50, dense 25 tanh MAct lrate 0.01 ei õppinud üldse
Dense200_Dense100_Dense50_Dense25_Dense10_tanh_MAct_lrate0.01_zeros õppis nõrgalt
Dense200_Dense100_Dense50_Dense25_tanh_MAct_lrate0.01_zeros 3000 iteratsioon. Pikalt oli nõrk, aga lõpus leidis optimumi

In [45]:
def pull(model):
    return "{{ task_instance.xcom_pull(task_ids='%s', key='return_value')[0] }}" % model

print(pull('Yolo'))

{{ task_instance.xcom_pull(task_ids='Yolo', key='return_value')[0] }}
