# GANomaly Notebook Experiments

## Initial Configurations

### Selecting the device to work with

In [None]:
import os
# Order CUDA devices by PCI bus ID so that device indices are stable.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# "-1" exposes no CUDA device to this process (CPU-only run); the GPU memory
# configuration cell below is skipped for this value. Use a device index
# (e.g. "0") to run on a GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

### Libraries import

In [None]:
import sys
import random
import gc
import numpy as np
import tensorflow as tf
from IPython.display import clear_output

sys.path.append("../")

In [None]:
from models.ganomaly.model import get_2D_models, get_3D_models
from models.ganomaly.utils.losses import l1_loss, l2_loss, BCELoss
from models.ganomaly.utils.preprocessing import normalize_accros_channels, min_max_scaler, resize, get_center_of_volume, undo_enumerate
from models.ganomaly.utils.weights_init import reinit_model
from models.ganomaly.utils.exp_docs import experiment_folder_path, get_metrics_path, get_outputs_path
from datasets.dict_features import get_parkinson
from utils.metrics import get_true_positives, get_true_negatives, get_false_positives, get_false_negatives, accuracy, precision, recall, specificity, f1_score, get_AUC,shapiroWilks_test, dagostinoPearson_test, bartlett_test, levene_test, fOneWay_test

### GPU Memory Configuration

In [None]:
# Let TensorFlow grow its GPU memory allocation on demand. Skipped when
# CUDA_VISIBLE_DEVICES is "-1", i.e. when no GPU is exposed to the process.
if os.getenv("CUDA_VISIBLE_DEVICES") != '-1':
    gpus = tf.config.experimental.list_physical_devices('GPU')
    # Memory growth has to be configured identically on every visible GPU, and
    # the list can be empty on a machine without GPUs, so iterate over the
    # devices instead of indexing gpus[0].
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
# Do not log on which device every operation is placed.
tf.debugging.set_log_device_placement(False)

## Dataset Processing

### Data loading

In [None]:
# Value passed as the parallelism argument of the `Dataset.map` calls below.
N_CPUS = 16
# TFRecord file that stores the videos and their labels.
dataset_path = "/data/Datasets/parkinson/tf_records/data2020_cuttedFrames/parkinson_2020_cutted.tfrecord"
# Feature description used by `from_bytes_to_dict` to parse each serialized example.
encoding_dictionary = get_parkinson()
# Show the feature description as the cell output.
encoding_dictionary

### General extraction function for tfrecords

In [None]:
def from_bytes_to_dict(example_bytes, encoding_dictionary):
    """Parse one serialized example into a dictionary of features.

    Args:
        example_bytes: Serialized example, e.g. one element of a
            `tf.data.TFRecordDataset`.
        encoding_dictionary: Feature description handed to
            `tf.io.parse_single_example`.

    Returns:
        The parsed features, exactly as returned by
        `tf.io.parse_single_example`.
    """
    parsed_example = tf.io.parse_single_example(example_bytes, encoding_dictionary)
    return parsed_example

def extract_data_from_dict(example_dict):
    """Rebuild the video volume and its label from a parsed example.

    Args:
        example_dict: Parsed example holding the keys "frames", "height",
            "width", "channels", "video" (raw uint8 bytes) and "parkinson".

    Returns:
        A tuple `(volume, label)`: the video decoded as uint8, reshaped to
        [frames, height, width, channels] and cast to float32, plus the
        value stored under "parkinson".
    """
    # Target shape, in the order frames, height, width, channels.
    volume_shape = [
        example_dict[key] for key in ("frames", "height", "width", "channels")
    ]
    flat_pixels = tf.io.decode_raw(example_dict["video"], tf.uint8)
    video = tf.cast(tf.reshape(flat_pixels, volume_shape), dtype=tf.float32)
    return video, example_dict["parkinson"]

### Data pipeline

In [None]:
# Read the serialized examples from the TFRecord file.
raw_data = tf.data.TFRecordDataset(dataset_path)

# Serialized bytes -> feature dictionary -> (float32 volume, label).
dict_data = raw_data.map(
    lambda serialized: from_bytes_to_dict(serialized, encoding_dictionary),
    num_parallel_calls=N_CPUS,
)
total_data = dict_data.map(extract_data_from_dict, num_parallel_calls=N_CPUS)

# Resize step with a [64, 64] target size.
resize_data = total_data.map(
    lambda volume, label: resize(volume, label, [64, 64]),
    num_parallel_calls=N_CPUS,
)
resize_data

In [None]:
# Walk the whole dataset once to collect the shape and the label of every video.
shape_videos = []
labels_videos = []
for x, y in resize_data:
    shape_videos.append(x.numpy().shape)
    labels_videos.append(y.numpy())
shape_videos = np.array(shape_videos)
labels_videos = np.array(labels_videos)

# First column of the shapes: number of frames of each video.
frames_per_video = shape_videos[:, 0]
print("Data information about the data")
print("Total videos: ", shape_videos.shape[0])
print("Min value of frames: ", frames_per_video.min())
print("Max value of frames: ", frames_per_video.max())
print("Mean value of frames: ", frames_per_video.mean())

In [None]:
# Preprocessing chain: centre crop along the frame axis (64), normalisation
# with 0.5 / 0.5 and min-max scaling to the range [-1, 1].
sampled_data = resize_data.map(
    lambda volume, label: get_center_of_volume(volume, label, 64),
    num_parallel_calls=N_CPUS,
)
normalized_data = sampled_data.map(
    lambda volume, label: normalize_accros_channels(volume, label, 0.5, 0.5),
    num_parallel_calls=N_CPUS,
)
scaled_data = normalized_data.map(
    lambda volume, label: min_max_scaler(volume, label, -1., 1.),
    num_parallel_calls=N_CPUS,
)

# Split the samples by label: 0 goes to the normal set, 1 to the abnormal set.
normal_data = scaled_data.filter(lambda volume, label: tf.equal(label, 0))
abnormal_data = scaled_data.filter(lambda volume, label: tf.equal(label, 1))
normal_data, abnormal_data

In [None]:
# from IPython.display import display, clear_output
# import matplotlib.pyplot as plt
# from ipywidgets import interact, IntSlider

# x = x.numpy().astype(np.uint8)

# interact(lambda frame: plt.imshow(x[frame-1]), frame=IntSlider(min=1, max=x.shape[0], step=1));

## Model

### Model params

In [None]:
# Architecture hyper-parameters; isize, nz, nc, ngf and extra_layers are the
# arguments given to the model factory in the "Models creation" cell.
isize = 64 # Input size of the data (image or volume)
nz = 100 # Size of the context (latent) vector
nc = 3 # Number of channels in the data
ngf = 64 # Number of filters in the first convolution of the encoder
extra_layers = 0 # Number of extra layer blocks to add before reduction
w_gen = (1, 50, 1) # Weights (w_adv, w_con, w_enc) of the adversarial, contextual and encoder terms of the generator error

### Experiment params

In [None]:
model_dimension = "3D" # Dimension of the model used in the experiment; selects get_2D_models or get_3D_models
batch_size = 16 # Size of the batch for the model
epochs = 15000 # Number of training epochs
beta_1 = 0.5 # beta_1 of the Adam optimizers (generator and discriminator)
beta_2 = 0.999 # beta_2 of the Adam optimizers (generator and discriminator)
lr = 0.0002 # Initial learning rate of the Adam optimizers

### Replicability configuration

In [None]:
# Seed the three random number generators used by the notebook (Python,
# NumPy and TensorFlow) with the same value, 8128, for replicability.
random.seed(8128)
np.random.seed(8128)
tf.random.set_seed(8128)

### Experiment documentation

In [None]:
# Folder of this experiment and its id, as returned by the project helper
# `experiment_folder_path` for the given base directory and model settings.
experiment_path, experiment_id = experiment_folder_path("/home/jefelitman/Overleaf_Server/", model_dimension, isize, nc)

# Metrics folder for model graphs (the train/test CSV files are written here)
metric_save_path = get_metrics_path(experiment_path)

# Output folder for outputs
outputs_path = get_outputs_path(experiment_path)
# Show the experiment folder as the cell output.
experiment_path

In [None]:
# Write a README describing the settings of this experiment. The context
# manager guarantees the file is closed even if formatting or writing fails.
with open(os.path.join(experiment_path, "README.txt"), "w+") as readme:
    readme.write(
"""This file contains information about the experiment made in this instance.

All models saved don't include the optimizer, but this file explains how to train in the same conditions.

Basic notation:
- {i}_Ganomaly_{d}: Experiment id, name of the model and operation dimensionality of convolutions.
- H x W x C or F x H x W x C: Data dimensions used where F are frames, H height, W width and C channels.

Experiment settings:
- The seed used was 8128 for python random module, numpy random and tf random after the library importations.
- The batch size was of {batch}.
- The optimizer used in this experiment was Adam for generator and discriminator.
- The number of classes in this dataset are 2 (Normal and Parkinson) .
- This experiment use the data of parkinson_2020_cutted.tfrecord from data2020_cuttedFrames data.
- The initial lr was of {lr}.
- The beta 1 and beta 2 for adam optimizer was {beta_1} and {beta_2} respectively.
- The total epochs made in this experiment was of {epochs}.
- The context vector size (nz) was of {nz}.
- The # channels in data (nc) was of {nc}.
- The initial filters in the first convolution of the encoder was {ngf}.
- The quantity of layer blocks to add before reduction was of {extra_layers}.
- The weights for adversarial, contextual and encoder error in generator was {w_gen}.

Transformations applied to data (following this order):
- Resize: We resize the frames of volumes to H x W (64 x 64).
- Centered volume: We take 64 frames on the center of volume to train and test the data.
- Normalize: We normalize the volume with mean and std of 0.5 for both.
- Scale: We scale the data between -1 and 1 using min max scaler to be comparable with generated images.
- Randomize: We randomize the order of samples in every epoch.

Training process:
- The data doesn't have train and test partition but we make the partitions like this:
    * 75% of normal data is used in train randomly selected.
    * 25% of normal data is used in test randomly selected.
    * 100% of abnormal (parkinson) data is used in test.
""".format(
            i = experiment_id,
            d = model_dimension,
            batch = batch_size,
            lr = lr,
            beta_1 = beta_1,
            beta_2 = beta_2,
            epochs = epochs,
            nz = nz,
            nc = nc,
            ngf = ngf,
            extra_layers = extra_layers,
            w_gen = w_gen
        )
    )

### Models creation

In [None]:
# Explicit mapping from the experiment's dimensionality to its model factory;
# clearer and safer than building a function name and looking it up in globals().
_MODEL_FACTORIES = {"2D": get_2D_models, "3D": get_3D_models}
if model_dimension not in _MODEL_FACTORIES:
    raise ValueError(
        "model_dimension must be one of {}, got {!r}".format(
            sorted(_MODEL_FACTORIES), model_dimension
        )
    )

# Build the generator and the discriminator and print their architectures.
gen_model, disc_model = _MODEL_FACTORIES[model_dimension](isize, nz, nc, ngf, extra_layers)
gen_model.summary()
disc_model.summary()

In [None]:
# Show the output tensors of the generator as the cell output.
gen_model.outputs

### Optimizers creation

In [None]:
# One independent Adam optimizer per network (generator first, then
# discriminator), both built with the same hyper-parameters.
gen_opt, disc_opt = (
    tf.keras.optimizers.Adam(learning_rate=lr, beta_1=beta_1, beta_2=beta_2)
    for _ in range(2)
)
gen_opt, disc_opt

### Train and Inference steps

In [None]:
%load '../models/ganomaly/utils/steps.py'

## Training process

### Data preparation

In [None]:
# Cache the preprocessed samples so the preprocessing is not recomputed on
# later iterations, then shuffle with a buffer of 88 elements, drawing a new
# order every time the dataset is iterated.
# NOTE(review): 88 presumably matches the number of normal videos, which would
# make this a full shuffle; confirm against the dataset.
# NOTE(review): the training loop derives both the train and the test split from
# `normal_data.enumerate()`. Since the order is redrawn on every iteration, the
# two splits appear to come from different permutations, so test samples may
# also have been seen during training; confirm this is intended.
normal_data = normal_data.cache().shuffle(88, reshuffle_each_iteration=True)

abnormal_data = abnormal_data.cache().shuffle(88, reshuffle_each_iteration=True)

### Metrics creation

In [None]:
# Metric accumulators: the training loop feeds them with `update_state`, reads
# them with `result` and clears them with `reset_states`.
TP = get_true_positives()
TN = get_true_negatives()
FP = get_false_positives()
FN = get_false_negatives()
AUC = get_AUC()

# Per-epoch training log. The file stays open across cells and is closed at
# the end of the training loop cell.
train_metrics_csv = open(os.path.join(metric_save_path,"train.csv"), "w+")
# Header row; note the fields are separated by ", " (comma followed by a space).
train_metrics_csv.write("epoch, gen_error, disc_error, accuracy, precision, recall, specificity, f1_score, auc\n")

# Per-epoch test log, handled the same way as the training log.
test_metrics_csv = open(os.path.join(metric_save_path,"test.csv"), "w+")
test_metrics_csv.write("epoch, accuracy, precision, recall, specificity, f1_score, auc\n")

### Loop

In [None]:
# TODO: save the fake images as they are generated
# TODO: save the latent-vector errors for the statistical analysis

In [None]:
def _anomaly_scores(latent_i, latent_o):
    """Return one anomaly score per sample, min-max scaled within the batch.

    The raw score of a sample is the mean squared difference between its two
    latent tensors; the raw scores of the batch are then scaled to [0, 1].

    Args:
        latent_i: First latent tensor returned by the train/test step
            (presumably the encoding of the input; confirm in steps.py).
        latent_o: Second latent tensor returned by the step, same shape as
            `latent_i` (presumably the encoding of the reconstruction).

    Returns:
        A 1-D tensor with one score per sample. All scores are 0 when every
        sample of the batch has the same raw score.
    """
    latent_error = latent_i - latent_o
    # Flatten every axis except the batch one. `tf.squeeze` would also drop the
    # batch axis of a batch holding a single sample and break the reduction
    # over axis 1.
    # NOTE(review): assumes all non-batch axes but one have size 1 (as when the
    # latent is 1 x ... x nz), in which case this matches the former squeeze.
    latent_error = tf.reshape(latent_error, [tf.shape(latent_error)[0], -1])
    scores = tf.math.reduce_mean(tf.math.pow(latent_error, 2), axis=1)
    lowest = tf.reduce_min(scores)
    # divide_no_nan yields 0 instead of NaN when max == min.
    return tf.math.divide_no_nan(scores - lowest, tf.reduce_max(scores) - lowest)


def _update_metrics(labels, scores):
    """Feed one batch of labels and anomaly scores to every metric accumulator."""
    for metric in (TP, TN, FP, FN, AUC):
        metric.update_state(labels, scores)


def _metric_values():
    """Evaluate all reported metrics from the current state of the accumulators."""
    tp = TP.result().numpy()
    tn = TN.result().numpy()
    fp = FP.result().numpy()
    fn = FN.result().numpy()
    return {
        "acc": accuracy(tp, tn, fp, fn),
        "pre": precision(tp, fp),
        "rec": recall(tp, fn),
        "spe": specificity(tn, fp),
        "f1": f1_score(tp, fp, fn),
        "auc": AUC.result().numpy(),
    }


def _reset_metrics():
    """Clear every metric accumulator before the next train/test pass."""
    for metric in (TP, TN, FP, FN, AUC):
        metric.reset_states()


# Progress messages shown after every step.
_TRAIN_REPORT = """
            Epoch: {i} - Train Step: {j}
            Generator error: {loss_g}
            Discriminator error: {loss_d}
            Accuracy: {acc}
            Precision: {pre}
            Recall: {rec}
            Specificity: {spe}
            F1_Score: {f1}
            AUC: {auc}
            """
_TEST_REPORT = """
            Epoch: {i} - Test Step: {j}
            Accuracy: {acc}
            Precision: {pre}
            Recall: {rec}
            Specificity: {spe}
            F1_Score: {f1}
            AUC: {auc}
            """

# Data partition for train and test. The dataset definitions do not depend on
# the epoch, so they are built once instead of being rebuilt (and their
# lambdas re-traced) on each of the `epochs` iterations; iterating them again
# still reshuffles `normal_data`.
# Train: the first `partition_point` normal samples of the current shuffle.
# Test: the remaining normal samples followed by every abnormal sample.
# NOTE(review): each iteration of `train_data` and `test_data` reshuffles
# `normal_data` independently, so the two splits may overlap; confirm this is
# the intended evaluation protocol.
partition_point = 66
train_data = normal_data.enumerate().filter(
    lambda i, xy: tf.math.less(i, partition_point)
).map(undo_enumerate, N_CPUS).batch(batch_size).prefetch(-1)
test_data = normal_data.enumerate().filter(
    lambda i, xy: tf.math.greater_equal(i, partition_point)
).map(undo_enumerate, N_CPUS).concatenate(abnormal_data).batch(batch_size).prefetch(-1)

try:
    for epoch in range(epochs):
        # ---- Training pass ----
        for step, xy in enumerate(train_data):
            err_g, err_d, fake_images, latent_i, latent_o, feat_real, feat_fake = train_step(xy[0])

            # Re-initialise the discriminator when its error drops below 1e-5.
            if err_d < 1e-5:
                reinit_model(disc_model)

            anomaly_scores = _anomaly_scores(latent_i, latent_o)
            _update_metrics(xy[1], anomaly_scores)

            clear_output(wait=True)
            print(_TRAIN_REPORT.format(
                i=epoch + 1,
                j=step + 1,
                loss_g=err_g,
                loss_d=err_d,
                **_metric_values()
            ))

        # Save train metrics (errors are those of the last training step)
        train_metrics_csv.write(
            "{e}, {loss_g}, {loss_d}, {acc}, {pre}, {rec}, {spe}, {f1}, {auc}\n".format(
                e=epoch,
                loss_g=err_g,
                loss_d=err_d,
                **_metric_values()
            )
        )
        _reset_metrics()

        # ---- Test pass ----
        for step, xy in enumerate(test_data):
            fake_images, latent_i, latent_o, feat_real, feat_fake = test_step(xy[0])

            anomaly_scores = _anomaly_scores(latent_i, latent_o)
            _update_metrics(xy[1], anomaly_scores)

            clear_output(wait=True)
            print(_TEST_REPORT.format(
                i=epoch + 1,
                j=step + 1,
                **_metric_values()
            ))

        # Save test metrics
        test_metrics_csv.write(
            "{e}, {acc}, {pre}, {rec}, {spe}, {f1}, {auc}\n".format(
                e=epoch,
                **_metric_values()
            )
        )
        _reset_metrics()
finally:
    # Close (and therefore flush) the logs even when training fails or is
    # interrupted, so the metrics written so far are not lost.
    train_metrics_csv.close()
    test_metrics_csv.close()

### Save models

In [None]:
# Store both networks in HDF5 format inside the experiment folder, without
# the optimizer state.
for file_name, trained_model in (
    ("gen_model.h5", gen_model),
    ("disc_model.h5", disc_model),
):
    trained_model.save(
        os.path.join(experiment_path, file_name),
        include_optimizer=False,
        save_format='h5',
    )

In [None]:
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
from ipywidgets import interact, IntSlider

# Rescale the second generated volume of the last processed batch to [0, 1]
# so it can be displayed with `plt.imshow`. The second argument (0) stands in
# for the label and only the first element of the result is kept.
# NOTE(review): assumes `min_max_scaler` returns a (volume, label) pair and
# that the last batch holds at least two samples; confirm.
x = min_max_scaler(fake_images[1], 0, 0, 1)[0].numpy()

# Browse the frames of the volume with a slider (frame numbers start at 1).
interact(lambda frame: plt.imshow(x[frame-1]), frame=IntSlider(min=1, max=x.shape[0], step=1));

In [None]:
# NOTE(review): `z` is not assigned anywhere in the visible notebook, so this
# cell would raise NameError unless the loaded steps.py defines it. This looks
# like a leftover scratch cell; confirm and remove.
z

In [None]:
# Sanity check: rescale the first generated volume of the last processed batch
# with bounds 0 and 1 and display its maximum and minimum values.
var = min_max_scaler(fake_images[0], 0, 0, 1)[0].numpy()
tf.reduce_max(var), tf.reduce_min(var)