In [1]:
import tensorflow as tf 
# Ask TensorFlow to grow GPU memory on demand for the first visible GPU.
# Guarded so the notebook still runs on machines where no GPU is listed
# (indexing an empty device list would raise IndexError).
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

import keras
import util
import pandas as pd
import seaborn as sns
import numpy as np
from tensorflow.keras import layers
import time
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics  import accuracy_score
import math

tf.config.experimental_run_functions_eagerly(True)


Using TensorFlow backend.


# Cargamos los datos

Obtenemos los nombres de las imágenes y sus etiquetas

In [2]:
ALL_DATA = "E:Corrected_FA/ALL_DATA/"
info_data = "idaSearch_8_01_2020.csv"

# Get the dictionary that maps each label to the file names of its images.
AD_CN, groups = util.obtain_data_files(ALL_DATA, info_data)

# Flatten the dictionary into one (image, label) pair per file, then split the
# pairs into the two parallel lists used to build the DataFrame.
_image_label_pairs = [
    (file_name, label)
    for label, file_names in AD_CN.items()
    for file_name in file_names
]
imgs = [file_name for file_name, _ in _image_label_pairs]
labels = [label for _, label in _image_label_pairs]

image_labels = pd.DataFrame({"Image": imgs, "Label": labels})
image_labels.head()

Unnamed: 0,Image,Label
0,ADNI_003_S_4081_MR_corrected_FA_image_Br_20120...,CN
1,ADNI_003_S_4081_MR_corrected_FA_image_Br_20131...,CN
2,ADNI_003_S_4119_MR_corrected_FA_image_Br_20120...,CN
3,ADNI_003_S_4119_MR_corrected_FA_image_Br_20120...,CN
4,ADNI_003_S_4119_MR_corrected_FA_image_Br_20130...,CN


Cargamos el dataset con las características (componentes FPC) extraídas de las imágenes

In [3]:
# Load the per-image feature table; the first CSV column is used as the index.
# NOTE(review): the variable is named "temporal_right" but the file name says
# "Ventricle" — confirm which brain region this table actually describes.
fpcas_temporal_right = pd.read_csv("FPCAS_Ventricle_Prueba_Norm.csv", index_col = 0)

Unimos imágenes con etiquetas

In [4]:
# Inner-join the features with the labels on the image file name, then drop
# the two join-key columns so only the FPC_* features and "Label" remain.
_features_with_labels = fpcas_temporal_right.merge(
    image_labels,
    left_on="file",
    right_on="Image",
)
df_temporal_right = _features_with_labels.drop(columns=["file", "Image"])

# Independent working copy used by the rest of the notebook.
data = df_temporal_right.copy()
df_temporal_right

Unnamed: 0,FPC_1,FPC_2,FPC_3,FPC_4,FPC_5,FPC_6,FPC_7,FPC_8,FPC_9,FPC_10,...,FPC_39,FPC_40,FPC_41,FPC_42,FPC_43,FPC_44,FPC_45,FPC_46,FPC_47,Label
0,1.426598,0.112181,-0.322595,-0.033974,0.137037,-0.154825,0.014088,-0.041808,0.032636,-0.120359,...,0.078227,-0.115859,0.002507,-0.071014,0.003743,-0.071799,-0.013446,0.023629,-0.033078,CN
1,1.447385,0.120397,-0.326797,-0.004794,0.071308,-0.121966,0.084857,-0.073673,0.041247,-0.115501,...,0.038899,-0.087622,-0.001160,-0.033449,-0.000071,-0.028571,-0.012131,0.011141,-0.051622,CN
2,1.671375,0.115622,0.097130,0.073061,-0.007771,0.018225,0.117936,0.046433,0.031110,0.035902,...,-0.004013,0.042103,0.023665,-0.045756,-0.038222,-0.026490,0.016583,0.026718,0.021704,CN
3,1.635482,0.068551,0.149279,0.064166,-0.014603,0.046951,0.025481,0.036126,0.058780,-0.003189,...,-0.005380,0.051569,-0.000363,-0.070958,-0.026215,-0.062745,0.015311,0.015830,0.027684,CN
4,1.531853,-0.371275,0.160936,-0.147053,0.181173,0.045500,0.081013,-0.113544,0.143626,0.046215,...,-0.017159,0.086668,-0.004663,-0.079088,-0.082042,-0.019592,0.076686,0.003082,0.001188,CN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,1.512948,-0.373168,0.136220,-0.007119,-0.110302,-0.001779,-0.073120,-0.008893,0.068726,-0.120261,...,0.026748,-0.079238,-0.099450,-0.002123,-0.015750,0.043629,-0.051654,-0.040108,0.055429,CN
366,1.537276,0.165208,0.091943,-0.045060,-0.034294,0.063803,-0.100678,0.043170,-0.031374,0.052864,...,0.074753,0.006285,-0.052607,-0.026298,0.014952,0.046099,-0.042051,-0.056600,0.107450,CN
367,1.403889,0.080721,-0.193643,0.015488,-0.048856,0.132803,0.133546,0.035902,0.068759,0.169236,...,-0.009194,-0.029963,-0.062132,-0.039073,-0.053222,0.018678,0.022814,-0.050268,0.074394,CN
368,1.549337,0.191282,0.039588,0.054031,0.033168,0.025229,0.044374,0.039591,0.041182,0.032756,...,-0.054471,-0.025476,0.002272,0.013597,-0.050911,0.034598,0.041045,-0.016493,0.003273,CN


# Creamos la GAN

## Generador

In [5]:
def make_generator_model(latent_size, condition_size, output_size):
    """Build the conditional generator network.

    Architecture: Dense(128) -> BatchNorm -> LeakyReLU -> Dense(64) ->
    BatchNorm -> LeakyReLU -> Dense(output_size, linear).

    Args:
        latent_size: number of noise features per sample.
        condition_size: number of conditioning features concatenated to the noise.
        output_size: number of features the generator produces per sample.

    Returns:
        A `tf.keras.Sequential` model whose input has
        `latent_size + condition_size` features.
    """
    model = tf.keras.Sequential()

    # Only the first layer declares the input shape; later layers infer theirs
    # from the previous layer's output.
    model.add(layers.Dense(128, input_shape=(latent_size + condition_size,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Dense(64))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # Linear output: the generated values are unbounded real numbers.
    model.add(layers.Dense(output_size, activation="linear"))

    return model

## Discriminador

In [6]:
def make_discriminator_model(input_size, condition_size):
    """Build the conditional discriminator network.

    Architecture: Dense(128) -> BatchNorm -> LeakyReLU -> Dense(64) ->
    BatchNorm -> LeakyReLU -> Dense(1).

    Args:
        input_size: number of features of the sample being judged.
        condition_size: number of conditioning features concatenated to the sample.

    Returns:
        A `tf.keras.Sequential` model whose input has
        `input_size + condition_size` features and whose output is a single
        value with no activation applied (a logit).
    """
    model = tf.keras.Sequential()

    # Only the first layer declares the input shape; later layers infer theirs
    # from the previous layer's output.
    model.add(layers.Dense(128, input_shape=(input_size + condition_size,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Dense(64))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    # No activation: the output is a raw logit.
    model.add(layers.Dense(1))

    return model

## Función de pérdida y optimizadores

In [7]:
# Binary cross-entropy used by both GAN losses. from_logits=True because the
# discriminator's last layer is a Dense(1) without activation (raw logits).
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [8]:
def discriminator_loss(real_output, fake_output):
    """Return the discriminator loss for one batch.

    The loss is the sum of two cross-entropy terms: real samples scored
    against a target of ones, and generated samples scored against a target
    of zeros.

    Args:
        real_output: discriminator outputs for the real samples.
        fake_output: discriminator outputs for the generated samples.

    Returns:
        The sum of the two cross-entropy terms.
    """
    loss_on_real = cross_entropy(tf.ones_like(real_output), real_output)
    loss_on_fake = cross_entropy(tf.zeros_like(fake_output), fake_output)
    return loss_on_real + loss_on_fake

In [9]:
def generator_loss(fake_output):
    """Return the generator loss for one batch.

    The generated samples are scored against a target of ones, so the loss is
    low when the discriminator rates them as real.

    Args:
        fake_output: discriminator outputs for the generated samples.

    Returns:
        The cross-entropy between a tensor of ones and `fake_output`.
    """
    target_real = tf.ones_like(fake_output)
    return cross_entropy(target_real, fake_output)

In [10]:
# Adam optimizers (learning rate 1e-4), one for the generator and one for the
# discriminator. train_step reads these module-level objects directly.
# NOTE(review): the same two optimizers are reused for both GANs trained below
# (Y->X and X->Y); newer Keras optimizers may reject variables they were not
# built with — confirm against the installed TensorFlow version.
generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

## Bucle de entrenamiento

In [11]:
@tf.function
def train_step(generator, discriminator, images, labels, noise_dim, epoch):
    """Run one optimisation step of the conditional GAN on a single batch.

    The generator is updated on every call. The discriminator is updated only
    on even epochs, and it runs in training mode exactly on those epochs.

    Args:
        generator: model mapping concat([noise, labels]) to generated samples.
        discriminator: model mapping concat([samples, labels]) to one logit.
        images: batch of real samples.
        labels: batch of conditioning values concatenated to both models' inputs.
        noise_dim: number of noise features drawn per sample.
        epoch: current epoch index; its parity decides whether the
            discriminator is updated in this step.

    Returns:
        Tuple `(gen_loss, disc_loss)` converted with `.numpy()`. The conversion
        relies on functions being run eagerly, which the notebook enables
        globally in its first cell.
    """
    # The discriminator is only updated on even epochs.
    update_discriminator = epoch % 2 == 0

    # One noise vector per sample in the batch (the last batch may be smaller).
    noise = tf.random.normal([labels.shape[0], noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        # The generator is optimised on every step, so it always runs in
        # training mode (BatchNormalization uses the batch statistics).
        generated_images = generator(
            tf.concat([noise, labels], axis=1),
            training=True)

        # The discriminator runs in training mode only when it is going to be
        # updated; otherwise it scores the samples in inference mode.
        real_output = discriminator(
            tf.concat([images, labels], axis=1),
            training=update_discriminator)
        fake_output = discriminator(
            tf.concat([generated_images, labels], axis=1),
            training=update_discriminator)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))

    # Only compute the discriminator gradients when they are going to be applied.
    if update_discriminator:
        gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
        discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

    return gen_loss.numpy(), disc_loss.numpy()

In [12]:
def train(generator, discriminator, dataset, epochs, noise_dim):
    """Train the GAN and return the logged loss history.

    Every epoch iterates over all batches of `dataset`, calling `train_step`
    on each one and averaging the returned losses. The averages are printed
    and recorded every 10 epochs and once more for the final epoch.

    Args:
        generator: generator model passed through to `train_step`.
        discriminator: discriminator model passed through to `train_step`.
        dataset: iterable yielding `(image_batch, labels)` pairs.
        epochs: number of epochs to run; 0 returns an empty history.
        noise_dim: noise size passed through to `train_step`.

    Returns:
        List of `(mean_disc_loss, mean_gen_loss, epoch)` tuples, one per logged
        epoch, with no duplicate entry for the final epoch.

    Raises:
        ValueError: if `dataset` yields no batches, so no mean can be computed.
    """
    history = []
    start = time.time()
    last_epoch = epochs - 1

    for epoch in range(epochs):
        gen_loss, disc_loss = 0, 0
        count = 0
        # TODO: pass the images properly
        for image_batch, labels in dataset:
            g, d = train_step(generator, discriminator, image_batch, labels, noise_dim, epoch)
            gen_loss += g
            disc_loss += d
            count += 1

        if count == 0:
            raise ValueError("dataset yielded no batches; cannot average the losses")

        # Log every 10th epoch and always the final one (exactly once).
        if epoch % 10 == 0 or epoch == last_epoch:
            # The reported time is the time elapsed since the previous log line.
            print ('Disc loss: {}, Gen loss: {}. Time for epoch {} is {} sec'.format(disc_loss / count, gen_loss / count, epoch, time.time()-start))
            history.append((disc_loss / count, gen_loss / count, epoch))
            start = time.time()

    return history
  

## Creación de los datasets

In [13]:
data

Unnamed: 0,FPC_1,FPC_2,FPC_3,FPC_4,FPC_5,FPC_6,FPC_7,FPC_8,FPC_9,FPC_10,...,FPC_39,FPC_40,FPC_41,FPC_42,FPC_43,FPC_44,FPC_45,FPC_46,FPC_47,Label
0,1.426598,0.112181,-0.322595,-0.033974,0.137037,-0.154825,0.014088,-0.041808,0.032636,-0.120359,...,0.078227,-0.115859,0.002507,-0.071014,0.003743,-0.071799,-0.013446,0.023629,-0.033078,CN
1,1.447385,0.120397,-0.326797,-0.004794,0.071308,-0.121966,0.084857,-0.073673,0.041247,-0.115501,...,0.038899,-0.087622,-0.001160,-0.033449,-0.000071,-0.028571,-0.012131,0.011141,-0.051622,CN
2,1.671375,0.115622,0.097130,0.073061,-0.007771,0.018225,0.117936,0.046433,0.031110,0.035902,...,-0.004013,0.042103,0.023665,-0.045756,-0.038222,-0.026490,0.016583,0.026718,0.021704,CN
3,1.635482,0.068551,0.149279,0.064166,-0.014603,0.046951,0.025481,0.036126,0.058780,-0.003189,...,-0.005380,0.051569,-0.000363,-0.070958,-0.026215,-0.062745,0.015311,0.015830,0.027684,CN
4,1.531853,-0.371275,0.160936,-0.147053,0.181173,0.045500,0.081013,-0.113544,0.143626,0.046215,...,-0.017159,0.086668,-0.004663,-0.079088,-0.082042,-0.019592,0.076686,0.003082,0.001188,CN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365,1.512948,-0.373168,0.136220,-0.007119,-0.110302,-0.001779,-0.073120,-0.008893,0.068726,-0.120261,...,0.026748,-0.079238,-0.099450,-0.002123,-0.015750,0.043629,-0.051654,-0.040108,0.055429,CN
366,1.537276,0.165208,0.091943,-0.045060,-0.034294,0.063803,-0.100678,0.043170,-0.031374,0.052864,...,0.074753,0.006285,-0.052607,-0.026298,0.014952,0.046099,-0.042051,-0.056600,0.107450,CN
367,1.403889,0.080721,-0.193643,0.015488,-0.048856,0.132803,0.133546,0.035902,0.068759,0.169236,...,-0.009194,-0.029963,-0.062132,-0.039073,-0.053222,0.018678,0.022814,-0.050268,0.074394,CN
368,1.549337,0.191282,0.039588,0.054031,0.033168,0.025229,0.044374,0.039591,0.041182,0.032756,...,-0.054471,-0.025476,0.002272,0.013597,-0.050911,0.034598,0.041045,-0.016493,0.003273,CN


In [14]:
BATCH_SIZE = 64
N_EPOCH = 1000
YtoX_noise_dim = 50
XtoY_noise_dim = 5

# Shuffle the rows. `sample` returns a new frame, so `data` is left untouched
# and no separate copy is needed.
data_gan = data.sample(frac = 1)
BUFFER_SIZE = data_gan.shape[0]

# Standardise every feature column to zero mean and unit standard deviation.
X = data_gan.drop(["Label"], axis = 1).values
feature_std = X.std(axis=0)
# A constant column has std 0; dividing by 1 instead keeps it at 0 rather than NaN.
feature_std[feature_std == 0] = 1.0
X = (X - X.mean(axis = 0)) / feature_std

# Binary target as a column vector: 1 for "AD", 0 for any other label.
Y = (data_gan["Label"] == "AD").astype("int64").values.reshape((-1,1))

Creamos los datasets Y&rarr;X y X&rarr;Y

In [15]:
# Both datasets hold the same float32 arrays; only the tuple order changes.
# The first element is the "real sample" and the second the condition.
_X_float32 = X.astype("float32")
_Y_float32 = Y.astype("float32")

# Y -> X: samples are X, conditioned on Y.
YtoX_dataset = (
    tf.data.Dataset.from_tensor_slices((_X_float32, _Y_float32))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
# X -> Y: samples are Y, conditioned on X.
XtoY_dataset = (
    tf.data.Dataset.from_tensor_slices((_Y_float32, _X_float32))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)

# Experimento Causal

## GAN Y &rarr; X

In [16]:
# Y -> X direction: the generator maps noise plus the label Y to a feature
# vector X; the discriminator scores an X vector concatenated with Y.
YtoX_generator = make_generator_model(YtoX_noise_dim, Y.shape[1], X.shape[1])
YtoX_discriminator = make_discriminator_model(X.shape[1], Y.shape[1])

In [17]:
# Train the Y -> X GAN for N_EPOCH epochs; the history holds the logged
# (disc_loss, gen_loss, epoch) tuples.
YtoX_history = train(YtoX_generator, YtoX_discriminator, YtoX_dataset, N_EPOCH, YtoX_noise_dim)

Disc loss: 1.373312731583913, Gen loss: 0.6498998999595642. Time for epoch 0 is 0.635037899017334 sec
Disc loss: 1.3474819660186768, Gen loss: 0.6266331573327383. Time for epoch 10 is 2.151697874069214 sec
Disc loss: 1.3043315609296162, Gen loss: 0.6136070787906647. Time for epoch 20 is 2.0976877212524414 sec
Disc loss: 1.279041091601054, Gen loss: 0.6138083438078562. Time for epoch 30 is 2.1457042694091797 sec
Disc loss: 1.2711676955223083, Gen loss: 0.6347746551036835. Time for epoch 40 is 2.129661798477173 sec
Disc loss: 1.3026732405026753, Gen loss: 0.6555717388788859. Time for epoch 50 is 2.0520212650299072 sec
Disc loss: 1.3163567980130513, Gen loss: 0.6952225267887115. Time for epoch 60 is 2.047999858856201 sec
Disc loss: 1.4241509040196736, Gen loss: 0.6244474252065023. Time for epoch 70 is 2.057943105697632 sec
Disc loss: 1.438522179921468, Gen loss: 0.6175770362218221. Time for epoch 80 is 2.128282070159912 sec
Disc loss: 1.3435481389363606, Gen loss: 0.6797130604585012. Time

## GAN X &rarr; Y

In [18]:
# X -> Y direction: the generator maps noise plus the feature vector X to Y;
# the discriminator scores a Y value concatenated with X.
XtoY_generator = make_generator_model(XtoY_noise_dim, X.shape[1], Y.shape[1])
XtoY_discriminator = make_discriminator_model(Y.shape[1], X.shape[1])

In [19]:
# Train the X -> Y GAN for N_EPOCH epochs; the history holds the logged
# (disc_loss, gen_loss, epoch) tuples.
XtoY_history = train(XtoY_generator, XtoY_discriminator, XtoY_dataset, N_EPOCH, XtoY_noise_dim)

Disc loss: 1.483480970064799, Gen loss: 0.6355150640010834. Time for epoch 0 is 0.19099998474121094 sec
Disc loss: 1.8256706794102986, Gen loss: 0.46653980016708374. Time for epoch 10 is 2.034658432006836 sec
Disc loss: 1.0755986173947651, Gen loss: 1.0682805279890697. Time for epoch 20 is 2.052999973297119 sec
Disc loss: 1.969055414199829, Gen loss: 0.4563161830107371. Time for epoch 30 is 2.056826114654541 sec
Disc loss: 1.0833531618118286, Gen loss: 0.9909741878509521. Time for epoch 40 is 2.0725321769714355 sec
Disc loss: 1.7565178473790486, Gen loss: 0.5118977626164755. Time for epoch 50 is 2.0060019493103027 sec
Disc loss: 1.2651514808336894, Gen loss: 0.9078514476617178. Time for epoch 60 is 2.0769081115722656 sec
Disc loss: 1.5009206533432007, Gen loss: 0.6101399461428324. Time for epoch 70 is 2.094919204711914 sec
Disc loss: 1.3542619744936626, Gen loss: 0.7377816239992777. Time for epoch 80 is 2.107999086380005 sec
Disc loss: 1.4185601075490315, Gen loss: 0.6709955235322317. 

# C2ST

In [20]:
# Fraction of the C2ST samples used to fit the classifier.
TRAIN_TEST_PROP = 0.7

X_real = X
Y_real = Y

# Real joint samples: the features followed by the label column, all tagged
# with class 1 for the two-sample classifier.
real_dataset = np.hstack((X_real, Y_real)).astype("float32")
real_labels = np.ones((len(real_dataset), 1), dtype=np.float32)

In [21]:
X_real.shape

(370, 47)

In [22]:
def generate_examples(generator, num_imgs, noise_dim, condition):
    """Sample from a trained conditional generator.

    Args:
        generator: model called on concat([noise, condition]) in inference mode.
        num_imgs: number of noise rows to draw; it has to match the number of
            rows in `condition` for the concatenation to work.
        noise_dim: number of noise features per row.
        condition: array-like conditioning values, converted to a float32 tensor.

    Returns:
        The generator output converted to a NumPy array.
    """
    condition_tensor = tf.convert_to_tensor(condition, dtype="float32")
    latent = tf.random.normal([num_imgs, noise_dim], dtype="float32")
    generator_input = tf.concat([latent, condition_tensor], axis=1)
    return generator(generator_input, training=False).numpy()

## C2ST Y &rarr; X

In [23]:
n_images = len(Y_real)

# Generate one fake X per real label, then build the fake joint samples
# (generated X followed by the real Y), all tagged with class 0.
X_fake = generate_examples(YtoX_generator, n_images, YtoX_noise_dim, Y_real)
YtoX_fake_dataset = np.hstack((X_fake, Y_real)).astype("float32")
YtoX_fake_labels = np.zeros((n_images, 1), dtype=np.float32)

In [24]:
# Stack the real samples on top of the fake ones, keeping the labels aligned row by row.
YtoX_total_dataset = np.vstack((real_dataset, YtoX_fake_dataset))
YtoX_total_labels = np.vstack((real_labels, YtoX_fake_labels))

In [25]:
n = YtoX_total_dataset.shape[0]

# Random train/test split: shuffle the row indices and cut them once at the
# TRAIN_TEST_PROP boundary.
random_range = np.random.permutation(n)
_split_at = int(TRAIN_TEST_PROP * n)
train_indexes, test_indexes = random_range[:_split_at], random_range[_split_at:]
n_train = train_indexes.shape[0]
n_test = test_indexes.shape[0]

X_train_YtoX = YtoX_total_dataset[train_indexes]
Y_train_YtoX = YtoX_total_labels[train_indexes].ravel()

X_test_YtoX = YtoX_total_dataset[test_indexes]
Y_test_YtoX = YtoX_total_labels[test_indexes].ravel()

In [26]:
# kNN classifier with k = floor(sqrt(number of training samples)).
_k_YtoX = int(math.sqrt(n_train))
classifier_YtoX = KNeighborsClassifier(n_neighbors=_k_YtoX)
# Fit on the training split.
classifier_YtoX.fit(X_train_YtoX, Y_train_YtoX)
# Accuracy on the held-out split is the C2ST statistic.
_predictions_YtoX = classifier_YtoX.predict(X_test_YtoX)
accuracy_score(Y_test_YtoX, _predictions_YtoX)

0.7792792792792793

## C2ST X &rarr; Y

In [28]:
n_images = len(X_real)

# Generate one fake Y per real feature vector, then build the fake joint
# samples (the real X followed by the generated Y), all tagged with class 0.
# NOTE(review): the generator's last layer is linear, so Y_fake is real-valued
# while Y_real only holds 0/1 — confirm this is intended for the C2ST.
Y_fake = generate_examples(XtoY_generator, n_images, XtoY_noise_dim, X_real)
XtoY_fake_dataset = np.hstack((X_real, Y_fake)).astype("float32")
XtoY_fake_labels = np.zeros((n_images, 1), dtype=np.float32)

In [29]:
# Stack the real samples on top of the fake ones, keeping the labels aligned row by row.
XtoY_total_dataset = np.vstack((real_dataset, XtoY_fake_dataset))
XtoY_total_labels = np.vstack((real_labels, XtoY_fake_labels))

In [30]:
n = XtoY_total_dataset.shape[0]

# Random train/test split: shuffle the row indices and cut them once at the
# TRAIN_TEST_PROP boundary.
random_range = np.random.permutation(n)
_split_at = int(TRAIN_TEST_PROP * n)
train_indexes, test_indexes = random_range[:_split_at], random_range[_split_at:]
n_train = train_indexes.shape[0]
n_test = test_indexes.shape[0]

X_train_XtoY = XtoY_total_dataset[train_indexes]
Y_train_XtoY = XtoY_total_labels[train_indexes].ravel()

X_test_XtoY = XtoY_total_dataset[test_indexes]
Y_test_XtoY = XtoY_total_labels[test_indexes].ravel()

In [31]:
# kNN classifier with k = floor(sqrt(number of training samples)).
_k_XtoY = int(math.sqrt(n_train))
classifier_XtoY = KNeighborsClassifier(n_neighbors=_k_XtoY)
# Fit on the training split.
classifier_XtoY.fit(X_train_XtoY, Y_train_XtoY)
# Accuracy on the held-out split is the C2ST statistic.
_predictions_XtoY = classifier_XtoY.predict(X_test_XtoY)
accuracy_score(Y_test_XtoY, _predictions_XtoY)

0.40540540540540543

# Contraste de hipótesis