# <font color='white'>**Libraries**</font>

In [None]:
import os

# Enumerate GPUs in PCI bus order and expose only device "0" to this process.
# Set here, ahead of the TensorFlow import in the next cell, so the values are
# already in place when the GPU runtime initializes.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [None]:
import tensorflow as tf
import sklearn
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix, ConfusionMatrixDisplay 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input,Dense,GlobalAveragePooling2D,Flatten,concatenate,BatchNormalization, Dropout
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras import regularizers
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.preprocessing import image
from tensorflow import keras
import imageio
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras import regularizers
AUTOTUNE = tf.data.AUTOTUNE
from tqdm import tqdm
from numpy import asarray
import random
from sklearn.metrics import roc_auc_score
from sklearn import preprocessing
from tensorflow import keras
import os
import math

In [None]:
# TensorFlow is already imported in the imports cell; repeated here so this cell can run on its own.
import tensorflow as tf
# Record the TensorFlow version used for this run.
print(tf.__version__)
# Run tf.function-wrapped code eagerly (op by op) instead of as compiled graphs.
# NOTE(review): this usually makes training slower; confirm it is still required.
tf.config.run_functions_eagerly(True)

# <font color='red'>**Helper functions**</font>

# <font color='red'>**Data**</font>

### Loading csv files and dataframes by fold

In [None]:
def load_dataframes(task):
    """
    Loads the training and test CSV manifests of a task into pandas DataFrames.

    Parameters:
    task (str): Selects which pair of CSV files is read.
        - "control_to_pd": 'control_pd_MRI_fullRois_{TRAIN,TEST}.csv' under the PPMI 'filtered/' folder.
        - "mri_to_spect": 'embc_extension/extension_control_pd_SPECT_fullRois_{TRAIN,TEST}.csv'
          under the same folder.
        - any other value: 'full_{train,test}_mriSpectFullRois.csv' under the synthetic
          'imgs_results/full_rois/preprocessed/mri_to_spect/' folder.

    Returns:
    train_df (pandas.DataFrame): DataFrame containing the training data with two columns, 'path' and 'label'.
    test_df (pandas.DataFrame): DataFrame containing the test data with two columns, 'path' and 'label'.

    Notes:
    - The CSV files are read without a header row.
    - No validation split is created here; callers that need one must split 'train_df' themselves.
    """
    print("working on task: ", task)

    # Base folders for the real (PPMI) manifests and for the synthetic ones.
    gen_path = '../../../../../../Datasets/Parkinson/radiological/PPMI/spect-mri/filtered/'
    synthetic_data = "../imgs_results/full_rois/preprocessed/mri_to_spect/"

    # One if/elif/else chain: previously the first test was a separate `if`, so the
    # trailing `else` also ran for "control_to_pd" and replaced its paths with the
    # synthetic manifests.
    if task == "control_to_pd":
        csv_train = os.path.join(gen_path, 'control_pd_MRI_fullRois_TRAIN.csv')
        csv_test = os.path.join(gen_path, 'control_pd_MRI_fullRois_TEST.csv')
    elif task == "mri_to_spect":
        csv_train = os.path.join(gen_path, 'embc_extension/extension_control_pd_SPECT_fullRois_TRAIN.csv')
        csv_test = os.path.join(gen_path, 'embc_extension/extension_control_pd_SPECT_fullRois_TEST.csv')
    else:
        csv_train = os.path.join(synthetic_data, 'full_train_mriSpectFullRois.csv')
        csv_test = os.path.join(synthetic_data, 'full_test_mriSpectFullRois.csv')

    # The manifests have no header row, so the column names are assigned explicitly.
    train_df = pd.read_csv(csv_train, header=None)
    train_df.columns = ['path', 'label']

    test_df = pd.read_csv(csv_test, header=None)
    test_df.columns = ['path', 'label']

    return train_df, test_df


# <font color='red'>**Networks**</font>

In [None]:
def make_model(arquitectura, height, width, num_clases):
    """
    Builds an ImageNet-pretrained backbone topped with a small softmax classification head.

    Parameters:
    arquitectura (str): Backbone name. Recognised values are 'MobileNet', 'MobileNetv2',
        'NasNetMobile', 'Vgg16' and 'InceptionV3'; any other value selects ResNet50.
    height (int): Input image height.
    width (int): Input image width.
    num_clases (int): Number of units of the final softmax layer.

    Returns:
    custom_model (tensorflow.keras.Model): Uncompiled model mapping the backbone input to the
        softmax predictions.

    Notes:
    - The input always has 3 channels, matching the RGB input of the pretrained weights.
    - For every backbone the layers before a fixed index are frozen and the remaining
      ones are left trainable.
    """
    # Backbone name -> (tf.keras.applications constructor name, index of the first trainable layer).
    backbone_specs = {
        'MobileNet': ('MobileNet', 50),
        'MobileNetv2': ('MobileNetV2', 70),
        'NasNetMobile': ('NASNetMobile', 500),
        'Vgg16': ('VGG16', 10),
        'InceptionV3': ('InceptionV3', 275),
    }

    print("cargando red: ", arquitectura)
    # Unknown names fall back to ResNet50.
    constructor_name, first_trainable = backbone_specs.get(arquitectura, ('ResNet50', 100))
    build_backbone = getattr(tf.keras.applications, constructor_name)
    base_model = build_backbone(weights='imagenet', include_top=False,
                                input_shape=(height, width, 3))

    # Transfer learning: freeze the early layers, train the rest.
    for position, layer in enumerate(base_model.layers):
        layer.trainable = position >= first_trainable

    # Classification head: pooling -> fc1 -> dropout -> fc2 -> softmax.
    features = GlobalAveragePooling2D()(base_model.output)
    features = tf.keras.layers.Flatten(name='flatten')(features)
    features = tf.keras.layers.Dense(1024, activation='relu',
                                     kernel_regularizer=regularizers.l2(0.01),
                                     name='fc1')(features)
    features = tf.keras.layers.Dropout(0.5)(features)
    features = tf.keras.layers.Dense(512, activation='relu',
                                     kernel_regularizer=regularizers.l2(0.01),
                                     name='fc2')(features)
    preds = tf.keras.layers.Dense(num_clases, activation='softmax', name='predictions')(features)

    return Model(inputs=base_model.input, outputs=preds)

## Data generator

In [None]:
def make_generator(df_train, df_val, HEIGHT, WIDTH, tipo="categorical", batch_size=16):
    """
    Creates image generators for training and validation data from the given DataFrames.

    Parameters:
    df_train (pandas.DataFrame): DataFrame containing the training data with two columns, 'path' and 'label'.
    df_val (pandas.DataFrame): DataFrame containing the validation data with two columns, 'path' and 'label'.
    HEIGHT (int): The target height for the input images.
    WIDTH (int): The target width for the input images.
    tipo (str): The class mode passed to both generators. Defaults to 'categorical'.
    batch_size (int): The batch size of both generators. Defaults to 16.

    Returns:
    train_generator (DataFrameIterator): A data generator for training data, with augmentation.
    valid_generator (DataFrameIterator): A data generator for validation data, without augmentation.

    Notes:
    - Both generators rescale pixel values with (img / 127.5) - 1.0.
    - Only the training generator applies random shear, zoom, shifts, rotation and horizontal flips.
    - Images are resized to (HEIGHT, WIDTH); 'seed' is 42 and 'shuffle' is True for both generators.
    - NOTE: a later testing cell of this notebook defines another 'make_generator' with a
      different signature; re-run this cell before training again in the same kernel.
    """
    # Augmentation settings (training generator only).
    shear_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
    zoom_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
    width_shift_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
    height_shift_range = 0.1 #@param {type:"slider", min:0.0, max:1.0, step:0.05}
    rotation_range = 10 #@param {type:"slider", min:0, max:90, step:5}
    horizontal_flip = True #@param {type:"boolean"}
    vertical_flip = False #@param {type:"boolean"}

    def custom_rescale(img):
        # Divide by 127.5 and subtract 1 (0 -> -1, 255 -> 1).
        return (img / 127.5) - 1.0

    # Arguments shared by the training and validation flows.
    flow_kwargs = dict(directory=None,
                       x_col='path',
                       y_col='label',
                       target_size=(HEIGHT, WIDTH),
                       class_mode=tipo,
                       batch_size=batch_size,
                       seed=42,
                       shuffle=True)

    # Generator for training data
    datagen = ImageDataGenerator(preprocessing_function=custom_rescale,
                                 shear_range=shear_range,
                                 zoom_range=zoom_range,
                                 width_shift_range=width_shift_range,
                                 height_shift_range=height_shift_range,
                                 rotation_range=rotation_range,
                                 horizontal_flip=horizontal_flip,
                                 vertical_flip=vertical_flip)
    train_generator = datagen.flow_from_dataframe(dataframe=df_train, **flow_kwargs)

    # Generator for validation data (rescaling only)
    val_datagen = ImageDataGenerator(preprocessing_function=custom_rescale)
    valid_generator = val_datagen.flow_from_dataframe(dataframe=df_val, **flow_kwargs)

    return train_generator, valid_generator


## Train

In [None]:
def train_custom_layers(df_train, df_val, HEIGHT, WIDTH, arquitectura, clases, fine_tunning=False):
    """
    Trains a classifier from scratch (pretrained backbone + new head) or fine-tunes a saved one.

    Parameters:
    df_train (pandas.DataFrame): Training data with columns 'path' and 'label'.
    df_val (pandas.DataFrame): Validation data with columns 'path' and 'label'.
    HEIGHT (int): Target image height.
    WIDTH (int): Target image width.
    arquitectura (str): Backbone name; passed to make_model and used to build the model file names.
    clases (int): Number of output classes passed to make_model.
    fine_tunning (bool): When False, a new model is built with make_model and trained with early
        stopping, saving the best 'val_accuracy' checkpoint to disk. When True, a previously
        saved '<arquitectura>.h5' model is loaded, every layer is unfrozen and training runs
        with ReduceLROnPlateau only.

    Returns:
    custom_model (tensorflow.keras.Model): The model after training (last-epoch weights).

    Notes:
    - Both modes use Adam with learning rate 0.0001, categorical cross-entropy and up to 50 epochs.
    - In fine-tuning mode no callback writes the model to disk; the caller has to save it.
    """
    print("making the generators")
    train_generator, valid_generator = make_generator(df_train, df_val, HEIGHT, WIDTH)

    lr = 0.0001

    if fine_tunning:
        print("making fine tunning")
        trained_model_path = '../models/classifier/mri_spect/preprocessed/first_approach/' + arquitectura + '.h5'
        custom_model = keras.models.load_model(trained_model_path, compile=True)

        print("unfreezing all the layers")
        for layer in custom_model.layers:
            layer.trainable = True

        # NOTE(review): the original assigned a destination here
        # ("../models/classifier/mri_spect/preprocessed/fine-tunning/<arquitectura>v3.h5")
        # but never used it, so the fine-tuned model is not checkpointed. Confirm whether a
        # ModelCheckpoint is wanted.
    else:
        print("making the custom model")
        custom_model = make_model(arquitectura, HEIGHT, WIDTH, clases)
        save_path = "../models/embc_extension/classifier/mri_spect/raw/first_approach/" + arquitectura + ".h5"

    # Compile after the trainable flags are final so they take effect.
    custom_model.compile(optimizer=Adam(learning_rate=lr), loss='categorical_crossentropy',
                         metrics=['accuracy'])

    if fine_tunning:
        print("callback para refinamiento")
        callback_list = [tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                                              patience=4, mode='min',
                                                              min_lr=0.00000001)]
    else:
        callback_list = [tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_accuracy', mode='max'),
                         tf.keras.callbacks.ModelCheckpoint(filepath=save_path,
                                                            monitor='val_accuracy',
                                                            verbose=1,
                                                            save_best_only=True,
                                                            mode='max',
                                                            save_weights_only=False,
                                                            save_freq='epoch')]

    # `Model.fit` accepts generators directly; `fit_generator` is deprecated and no longer
    # available in recent Keras releases. At least one step is requested so a split smaller
    # than one batch does not produce a step count of 0.
    custom_model.fit(train_generator,
                     steps_per_epoch=max(1, train_generator.n // train_generator.batch_size),
                     epochs=50,
                     validation_data=valid_generator,
                     validation_steps=max(1, valid_generator.n // valid_generator.batch_size),
                     callbacks=callback_list)

    return custom_model

In [None]:
# Load the train/test manifests for the "mri_to_spect" task, then display the number of
# training rows per label (class balance check).
train_df, test_df = load_dataframes(task="mri_to_spect")
train_df.groupby('label').count()

In [None]:
# Number of test rows per label (class balance check).
test_df.groupby('label').count()

In [None]:
# Keep only the slices whose number lies inside the selected window.
# Windows noted for the two data variants:
#   preprocessed images: 41 to 132
#   raw images: 109 to 180
# NOTE(review): the comparisons are strict, so slices 41 and 132 themselves are excluded;
# confirm that this is the intended window.
SLICE_PATTERN = r'_case_(\d+)_slice_(\d+)\.png'  # the dot is escaped so it only matches a literal '.'
SLICE_MIN, SLICE_MAX = 41, 132

# train: parse case/slice numbers out of the file name, filter, then drop the helper columns.
train_df[['case_number', 'slice_number']] = train_df['path'].str.extract(SLICE_PATTERN).astype(int)
train_in_window = (train_df['slice_number'] > SLICE_MIN) & (train_df['slice_number'] < SLICE_MAX)
# `.drop(columns=...)` returns a new frame, avoiding an in-place drop on a filtered slice
# (which triggers pandas' SettingWithCopyWarning).
train_df_v2 = train_df.loc[train_in_window].drop(columns=['slice_number', 'case_number'])

# test: same procedure.
test_df[['case_number', 'slice_number']] = test_df['path'].str.extract(SLICE_PATTERN).astype(int)
test_in_window = (test_df['slice_number'] > SLICE_MIN) & (test_df['slice_number'] < SLICE_MAX)
test_df_v2 = test_df.loc[test_in_window].drop(columns=['slice_number', 'case_number'])

In [None]:
# Number of rows left in each split after the slice-window filter.
print("lengh of train: {}, lengh of test: {}".format(len(train_df_v2), len(test_df_v2)))

In [None]:
#this cell is for a general classification knowledge
HEIGHT, WIDTH = 256, 256
#nets = ['Vgg16', 'MobileNet', 'ResNet50']
nets = ['MobileNet']
num_clases=2
fine_tunning = False

for net in nets:
    arquitectura = net 
    np.random.seed(42)
    tf.random.set_seed(42)
    tf.keras.backend.clear_session()

    finetune_model = train_custom_layers(train_df_v2, test_df_v2, HEIGHT, WIDTH, arquitectura, num_clases, fine_tunning)

In [None]:
# Manually save the model returned by the training cell.
# NOTE(review): the hard-coded target is ".../fine-tunning/Vgg16v3.h5", while the training
# cell above uses nets = ['MobileNet'] with fine_tunning = False; confirm the file name
# matches the model actually held in `finetune_model` before running this cell.
finetune_model.save("Data/franklin/Doctorado/parkinson/projects/parcellation_translation/models/classifier/mri_spect/preprocessed/fine-tunning/Vgg16v3.h5")

In [None]:
# Layer-by-layer summary of the model that was just trained.
finetune_model.summary()

## Testing
### Making generator

In [None]:
def custom_rescale(img):
    """Divide by 127.5 and subtract 1, so that 0 maps to -1 and 255 maps to 1."""
    scaled = img / 127.5
    return scaled - 1.0
                                 
def make_generator(df_test, HEIGHT, WIDTH, batch_size):
    """
    Creates a non-shuffled image generator for evaluation.

    Parameters:
    df_test (pandas.DataFrame): DataFrame with the columns 'path' and 'label'.
    HEIGHT (int): The target height for the input images.
    WIDTH (int): The target width for the input images.
    batch_size (int): The batch size for the generator.

    Returns:
    test_generator (DataFrameIterator): Generator yielding rescaled images in DataFrame order
        (shuffle=False), with categorical labels.

    Notes:
    - NOTE: this definition replaces the training-time 'make_generator' defined earlier in the
      notebook, which takes (df_train, df_val, HEIGHT, WIDTH). Re-run that earlier cell before
      training again in the same kernel.
    """
    rescaling_datagen = ImageDataGenerator(preprocessing_function=custom_rescale)

    flow_options = {
        'directory': None,
        'dataframe': df_test,
        'x_col': 'path',
        'y_col': 'label',
        'batch_size': batch_size,
        'seed': 42,
        'shuffle': False,  # keep predictions aligned with the generator's `classes`
        'class_mode': "categorical",
        'target_size': (HEIGHT, WIDTH),
    }
    return rescaling_datagen.flow_from_dataframe(**flow_options)

In [None]:
# Load a trained classifier from disk. The path has the same form as the checkpoint
# destination used by train_custom_layers when fine_tunning is False.
name = "MobileNet"
model_path = '../models/embc_extension/classifier/mri_spect/raw/first_approach/' + name + '.h5'
# compile=True also restores the saved optimizer/loss/metrics configuration.
model = keras.models.load_model(model_path, compile=True)

In [None]:
# Layer-by-layer summary of the loaded classifier.
model.summary()

In [None]:
# Evaluation settings: image size and batch size for the test generator.
HEIGHT, WIDTH = 256, 256
# NOTE(review): `tipo` is not referenced by the evaluation cells shown below; the test
# generator hard-codes class_mode="categorical".
tipo = 'categorical'
batch_size = 8

### Loading different kinds of MRI or SPECT sources
**Loading the SPECT synthetic images**

In [None]:
# Option A: test manifest of the synthetic SPECT images (no header row in the CSV).
# This rebinds `test_df`, replacing the frame loaded in the training section.
test_df = pd.read_csv("../imgs_results/full_rois/raw/mri_to_spect/full_test_fullRois.csv", header=None)
test_df.columns = ['path', 'label']
# Rows per label.
test_df.groupby('label').count()

**Loading the CycleGAN subset data**

In [None]:
# Option B: separate control and Parkinson test manifests (no header row in the CSVs).
test_control_df = pd.read_csv("../data/full_rois/mri/test_control.csv", header=None)
test_pd_df = pd.read_csv("../data/full_rois/mri/test_parkinson.csv", header=None)

test_control_df.columns = ['path', 'label']
test_pd_df.columns = ['path', 'label']

In [None]:
# Stack both manifests row-wise into a single test frame (rebinds `test_df`); the original
# row indices of each frame are kept, so index values may repeat.
test_df = pd.concat([test_control_df, test_pd_df], axis=0)
# Rows per label.
test_df.groupby('label').count()

**Until here**

In [None]:
# Build the evaluation generator.
# NOTE(review): this uses `test_df_v2` (the slice-filtered frame from the training section),
# not the `test_df` loaded in the cells just above; confirm which frame is meant to be evaluated.
test_gen = make_generator(test_df_v2, HEIGHT, WIDTH, batch_size)

In [None]:
# Number of layers in the loaded model.
print(len(model.layers))

In [None]:
# Confusion matrix and classification report
test_gen.reset()
# Let `predict` iterate over the whole generator. The previous call passed a step count as
# the second positional argument, which `Model.predict` interprets as `batch_size`, and that
# count was derived from `test_df` although the generator is built from another frame.
# NOTE: despite the name, `logits` holds the softmax outputs of the model.
logits = model.predict(test_gen)
y_pred_class = np.argmax(logits, axis=1)

# NOTE(review): assumes class index 0 is 'control' and 1 is 'parkinson'; check
# `test_gen.class_indices` to confirm the ordering.
target_names = ['control', 'parkinson']

print('Confusion Matrix')
print(confusion_matrix(test_gen.classes, y_pred_class))
print('Classification Report')
print(classification_report(test_gen.classes, y_pred_class, target_names=target_names))

In [None]:
# Both names are already imported in the imports cell; the import is repeated here.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

target_names = ['control', 'parkinson']   
# normalize='true' divides each row by the number of true samples of that class,
# so the diagonal holds per-class recall.
cm = confusion_matrix(test_gen.classes, y_pred_class, normalize='true')
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
# Draw the matrix with two-decimal cell values on a blue colour map.
disp = disp.plot(include_values=True, cmap=plt.cm.Blues, xticks_rotation='horizontal', values_format='.2f')

In [None]:
# ROC AUC needs continuous scores: feeding hard argmax labels collapses the ROC curve to a
# single operating point. Use the predicted probability of class index 1 instead.
# NOTE(review): assumes index 1 is the positive ('parkinson') class; check
# `test_gen.class_indices` to confirm.
AUC = tf.keras.metrics.AUC()
AUC.update_state(test_gen.classes, logits[:, 1])
AUC.result()