# <font color='red'>**Libraries**</font>

In [None]:
import os

# Expose only GPU 0 (devices enumerated in PCI bus order) and switch off the
# oneDNN custom ops. Set here, ahead of the TensorFlow import in the next cell.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
    "TF_ENABLE_ONEDNN_OPTS": "0",
})

In [None]:
import tensorflow as tf
#import tensorflow_datasets as tfds
from tensorflow_examples.models.pix2pix import pix2pix
from os import listdir
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from numpy import vstack
from numpy import asarray
from numpy import savez_compressed
import numpy as np
from PIL import Image
from tqdm import tqdm
import os
import csv
import time
import matplotlib.pyplot as plt
from IPython.display import clear_output
from tensorflow import keras
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix, ConfusionMatrixDisplay
import imageio
from tensorflow import keras
from skimage.transform import resize

# Short alias for tf.data's autotune constant (not referenced in the cells
# visible in this part of the notebook).
AUTOTUNE = tf.data.AUTOTUNE

# <font color='red'>**Useful methods**</font>

In [None]:
def load_model(name):
    """Load one of the saved binary classifiers for inference.

    Args:
        name: Model selector. ``'vgg16'`` picks the fine-tuned VGG16
            checkpoint; any other value picks the EfficientNetV2B0 one.

    Returns:
        The Keras model read from disk, loaded with ``compile=False``.
    """
    print("working on model: ", name)

    vgg_checkpoint = '../models/classifier/binary/vggFinetuned/new_vgg16v4.h5'
    default_checkpoint = '../models/classifier/binary/EfficientNetV2B0/EfficientNetV2B0.h5'
    checkpoint = vgg_checkpoint if name == 'vgg16' else default_checkpoint

    return keras.models.load_model(checkpoint, compile=False)

In [None]:
def predict(nom_video, test_df):
    """Score every frame of one video and append the results to the CSV.

    Relies on two notebook-level globals that must exist when this is
    called: ``model`` (the loaded Keras classifier) and ``writer`` (a
    ``csv.writer`` bound to an open file).

    Args:
        nom_video: Identifier written in the first column of every row.
        test_df: DataFrame with ``path``, ``label`` and ``Frame`` columns,
            one row per frame, in the order the rows should be written.

    Side effects:
        Writes one row per prediction to ``writer``:
        ``[nom_video, frame, score_0, score_1]``.
    """
    # The generator yields one image per step, so one step per dataframe row.
    n_frames = len(test_df)
    # Frame numbers, in the same order the generator will yield the images.
    frame_numbers = test_df.Frame
    test_generator = ImageDataGenerator().flow_from_dataframe(
        dataframe=test_df,
        directory=None,
        x_col="path",
        y_col="label",
        batch_size=1,
        seed=42,
        shuffle=False,  # keep predictions aligned with frame_numbers
        class_mode="categorical",
        target_size=(256, 256),
    )
    # Model.predict accepts generators directly; predict_generator is the
    # deprecated spelling and the rest of this notebook already uses predict.
    pred = model.predict(test_generator, steps=n_frames, verbose=1)
    for frame, scores in zip(frame_numbers, pred):
        writer.writerow([nom_video, frame, scores[0], scores[1]])

In [None]:
def sort_testing_dataframe(test_df):
    """Return a copy of *test_df* ordered by frame number.

    The frame number is parsed from each file name in the ``path`` column:
    the text after the last ``_`` of the base name, with the file extension
    removed (``.../x_img_12.png`` -> ``12``). It is stored in a new ``Frame``
    column. The input dataframe is not modified.

    Args:
        test_df: DataFrame with a ``path`` column of ``/``-separated paths.

    Returns:
        A new DataFrame with the extra ``Frame`` column, sorted by it.

    Raises:
        ValueError: If a file name does not end in ``_<integer>.<ext>``.
    """
    def frame_number(path):
        # splitext copes with extensions of any length (.png, .jpeg, ...).
        stem = os.path.splitext(path.split('/')[-1])[0]
        return int(stem.split('_')[-1])

    ordered = test_df.sort_values(by='path')
    # Series.map (unlike DataFrame.apply(axis=1)) also works on an empty
    # frame, which happens when no path matched the requested video.
    ordered['Frame'] = ordered['path'].map(frame_number)
    return ordered.sort_values(by='Frame')

In [None]:
def make_generator(current_df, HEIGHT, WIDTH, batch_size):
    """Build an unshuffled image iterator over the rows of *current_df*.

    Args:
        current_df: DataFrame with ``path`` (image file) and ``label`` columns.
        HEIGHT: Target image height passed to the loader.
        WIDTH: Target image width passed to the loader.
        batch_size: Number of images per yielded batch.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``
        with categorical labels and ``shuffle=False``.
    """
    flow_options = dict(
        dataframe=current_df,
        x_col="path",
        y_col="label",
        batch_size=batch_size,
        seed=42,
        shuffle=False,
        class_mode="categorical",
        target_size=(HEIGHT, WIDTH),
    )
    return ImageDataGenerator().flow_from_dataframe(**flow_options)

In [None]:
def get_videos(split, csv_path=None):
    """Read a frame-level CSV and list the distinct videos it contains.

    Each CSV row is ``path,label`` (no header). A video identifier is built
    from the frame's file name, split on ``_``: field 0 is the class and
    field 3 is the video number, giving ``<class>_video_<number>``.

    Args:
        split: ``'valid'`` selects the validation CSV; any other value
            selects the test CSV. Ignored when *csv_path* is given.
        csv_path: Optional explicit CSV file to read instead of the
            built-in locations.

    Returns:
        A tuple ``(df, videos)``: the DataFrame with columns ``path`` and
        ``label``, and the sorted list of unique video identifiers.

    Raises:
        IndexError: If a file name has fewer than four ``_``-separated fields.
    """
    if csv_path is None:
        if split == 'valid':
            csv_path = '../data/csv_files/adeVshyp/NBI/valNBI.csv'
        else:
            csv_path = '../data/csv_files/adeVshyp/NBI/testNBI.csv'

    df = pd.read_csv(csv_path, header=None)
    df.columns = ['path', 'label']

    videos = set()
    for frame_path in df['path']:
        fields = frame_path.split('/')[-1].split('_')
        videos.add(fields[0] + '_video_' + fields[3])

    # Sorted so the order is reproducible from run to run (set iteration
    # order for strings depends on the interpreter's hash seed).
    return df, sorted(videos)

# <font color='red'>**Reading and testing valid videos**</font>
The goal here is to read the CSV that contains the complete frames of the test videos, with the serrated videos as an additional test set. The idea is to load everything as a single DataFrame and iteratively extract each video of interest, in order to obtain the model's overall verdict (decided by the majority of frames) when classifying a given video as adenoma or hyperplastic.

In [None]:
# Class index -> class name, used to decode the argmax of a prediction.
labels = dict(enumerate(['adenoma', 'hiperplastic']))

# Image size handed to the generators, and images per inference batch.
HEIGHT = WIDTH = 256
batch_size = 16

## Knowing valid videos

In [None]:
# Load the validation split and display the number of rows per label.
val_df, val_videos = get_videos('valid')
val_df.groupby(['label']).count()

In [None]:
# Load the test split and display the number of rows per label.
test_df, test_videos = get_videos('test')
test_df.groupby(['label']).count()

In [None]:
# Show the distinct video identifiers found in the validation split.
print("unique videos for valid set:")
print(val_videos)

In [None]:
# Show the distinct video identifiers found in the test split.
print("unique videos for test set:")
print(test_videos)

## Accessing general NBI videos
Change the `general_data` path according to each case:
* real NBI: '../../../../../data/polyp_original/NBI.csv'
* real WL: '../../../../../data/polyp_original/WL.csv'
* synthetic NBI: '../imgs_results/binary/embcBaseline/fold1/full_frames/embcBaselineArtifNbifold1.csv'

In [None]:
# Experiment name; the next cell uses it to build the results CSV path.
experiment = 'vgg_baseline'

In [None]:
# Read the frame-level CSV (path,label; no header row) for the selected
# experiment and preview the first rows.
general_data = f'../imgs_results/binary/embcVariation/{experiment}/{experiment}ArtifNbi.csv'
general_df = pd.read_csv(general_data, header=None)
general_df.columns = ['path', 'label']
general_df.head()

In [None]:
# Display the path stored under index label 6000 (KeyError if that label
# does not exist in the dataframe).
general_df['path'][6000]

In [None]:
# Display the number of rows per label.
general_df.groupby('label').count()

## <font color='red'>**Reading valid videos**</font>
### Majority voting

In [None]:
# Majority voting: for every test video, classify all of its frames and let
# the class predicted for the most frames decide the video-level label. One
# report file is written per experiment.
experiments = ['vgg_baseline', 'Mobilenet']
print("model loading... ")
model = load_model('vgg16')

for experiment in experiments:
    print("=========== WORKING ON: ", experiment, " ===========")
    save_path = '../imgs_results/binary/embcVariation/' + experiment + '/'

    print("===== reading dataframe ", experiment)
    # NOTE(review): general_data is not rebuilt per experiment, so every
    # iteration reads the CSV selected in an earlier cell. Confirm whether
    # the per-experiment path below was meant to be active.
    #general_data = '../imgs_results/binary/embcVariation/test/'+experiment+'/'+experiment+'ArtifNbi.csv'

    general_df = pd.read_csv(general_data, header=None)
    general_df.columns = ['path', 'label']
    print("dataframe readed!")

    to_save = save_path + experiment + 'ArtifNbiPreds.txt'
    # The with-block closes the file; no explicit close() is needed.
    with open(to_save, 'w') as f:
        for video in test_videos:
            clase = video.split('_')[0]
            num_vid = video.split('_')[-1]
            print("==== working on class: ", clase, " video: ", num_vid, " ====")
            to_write = "==== WORKING ON: " +str(clase)+" video: "+str(num_vid)+" ===="
            f.write(to_write)
            f.write('\n')

            # Select the frames of the current video. The trailing slash keeps
            # video_1 from also matching video_10; regex=False makes the key a
            # literal substring rather than a pattern.
            key = clase + '_WL/video_' + num_vid + '/'
            current_df = general_df[general_df['path'].str.contains(key, regex=False)]
            if current_df.empty:
                # Nothing to predict on; record it and move to the next video
                # instead of failing inside the generator/model.
                to_write = "no frames found for key: " + key
                print(to_write)
                f.write(to_write + '\n')
                continue

            test_gen = make_generator(current_df, HEIGHT, WIDTH, batch_size)
            probs = model.predict(test_gen)

            # Each frame votes for its top class; keep the winning score so
            # the per-class mean confidence can be reported.
            ade_probs, hyp_probs = [], []
            for prob in probs:
                confidence = round(np.max(prob), 2)
                if labels[np.argmax(prob)] == "adenoma":
                    ade_probs.append(confidence)
                else:
                    hyp_probs.append(confidence)

            # Ties go to hyperplastic (strict ">" for adenoma).
            if len(ade_probs) > len(hyp_probs):
                final_decision = "ADENOMA "
            else:
                final_decision = "HYPERPLASTIC "

            print("total frames: " + str(len(current_df))
                  + " votes for adenoma: {} for hyperp: {}".format(len(ade_probs), len(hyp_probs)))

            to_write = ("video: "+str(num_vid)+" total frames: "+str(len(current_df))+
            " votes for adenoma: "+str(len(ade_probs))+" votes for hyper: "+str(len(hyp_probs))+
            " final decision: " + final_decision)
            f.write(to_write)

            # A class with no votes has no mean; report nan without going
            # through NumPy's "Mean of empty slice" warning.
            ade_mean = round(np.mean(ade_probs), 2) if ade_probs else float('nan')
            hyp_mean = round(np.mean(hyp_probs), 2) if hyp_probs else float('nan')
            print("ade mean: {}, hyp mean: {}".format(ade_mean, hyp_mean))
            to_write = "adenoma mean: " + str(ade_mean) + " hyper mean: " + str(hyp_mean) + "\n"
            f.write(to_write)
    print("finished!")

## <font color='red'>**General metrics**</font>

### Frames decision

In [None]:
# Sort the test video identifiers (plain string order) and display them.
videos = sorted(test_videos)
videos

In [None]:
# Gather the frames of every test video into a single dataframe. All videos
# go through the same key (with trailing slash, so video_1 cannot also match
# video_10), and the pieces are concatenated once at the end.
video_frames = []
for video in videos:
    clase = video.split('_')[0]
    num_vid = video.split('_')[-1]
    to_check = clase + '_WL/video_' + num_vid + '/'
    current_df = general_df[general_df['path'].str.contains(to_check, regex=False)]
    video_frames.append(current_df)

# pd.concat rejects an empty list, so fall back to an empty slice.
tmp_df = pd.concat(video_frames, axis=0) if video_frames else general_df.iloc[0:0]

tmp_df.describe()

In [None]:
# Show the paths of the first and last rows of the combined dataframe.
for heading, position in (("first record: \n", 0), ("last record: \n", -1)):
    print(heading)
    print(tmp_df.iloc[position]['path'])

In [None]:
# (Re)load the fine-tuned VGG16 classifier into the notebook-level `model`.
model = load_model('vgg16')

In [None]:
#Confution Matrix and Classification Report
test_gen = make_generator(tmp_df, HEIGHT, WIDTH, batch_size)
test_gen.reset()
logits = model.predict(test_gen, tmp_df.shape[0] // batch_size+1)
y_pred_class = np.argmax(logits, axis=1)

target_names = ['Adenoma', 'Hyperplastic']      

print('Confusion Matrix for experiment: ')#, experiment)
print(confusion_matrix(test_gen.classes, y_pred_class))
print('Classification Report')
print(classification_report(test_gen.classes, y_pred_class, target_names=target_names))

In [None]:
# Plot the confusion matrix normalised per true class (each row sums to 1),
# with two-decimal cell values.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
target_names = ['adenoma', 'hyperplastic']
cm = confusion_matrix(test_gen.classes, y_pred_class, normalize='true')
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
disp = disp.plot(include_values=True, cmap=plt.cm.Blues, xticks_rotation='horizontal', values_format='.2f')
plt.show()

In [None]:
# AUC is a ranking metric, so it needs the continuous score of the positive
# class; hard 0/1 labels collapse the ROC curve to one operating point.
# NOTE(review): assumes the model outputs per-class probabilities in [0, 1]
# and that column 1 corresponds to class index 1 of test_gen - confirm.
AUC = tf.keras.metrics.AUC()
AUC.update_state(test_gen.classes, logits[:, 1])
AUC.result()

### Online prediction

In [None]:
# Display the sorted list of test video identifiers.
videos

In [None]:
# Display the experiment name currently held in the notebook.
experiment

In [None]:
# Online prediction: score every frame of each test video (plus the serrated
# videos) in frame order and write one CSV row per frame.
print("model loading... ")
model = load_model('vgg16')

save_path = '../imgs_results/binary/embcVariation/vgg_baseline/'
to_write = save_path + 'onlinePreds.csv'

print("===== leyendo dataframe para ", experiment)
general_df = pd.read_csv(general_data, header=None)
general_df.columns = ['path', 'label']
print("dataframe leido!")

# File-name prefixes identifying the frames of each video to score. The
# trailing "_img" keeps video_1 from also matching video_10.
video_keys = []
for video in videos:
    clase = video.split('_')[0]
    num_vid = video.split('_')[-1]
    video_keys.append(clase + '_WL_video_' + str(num_vid) + '_img')
# Serrated samples: videos numbered 1..15.
video_keys.extend('serrated_WL_video_' + str(i + 1) + '_img' for i in range(15))

with open(to_write, 'w', newline='') as file:
    # predict() looks up `model` and `writer` as notebook-level globals, so
    # these two names must not be renamed.
    writer = csv.writer(file)

    for to_check in video_keys:
        single_df = general_df[general_df['path'].str.contains(to_check, regex=False)]
        print("length of single_df: ", len(single_df))
        if single_df.empty:
            # No frames for this key: nothing to sort or predict.
            continue
        single_df = sort_testing_dataframe(single_df)
        nom_video = to_check
        # predict() takes (nom_video, test_df); passing extra arguments
        # raises TypeError.
        predict(nom_video, single_df)