# Imports

In [None]:
from sklearn.preprocessing import (FunctionTransformer, 
                                   QuantileTransformer,
                                   MinMaxScaler,
                                   RobustScaler,
                                   StandardScaler,
                                   KBinsDiscretizer,
                                  )



In [None]:
%matplotlib inline
from pathlib import Path
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from PIL import Image
import PIL
import cv2
import seaborn as sns
from scipy import stats
from sklearn.preprocessing import KBinsDiscretizer, minmax_scale


import glob

from keras.applications import Xception
from keras.applications.xception import preprocess_input


# Global figure style: white backgrounds and an opaque light-grey grid.
plt.rcParams.update(
    {
        "axes.facecolor": "white",
        "savefig.facecolor": "white",
        "grid.alpha": 1,
        "grid.color": "#CCCCCC",
    }
)


# Carregar Imagens

In [None]:
def transform_image(arr, f, **kwargs):
    """Apply ``f`` to every entry of a 2-D grid of images.

    Each entry is flattened and passed to ``f`` together with ``kwargs``.
    When ``f`` returns an ndarray holding as many elements as the entry,
    the result is reshaped back to the entry's shape; any other result is
    kept exactly as returned.

    Args:
        arr: array-like that becomes a 2-D array under ``np.array``.
        f: callable receiving the flattened entry (plus ``kwargs``).

    Returns:
        The per-column results stacked with ``np.array`` and transposed.
    """
    grid = np.array(arr)
    assert grid.ndim == 2

    def _apply(image):
        original_shape = image.shape
        result = f(image.flatten(), **kwargs)
        # Shape-preserving transformation: restore the entry's layout.
        keeps_size = (
            isinstance(result, np.ndarray)
            and np.prod(result.shape) == np.prod(original_shape)
        )
        return result.reshape(original_shape) if keeps_size else result

    per_column = [
        [_apply(image) for image in grid[:, col]]
        for col in range(grid.shape[1])
    ]
    return np.array(per_column).T

class ImageTransformer(FunctionTransformer):
    """FunctionTransformer that runs ``function`` on each image via ``transform_image``."""

    def __init__(self, function):
        def _apply_to_images(x):
            return transform_image(x, function)

        self.function = function
        super().__init__(_apply_to_images)

In [None]:
def get_histogram_stat(img, stat=np.mean):
    """Reduce a 2-D image to a single value by calling ``stat`` on it."""
    n_dims = img.ndim
    # Only 2-D (black-and-white) images are accepted.
    assert n_dims == 2
    return stat(img)

def to_black_and_white(img, dtype = np.int16, asarray = True, normalize_range = (0,255)):
    """Convert an image to a single-channel (grayscale) array or PIL image.

    Args:
        img: a PIL image (converted with mode ``"L"``) or an array-like of
            pixel values, which is used as given.
        dtype: dtype the pixel values are cast to.
        asarray: when True return the ndarray, otherwise wrap it with
            ``Image.fromarray``.
        normalize_range: ``(min, max)`` range to which all pixel values are
            jointly rescaled; ``None`` skips the rescaling.

    Returns:
        The converted image, as an ndarray or a PIL image.
    """
    # Accept any PIL image, not only BMP files.
    if isinstance(img, Image.Image):
        arr = np.asarray(img.convert("L"))
    else:
        arr = np.asarray(img)

    if normalize_range is not None:
        # minmax_scale only scales along axis 0 or 1 (per column / per row);
        # flatten first so the image is rescaled as a whole, then restore
        # its shape (same approach as ImageLoader.get_image_array).
        shape = arr.shape
        arr = minmax_scale(arr.ravel(), feature_range=normalize_range).reshape(shape)

    arr = arr.astype(dtype)

    if asarray:
        return arr
    return Image.fromarray(arr)
        

def get_histogram_stat_vector(images, stat = np.mean, normalization_function = to_black_and_white):
    """Compute ``stat(normalization_function(img))`` for every image.

    ``images`` may be an ``ImageLoader`` (its ``map`` method is used) or any
    iterable of images/arrays. Returns a list with one value per image.
    """
    def _stat_of(img):
        return stat(normalization_function(img))

    if isinstance(images, ImageLoader):
        return images.map(_stat_of)
    return [_stat_of(img) for img in images]

In [None]:
from PIL import Image, ImageOps


def padding(img, expected_size):
    """Add a border around ``img`` to reach ``expected_size`` in both dimensions.

    The missing pixels are split between the two sides of each axis; when
    the difference is odd the extra pixel goes to the right/bottom entry of
    the border tuple passed to ``ImageOps.expand``.
    """
    current_width, current_height = img.size[0], img.size[1]
    extra_width = expected_size - current_width
    extra_height = expected_size - current_height
    left = extra_width // 2
    top = extra_height // 2
    border = (left, top, extra_width - left, extra_height - top)
    return ImageOps.expand(img, border)


def resize_with_padding(img, expected_size):
    """Shrink ``img`` to fit ``expected_size`` and pad it to exactly that size.

    Args:
        img: PIL image; it is left untouched.
        expected_size: ``(width, height)`` of the returned image.

    Returns:
        A new PIL image: a thumbnail of ``img`` surrounded by a border that
        fills the remaining space (extra pixel on the right/bottom entry of
        the border tuple when the difference is odd).
    """
    target_width, target_height = expected_size[0], expected_size[1]
    # Image.thumbnail resizes in place; work on a copy so the caller's
    # image keeps its original resolution.
    img = img.copy()
    img.thumbnail((target_width, target_height))
    delta_width = target_width - img.size[0]
    delta_height = target_height - img.size[1]
    pad_width = delta_width // 2
    pad_height = delta_height // 2
    border = (pad_width, pad_height, delta_width - pad_width, delta_height - pad_height)
    return ImageOps.expand(img, border)


def gray_to_rgb(img):
    """Replicate a grayscale image into three identical channels.

    Args:
        img: array-like of shape ``(H, W)`` or ``(H, W, 1)``.

    Returns:
        A float64 array of shape ``(H, W, 3)`` whose channels all equal ``img``.
    """
    img = np.asarray(img)
    if img.ndim == 3:
        # Drop the trailing channel axis (only valid for a single channel).
        img = img.reshape(img.shape[:-1])

    rgb = np.zeros((img.shape[0], img.shape[1], 3))
    # Same value in each channel.
    rgb[:, :, 0] = img
    rgb[:, :, 1] = img
    rgb[:, :, 2] = img
    return rgb

In [None]:
class ImageLoader():
    """Index image files stored in one sub-folder per label under a root directory.

    ``items`` holds one tuple per image: ``(image_path, mask_info, label)``.
    ``label`` is the name of the sub-folder. ``mask_info`` is the mask path
    when ``load_masks`` is True, otherwise a boolean telling whether a file
    with the same prefix followed by ``_mask`` was found.
    Paths are sorted so item indices are reproducible.
    """
    def __init__(self, root_dir, mask_extension = ".bmp", image_extension = ".bmp", load_masks = True):
        """Scan ``root_dir`` and build ``items``.

        Args:
            root_dir: directory whose sub-directories are the labels.
            mask_extension: accepted for backward compatibility; not used
                by the scan.
            image_extension: accepted for backward compatibility; not used
                by the scan.
            load_masks: when True only images that have a mask are indexed
                and the mask path is stored; when False every non-mask file
                is indexed together with a ``has_mask`` flag.
        """
        self.load_masks = load_masks

        all_items = []
        for label_path in sorted(Path(root_dir).iterdir()):
            # Labels come from the sub-folder names; stray files are ignored.
            if not label_path.is_dir():
                continue
            label = label_path.name
            if load_masks:
                all_items += self._items_with_masks(label_path, label)
            else:
                all_items += self._items_with_mask_flags(label_path, label)

        self.items = all_items

    @staticmethod
    def _items_with_masks(label_path, label):
        """Pair every ``*_mask*`` file directly inside ``label_path`` with its image."""
        items = []
        for mask_path in sorted(label_path.glob("*_mask*")):
            prefix = mask_path.name.split("_mask")[0]
            candidates = [
                p for p in label_path.rglob("*" + prefix + "*")
                if "_mask" not in str(p)
            ]
            if not candidates:
                # A mask without any matching image is skipped.
                continue
            # The wildcard also matches longer names sharing the prefix
            # (e.g. "..._FLAIR1" vs "..._FLAIR17"); prefer the exact stem.
            exact = [p for p in candidates if p.stem == prefix]
            image_path = (exact or candidates)[0]
            items.append((str(image_path), str(mask_path), label))
        return items

    @staticmethod
    def _items_with_mask_flags(label_path, label):
        """List every non-mask file under ``label_path`` with a ``has_mask`` flag."""
        all_paths = sorted(str(p) for p in label_path.rglob("*") if p.is_file())
        # Set lookup keeps the has_mask check linear in the number of files.
        mask_prefixes = {p.split("_mask")[0] for p in all_paths if "_mask" in p}
        return [
            (p, str(Path(p).with_suffix("")) in mask_prefixes, label)
            for p in all_paths
            if "_mask" not in p
        ]

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        """Return a dict with the opened image, its mask (or ``has_mask``) and label."""
        item = self.items[idx]
        if self.load_masks:
            return {"image": Image.open(item[0]), "mask": Image.open(item[1]), "label": item[2]}
        return {"image": Image.open(item[0]), "has_mask": item[1], "label": item[2]}

    def get_image_array(self, index, size = None, to_rgb = False, normalize = False, normalize_range = (0,255)):
        """Load image ``index`` as an array.

        Args:
            index: position in ``items``.
            size: optional ``(width, height)`` passed to ``resize_with_padding``.
            to_rgb: convert the image to mode ``"RGB"``.
            normalize: rescale all pixel values jointly to ``normalize_range``.
            normalize_range: target range used when ``normalize`` is True.
        """
        image = Image.open(self.items[index][0])
        if normalize:
            pixels = np.array(image)
            # Flatten so the whole image shares one min/max, then restore shape.
            scaled = minmax_scale(pixels.ravel(), feature_range=normalize_range)
            image = Image.fromarray(scaled.reshape(pixels.shape))

        if to_rgb:
            image = image.convert("RGB")

        if size is not None:
            image = resize_with_padding(image, size)

        return np.array(image)

    def get_mask_array(self, index):
        """Return the mask of item ``index`` as a boolean array.

        For a two-valued mask the larger value maps to True. A constant
        mask is cast directly (non-zero -> True) because min-max scaling
        would divide by zero and turn every pixel into True.
        """
        mask_array = np.array(Image.open(self.items[index][1]))
        assert len(np.unique(mask_array)) <= 2, "mask has more than two values"
        lowest, highest = mask_array.min(), mask_array.max()
        if lowest == highest:
            return mask_array.astype(bool)
        return mask_array > lowest

    def get_label(self, index):
        return self.items[index][2]

    def get_masked_image(self, index):
        """Return the image with every pixel outside the mask set to 0."""
        return np.where(self.get_mask_array(index), self.get_image_array(index), 0)

    def plot_masked_image(self, index, alpha = 0.5):
        """Show the image and draw its masked version on top with opacity ``alpha``."""
        plt.imshow(self.get_image_array(index), cmap = "gray")
        plt.imshow(self.get_masked_image(index), alpha = alpha, cmap = "gray")
        return

    def get_masked_flat_image(self, index, return_index = False):
        """Return the pixels inside the mask as a flat array.

        With ``return_index`` True the flat positions of those pixels are
        returned as a second value.
        """
        image_flat = self.get_image_array(index).flatten()
        mask_flat = self.get_mask_array(index).flatten()
        image_flat = image_flat[mask_flat]
        if not return_index:
            return image_flat
        return image_flat, mask_flat.nonzero()[0]

    def get_image_id(self, index):
        """Return the part of the image file name before ``_FLAIR``."""
        return Path(self.items[index][0]).name.split("_FLAIR")[0]

    def get_flair_id(self, index):
        """Return the part of the file name between ``_FLAIR`` and the first dot."""
        return Path(self.items[index][0]).name.split("_FLAIR")[-1].split(".")[0]

    def map(self, function, attribute = 'image'):
        """Apply ``function`` to one attribute of every item (or to the whole item dict).

        ``attribute`` must be a key produced by ``__getitem__`` for this
        loader, or ``None`` to pass the full dict.
        """
        assert attribute in (None, "image", "label", "mask", "has_mask")
        if attribute is None:
            values = list(self)
        else:
            values = [entry[attribute] for entry in self]

        return [function(value) for value in values]
    
    

In [None]:
loader = ImageLoader("../Train", load_masks = False)
loader_test = ImageLoader("../SLE/", load_masks = False)

In [None]:
len(loader),len(loader_test)

In [None]:
#all(loader.map(lambda x: np.array(x).ndim == 2)), all(loader_test.map(lambda x: np.array(x).ndim == 2))

In [None]:
plt.imshow(loader_test.get_image_array(0, size = (512,512), to_rgb = True))
loader_test.get_image_array(0, size = (512,512), to_rgb = True).shape

In [None]:
#loader_test.plot_masked_image(100, alpha = 0.8)


In [None]:
#loader.get_mask_array(212)

## Cria DataFrame com informações

In [None]:
# Image id (file-name part before "_FLAIR") of every train and test item.
ids_train = np.array(list(map(loader.get_image_id, range(len(loader)))))
ids_test = np.array(list(map(loader_test.get_image_id, range(len(loader_test)))))
ids_all = np.concatenate([ids_train, ids_test])

In [None]:
# Flair id (file-name part between "_FLAIR" and the extension) of every item.
flairs_train = np.array(list(map(loader.get_flair_id, range(len(loader)))))
flairs_test = np.array(list(map(loader_test.get_flair_id, range(len(loader_test)))))
flairs_all = np.concatenate([flairs_train, flairs_test])

In [None]:
# Read the labels from the loader index; iterating the loader itself would
# open every image file just to return its label.
labels_train = np.array([loader.get_label(i) for i in range(len(loader))])
labels_test = np.array([loader_test.get_label(i) for i in range(len(loader_test))])
labels_all = np.hstack([labels_train, labels_test])

In [None]:
# Both loaders were built with load_masks=False, so the second field of each
# item is the has_mask flag; reading it directly avoids opening every image.
has_mask_train = np.array([item[1] for item in loader.items])
has_mask_test = np.array([item[1] for item in loader_test.items])
has_mask_all = np.hstack([has_mask_train, has_mask_test])

In [None]:
import pandas as pd
# One row per image (train items first, then test items).
id_df = pd.DataFrame(ids_all, columns = ["id"])
id_df["flair"] = flairs_all.astype(int)
id_df["label"] = labels_all
id_df["has_mask"] = has_mask_all
# plot_images_from_df (qualitative analysis) reads the images from this column.
id_df["image_path"] = [item[0] for item in loader.items + loader_test.items]

In [None]:
id_df["has_mask"].mean()

In [None]:
id_df.groupby(["label","flair"])[["has_mask"]].mean()

# Extrai Atributos

In [None]:
from keras.applications import EfficientNetV2S

In [None]:
from tensorflow.keras.preprocessing import image
import tensorflow as tf

In [None]:
image_size = (299,299)
pixel_range = (0,255)

In [None]:
feature_extractor = EfficientNetV2S(
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    input_shape=(*image_size,3),
    pooling="max",
    #classes=1000,
    #classifier_activation="softmax",
    include_preprocessing=True,
)

In [None]:
# Load every training image as an RGB array of size `image_size`, with pixel
# values rescaled to `pixel_range`.
_train_load_opts = dict(
    size=image_size,
    to_rgb=True,
    normalize=True,
    normalize_range=pixel_range,
)
images_train = [
    loader.get_image_array(i, **_train_load_opts) for i in range(len(loader))
]

In [None]:
# Load every test image as an RGB array of size `image_size`, with pixel
# values rescaled to `pixel_range`.
_test_load_opts = dict(
    size=image_size,
    to_rgb=True,
    normalize=True,
    normalize_range=pixel_range,
)
images_test = [
    loader_test.get_image_array(i, **_test_load_opts)
    for i in range(len(loader_test))
]

In [None]:
images_train = np.array(images_train)

In [None]:
images_test = np.array(images_test)

In [None]:
features_train = feature_extractor.predict(images_train)

In [None]:
features_test = feature_extractor.predict(images_test)

# Plot do manifold (espaço reduzido) de atributos

## Completamente não supervisionado
* é possível ver que, de forma completamente não supervisionada, as features extraídas são muito competentes em separar as classes
* é possível ver que os casos de SLE são mais parecidos com casos de AVC, ainda que existam alguns casos que se afastam do padrão, na região populada apenas por SLE

## Checa viés de flair

Checamos aqui se alguma classe possui uma probabilidade maior para flairs específicos. Isso pode fazer com que o modelo aprenda a discriminar a classe baseando-se no flair, o que pode ser ruim para seu poder de generalização.

Foi observado que para EM, os flairs são maiores enquanto os flairs para AVC e SLE possuem distribuição semelhante. Esse viés provavelmente ocorre no momento da coleta dos dados, em que o responsável pela imagem limita os flairs de acordo com o flair anterior

In [None]:
#checa viés de flair
# Flair bias check: one flair distribution per label, images with a mask only.
_masked_rows = id_df.query('label != "Test" and has_mask == True')
for _label_name, _group in _masked_rows.groupby("label"):
    sns.distplot(_group["flair"], label=_label_name)
plt.legend()
plt.title("distribuição de flairs por label")
plt.savefig("./images/vies_de_flair.png", bbox_inches="tight")

In [None]:
#checa viés de flair
# Flair bias check: one flair distribution per label, with or without mask.
_labelled_rows = id_df.query('label != "Test"')
for _label_name, _group in _labelled_rows.groupby("label"):
    sns.distplot(_group["flair"], label=_label_name)
plt.legend()
plt.title("distribuição de flairs por label")
plt.savefig("./images/vies_de_flair_sem_mascara.png", bbox_inches="tight")

In [None]:
import umap
from sklearn.decomposition import PCA

In [None]:
reducer = umap.UMAP()

In [None]:
features_all = np.vstack([features_train, features_test])
labels_all = np.hstack([labels_train, labels_test])

In [None]:
embs = reducer.fit_transform(features_all)

In [None]:
pcs = PCA().fit_transform(features_all)

### Por label

In [None]:
import plotly.express as px

In [None]:
#px.scatter(x = embs[:,0], y = embs[:,1], color = id_df["label"])

### Por flair

É possível ver que as features extraídas também são capazes de segmentar as flairs por similaridade.

In [None]:
# Store the first two UMAP and PCA components next to the image metadata.
for _prefix, _components in (("umap", embs), ("pca", pcs)):
    for _k in (0, 1):
        id_df[f"{_prefix}{_k}"] = _components[:, _k]

In [None]:
# UMAP projection coloured by mask availability; marker style encodes the label.
sns.scatterplot(data=id_df, x="umap0", y="umap1", hue="has_mask", style="label")
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP para espaço de features")
plt.savefig("./images/umap_hasmask.png", bbox_inches="tight", dpi=50)

In [None]:
# UMAP projection coloured by flair; marker style encodes the label.
sns.scatterplot(data=id_df, x="umap0", y="umap1", hue="flair", style="label")
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP para espaço de features")
plt.savefig("./images/umap_flair.png", bbox_inches="tight", dpi=50)

In [None]:
# UMAP projection coloured by label; marker style encodes mask availability.
sns.scatterplot(data=id_df, x="umap0", y="umap1", hue="label", style="has_mask")
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP para espaço de features")
plt.savefig("./images/umap_label.png", bbox_inches="tight", dpi=50)

In [None]:
# UMAP projection by label, restricted to images that have a mask.
d = id_df.query("has_mask == True")
sns.scatterplot(data=d, x="umap0", y="umap1", hue="label", style="has_mask")
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP para espaço de features")
plt.savefig("./images/umap_label_mask_only.png", bbox_inches="tight", dpi=50)

In [None]:
# UMAP projection by label, restricted to images without a mask.
d = id_df.query("has_mask == False")
sns.scatterplot(data=d, x="umap0", y="umap1", hue="label", style="has_mask")
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP para espaço de features")
plt.savefig("./images/umap_label_no_mask_only.png", bbox_inches="tight", dpi=50)

## Espaço escalado por estimator Lasso para reforçar esparsidade

é possível ver que a separação é ainda mais forte

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.model_selection import cross_val_predict, cross_val_score

In [None]:
from sklearn.metrics import auc, roc_auc_score, roc_curve, classification_report

In [None]:
class LinearScaledEmbeddings(TransformerMixin, BaseEstimator):
    """Weight features by a linear estimator's coefficients and embed them with UMAP.

    ``fit`` quantile-transforms ``X`` and fits ``estimator`` on the result.
    ``transform`` multiplies ``X`` by ``estimator.coef_`` and returns the
    embedding of a UMAP model fitted on that product at every call.
    """

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y=None, **fit_params):
        """Fit the quantile scaler on ``X`` and the estimator on the scaled ``X``."""
        self.scaler = QuantileTransformer().fit(X)
        scaled = self.scaler.transform(X)
        self.estimator.fit(scaled, y, **fit_params)
        return self

    def transform(self, X, **umap_kwargs):
        """Return the UMAP embedding of ``estimator.coef_ * X``.

        ``umap_kwargs`` are forwarded to the ``umap.UMAP`` constructor.
        """
        # NOTE(review): the coefficients were learned on quantile-transformed
        # features but are applied to X as given (the scaler call is disabled
        # here) — confirm this is intended.
        weighted = self.estimator.coef_ * X
        return umap.UMAP(**umap_kwargs).fit_transform(weighted)

In [None]:
# Keep only the AVC and SLE samples.
sle_mask = np.isin(labels_all, ["AVC", "SLE"])
features_sle, labels_sle = features_all[sle_mask], labels_all[sle_mask]

In [None]:
estimator = LogisticRegression(penalty = 'l1', solver = "saga", )
scaler = LinearScaledEmbeddings(estimator).fit(features_sle, labels_sle)

In [None]:
from sklearn.pipeline import make_pipeline

# Cross-validated class probabilities for the quantile-scaled L1 logistic
# regression on the AVC/SLE subset (default cross-validation splitting).
# NOTE(review): folds are not grouped by image id, so images sharing an id may
# end up in both the training and the validation side of a fold — confirm
# whether a grouped split is needed.
lasso_preds = cross_val_predict(make_pipeline(QuantileTransformer(), estimator), features_sle, labels_sle, method = "predict_proba")

In [None]:
roc_auc_score(labels_sle, lasso_preds[:,1], labels = scaler.estimator.classes_)

In [None]:
# The estimator was fitted on quantile-transformed features, so the same
# fitted scaler has to be applied before predicting.
id_df["lasso_proba_max"] = scaler.estimator.predict_proba(scaler.scaler.transform(features_all))[:,1]

In [None]:
# Joint distribution of the lasso probability and the flair for SLE/AVC images.
d = id_df.query("label in ('SLE','AVC')")
# x and y are passed by keyword, as in the other seaborn calls of this notebook.
sns.jointplot(x = d["lasso_proba_max"], y = d["flair"].astype(int), alpha = 0.2, hue = d["label"], style = d["has_mask"])

plt.title("")

In [None]:
lasso_embs = scaler.transform(features_all)

In [None]:
id_df['lasso0'] = lasso_embs[:,0]
id_df['lasso1'] = lasso_embs[:,1]

In [None]:
sns.distplot(scaler.estimator.coef_.flatten())
plt.title("Distribuição de coeficientes Lasso")
plt.savefig("./images/dist_lasso.png",bbox_inches = "tight")

In [None]:
# UMAP-LASSO projection of every image, by label and mask availability.
d = id_df
sns.scatterplot(data=d, x="lasso0", y="lasso1", hue="label", style="has_mask")
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP-LASSO para espaço de features")
plt.savefig("./images/lasso_label_only.png", bbox_inches="tight")

In [None]:
# UMAP-LASSO projection restricted to images with a mask; point size encodes the flair.
d = id_df.query("has_mask == True")
sns.scatterplot(x = d["lasso0"], y = d["lasso1"], hue = d["label"], size = d["flair"], alpha = 0.8)
plt.subplots_adjust(-1,-1)
# Title and file name refer to the lasso projection so this figure does not
# overwrite the plain UMAP figure saved as umap_label_mask_only.png.
plt.title("Projeção de UMAP-LASSO para espaço de features")
plt.savefig("./images/lasso_label_mask_only.png",bbox_inches = "tight", dpi = 50)

## Treinar algoritmo KNN no espaço UMAP

In [None]:
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors

In [None]:
knn = KNeighborsClassifier(n_neighbors=20, metric = "minkowski", p=2)

In [None]:
sle_mask = id_df["label"].isin(["AVC", "SLE"]).values
knn.fit(id_df[sle_mask][["lasso0","lasso1"]], id_df[sle_mask]["label"])

In [None]:
knn_dist, knn_neighbors = knn.kneighbors(id_df[["lasso0","lasso1"]])

In [None]:
# One array of neighbour indices / distances per row.
id_df["neighbors"] = list(knn_neighbors)
id_df["distances"] = list(knn_dist)

In [None]:
knn.classes_

In [None]:
knn_preds = knn.predict_proba(id_df[["lasso0","lasso1"]])

In [None]:
# Probability of the AVC class: the closer to 1, the more likely an ischemic etiology.
id_df["knn_proba"] = knn_preds[:, 0]
# Rows with zero AVC probability are tagged as demyelinating, the rest as ischemic.
id_df["pseudo_etiologia"] = np.where(
    id_df["knn_proba"] == 0, "desmielinizante", "isquemica"
)

In [None]:
# UMAP-Lasso projection (images with mask) by label; marker style is the KNN pseudo-etiology.
d = id_df.query("has_mask == True")
sns.scatterplot(
    data=d, x="lasso0", y="lasso1", hue="label", style="pseudo_etiologia",
    alpha=0.8, s=200,
)
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP-Lasso para espaço de features com separação KNN")
plt.savefig("./images/lasso_label_mask_only_pseudo_etiologia.png", bbox_inches="tight", dpi=50)

In [None]:
# UMAP-Lasso projection of every image coloured by the KNN probability.
d = id_df
sns.scatterplot(data=d, x="lasso0", y="lasso1", hue="knn_proba", alpha=0.8, s=200)
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP-Lasso para espaço de features com separação KNN")
plt.savefig("./images/lasso_knn_proba.png", bbox_inches="tight", dpi=50)

### Análise agregando todas as image_ids
é possível ver um corte em torno de 0.25

In [None]:
def agg_fu(df):
    """Summarise one group: mean lasso coordinates, mean knn_proba and the first label."""
    summary = {
        column: df[column].mean() for column in ("lasso0", "lasso1", "knn_proba")
    }
    summary["label"] = df["label"].iloc[0]
    return pd.Series(summary)

# Aggregate the images that have a mask by id.
d = (
    id_df
    .query("has_mask == True")
    .groupby("id").apply(agg_fu)
)

# Draw the split at 0.25, the cut-off actually applied to knn_proba when the
# aggregated pseudo-etiology is assigned.
plt.axvline(0.25, color = 'red', label = "split de etiologia", linestyle = '--')
sns.kdeplot(d["knn_proba"], hue = d["label"], )
sns.rugplot(d["knn_proba"], hue = d["label"])

plt.title("Distribuição de probabilidade de KNN aagregado paciente")
plt.savefig("./images/knn_dist_threshold.png",bbox_inches = "tight")

In [None]:
# Aggregated rows below the 0.25 cut-off are tagged as demyelinating, the rest as ischemic.
d["pseudo_etiologia"] = np.where(d["knn_proba"] < 0.25, "desmielinizante", "isquemica")

In [None]:
# Aggregated UMAP-Lasso coordinates (images with mask) by label and pseudo-etiology.
sns.scatterplot(
    data=d, x="lasso0", y="lasso1", hue="label", style="pseudo_etiologia",
    alpha=0.8, s=200,
)
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP-Lasso para espaço de features com separação KNN por paciente")
plt.savefig("./images/lasso_flair_mask_only_pseudo_etiologia_agregado.png", bbox_inches="tight", dpi=50)

In [None]:
d = (
    id_df
    #.query("has_mask == True")
    .groupby("id").apply(agg_fu)
)

In [None]:
# Aggregated rows below the 0.25 cut-off are tagged as demyelinating, the rest as ischemic.
d["pseudo_etiologia"] = np.where(d["knn_proba"] < 0.25, "desmielinizante", "isquemica")

In [None]:
# Aggregated UMAP-Lasso coordinates (all images) by label and pseudo-etiology.
sns.scatterplot(
    data=d, x="lasso0", y="lasso1", hue="label", style="pseudo_etiologia",
    alpha=0.8, s=200,
)
plt.subplots_adjust(left=-1, bottom=-1)
plt.title("Projeção de UMAP-Lasso para espaço de features com separação KNN por paciente")
plt.savefig("./images/lasso_flair_pseudo_etiologia_agregado.png", bbox_inches="tight", dpi=50)

In [None]:
id_df = id_df.merge(d[["pseudo_etiologia"]].rename(columns = {'pseudo_etiologia':"pseudo_etiologia_agg"}), left_on = "id", right_index = True, how = "left")

# Análise Qualitativa

In [None]:
import math
def ceildiv(a, b):
    """Return ``a / b`` rounded up to the next whole number."""
    quotient, remainder = divmod(a, b)
    # Any non-zero remainder means the floor quotient has to be bumped by one.
    return quotient + (1 if remainder else 0)

In [None]:
impath = "../Train/AVC/047_FLAIR17.bmp"
mskpath = "../Train/AVC/047_FLAIR17_mask*"

def plot_masked_image(ax, impath, mskpath, alpha = 0.8):
    """Draw a mask on ``ax`` and overlay its image with opacity ``alpha``.

    ``mskpath`` is a glob pattern; the first matching file is used as the
    mask. The mask is rescaled to the 0-255 range before being drawn, and
    the axis decorations are turned off. Returns ``ax``.
    """
    first_match = glob.glob(mskpath)[0]

    image = np.array(Image.open(impath))
    mask = np.array(Image.open(first_match))
    lowest, highest = mask.min(), mask.max()
    mask = (mask - lowest) / (highest - lowest) * 255

    ax.imshow(mask, cmap="gray")
    ax.imshow(image, cmap="gray", alpha=alpha)
    ax.axis("off")
    return ax

def plot_df_images(df, width = 4, alpha = 0.8):
    """Draw the masked image of every row of ``df`` on a grid of axes.

    Args:
        df: DataFrame with ``image_path`` and ``mask_path`` columns.
        width: number of grid rows; images fill the grid column by column.
        alpha: opacity of the image drawn over its mask.

    Returns:
        The matplotlib figure holding the grid.
    """
    size = len(df)
    height = ceildiv(size,width)
    # squeeze=False keeps ``axs`` two-dimensional even when the grid has a
    # single row or column, so the same indexing works for every shape.
    fig, axs = plt.subplots(width,height, gridspec_kw={'wspace':0, 'hspace':0},
                           squeeze=False)
    print(width,height)
    for i in range(size):
        row = df.iloc[i]
        plot_masked_image(axs[i%width, i//width], row["image_path"], row["mask_path"], alpha = alpha)

    # Hide the grid cells that did not receive an image.
    for i in range(size, width*height):
        axs[i%width, i//width].axis("off")

    return fig


def plot_images_from_df(df, **kwargs):
    """Show the images listed in ``df["image_path"]`` as a ``snsi.ImageGrid``.

    Every image is rescaled to the 0-255 range first; ``kwargs`` are
    forwarded to ``snsi.ImageGrid``.
    """
    def _load_scaled(path):
        pixels = np.array(Image.open(path))
        lowest, highest = pixels.min(), pixels.max()
        return (pixels - lowest) / (highest - lowest) * 255

    scaled_images = [_load_scaled(path) for path in df["image_path"]]
    return snsi.ImageGrid(scaled_images, **kwargs)
    

In [None]:
import seaborn_image as snsi

In [None]:
id_df['pseudo_etiologia_agg'].unique()

In [None]:
flair = 17
pseudo_etiologia = "desmielinizante"
# The higher knn_proba, the closer to ischemic; False sorts the highest first.
proximo_de_desmielinizante = False

if proximo_de_desmielinizante:
    proximidade_str = "tendendo a dismeilinizante"
else:
    proximidade_str = "tendendo a isquemico"

# Up to nine masked, non-EM images of this flair and aggregated pseudo-etiology.
_selection = id_df.query(f"label != 'EM' and has_mask == True and flair == {flair} and pseudo_etiologia_agg == '{pseudo_etiologia}'")
_selection = _selection.sort_values(by="knn_proba", ascending=proximo_de_desmielinizante).head(9)
fig = plot_images_from_df(_selection, cmap="gray")

_title = f" Imagens de flair {flair}, de pseudo-etiologia {pseudo_etiologia}, {proximidade_str}"
fig.fig.suptitle(_title, fontsize=14)

_out_path = f"./images/imagens_flair{flair}_{pseudo_etiologia}_{proximidade_str.replace(' ','_')}"
plt.savefig(_out_path, bbox_inches="tight", dpi=50)

In [None]:
flair = 17
pseudo_etiologia = "isquemica"
# The higher knn_proba, the closer to ischemic; False sorts the highest first.
proximo_de_desmielinizante = False

if proximo_de_desmielinizante:
    proximidade_str = "tendendo a dismeilinizante"
else:
    proximidade_str = "tendendo a isquemico"

# Up to nine masked, non-EM images of this flair and aggregated pseudo-etiology.
_selection = id_df.query(f"label != 'EM' and has_mask == True and flair == {flair} and pseudo_etiologia_agg == '{pseudo_etiologia}'")
_selection = _selection.sort_values(by="knn_proba", ascending=proximo_de_desmielinizante).head(9)
fig = plot_images_from_df(_selection, cmap="gray")

_title = f" Imagens de flair {flair}, de pseudo-etiologia {pseudo_etiologia}, {proximidade_str}"
fig.fig.suptitle(_title, fontsize=14)

_out_path = f"./images/imagens_flair{flair}_{pseudo_etiologia}_{proximidade_str.replace(' ','_')}"
plt.savefig(_out_path, bbox_inches="tight", dpi=50)

In [None]:
flair = 17
pseudo_etiologia = "desmielinizante"
# The higher knn_proba, the closer to ischemic; True sorts the lowest first.
proximo_de_desmielinizante = True

if proximo_de_desmielinizante:
    proximidade_str = "tendendo a dismeilinizante"
else:
    proximidade_str = "tendendo a isquemico"

# Up to nine masked, non-EM images of this flair and aggregated pseudo-etiology.
_selection = id_df.query(f"label != 'EM' and has_mask == True and flair == {flair} and pseudo_etiologia_agg == '{pseudo_etiologia}'")
_selection = _selection.sort_values(by="knn_proba", ascending=proximo_de_desmielinizante).head(9)
fig = plot_images_from_df(_selection, cmap="gray")

_title = f" Imagens de flair {flair}, de pseudo-etiologia {pseudo_etiologia}, {proximidade_str}"
fig.fig.suptitle(_title, fontsize=14)

_out_path = f"./images/imagens_flair{flair}_{pseudo_etiologia}_{proximidade_str.replace(' ','_')}"
plt.savefig(_out_path, bbox_inches="tight", dpi=50)

In [None]:
flair = 17
pseudo_etiologia = "isquemica"
# The higher knn_proba, the closer to ischemic; True sorts the lowest first.
proximo_de_desmielinizante = True

if proximo_de_desmielinizante:
    proximidade_str = "tendendo a dismeilinizante"
else:
    proximidade_str = "tendendo a isquemico"

# Up to nine masked, non-EM images of this flair and aggregated pseudo-etiology.
_selection = id_df.query(f"label != 'EM' and has_mask == True and flair == {flair} and pseudo_etiologia_agg == '{pseudo_etiologia}'")
_selection = _selection.sort_values(by="knn_proba", ascending=proximo_de_desmielinizante).head(9)
fig = plot_images_from_df(_selection, cmap="gray")

_title = f" Imagens de flair {flair}, de pseudo-etiologia {pseudo_etiologia}, {proximidade_str}"
fig.fig.suptitle(_title, fontsize=14)

_out_path = f"./images/imagens_flair{flair}_{pseudo_etiologia}_{proximidade_str.replace(' ','_')}"
plt.savefig(_out_path, bbox_inches="tight", dpi=50)

## Plots

In [None]:
# import plotly.graph_objs as go
# import plotly.offline as py

# import pandas as pd
# import numpy as np
# from ipywidgets import interactive, HBox, VBox

# py.init_notebook_mode()

# df = pd.read_csv('https://raw.githubusercontent.com/jonmmease/plotly_ipywidget_notebooks/master/notebooks/data/cars/cars.csv')

# f = go.FigureWidget([go.Scatter(y = df['City mpg'], x = df['City mpg'], mode = 'markers')])
# scatter = f.data[0]
# N = len(df)
# scatter.x = scatter.x + np.random.rand(N)/10 *(df['City mpg'].max() - df['City mpg'].min())
# scatter.y = scatter.y + np.random.rand(N)/10 *(df['City mpg'].max() - df['City mpg'].min())
# scatter.marker.opacity = 0.5

# # Create a table FigureWidget that updates on selection from points in the scatter plot of f
# im_kwargs = dict(
#     facet_col=0,
#     binary_string=True,
#     facet_col_wrap=5,
#     height=800,
#     width=1200,
#     facet_col_spacing=0,
#     facet_row_spacing=0,
# )
# t = px.imshow(
#     images_train[:20],
#     **im_kwargs,
#     )

# t  = go.FigureWidget(t)
# import matplotlib.pyplot as plt
# import seaborn as sns

# def selection_fn(trace,points,selector):
#     with t.batch_update():
#         idxs = np.random.choice(range(100),size = 20, replace = False)
#         #new = go.FigureWidget(imshow(images_train[idxs]))       
#         kws = {**im_kwargs, **{"height":300*ceildiv(len(idxs),5)}}
#         new = go.FigureWidget(px.imshow(images_train[idxs], **kws))
#         t.update({'data':new.data,'layout':new.layout,"frames":new.frames}, overwrite = True)
#         #for i in range(len(t.data)):    
#         #    t.data[i] = data[i]

# scatter.on_selection(selection_fn)

# # Put everything together
# VBox((f,t))
