# Imports

In [None]:
# python peripherals
import random
import os
import sys
import math
from pathlib import Path

# numpy
import numpy

# pandas
import pandas

# ipython
from IPython.display import display, HTML

# matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.lines

# plotly
import plotly.express as px

# pytorch
import torch
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.sampler import SequentialSampler
from torch.utils.data import DataLoader
from torchvision import transforms

# wsi-mil
sys.path.insert(1, os.path.join(sys.path[0], './..'))
from nn import datasets
import utils
from nn import trainers

# openslide
import openslide

# Learning Curve

In [None]:
# Root directory holding one sub-directory per training run.
results_dir_path = 'C:/GitHub/WSI_MIL/results'


def plot_learning_curve(results_dir_path, title, start_index=0, end_index=None):
    """Plot train/validation loss per epoch for the latest run and print each curve's minimum.

    The run is located with ``utils.get_latest_subdirectory(results_dir_path)`` and its
    ``results.npy`` is expected to hold a dict with the keys ``'train_loss_array'`` and
    ``'validation_loss_array'``.

    Args:
        results_dir_path: Directory passed to ``utils.get_latest_subdirectory``.
        title: Title drawn above the plot.
        start_index: First epoch (inclusive) to show. Defaults to 0.
        end_index: Last epoch (exclusive) to show; ``None`` means "until the end".
    """
    latest_subdir = utils.get_latest_subdirectory(results_dir_path)
    # NOTE: allow_pickle=True unpickles arbitrary objects -- only load result files you trust.
    results = numpy.load(f"{latest_subdir}/results.npy", allow_pickle=True).item()

    # slice(start, None) is equivalent to [start:], so one slice object covers both cases.
    epoch_window = slice(start_index, end_index)
    # asarray keeps the code working when the losses were stored as plain lists.
    all_train_loss = numpy.asarray(results['train_loss_array'])
    train_loss_array = all_train_loss[epoch_window]
    validation_loss_array = numpy.asarray(results['validation_loss_array'])[epoch_window]
    # Slice the epoch axis with the same window so x values stay the real epoch numbers.
    epochs = numpy.arange(len(all_train_loss))[epoch_window]

    fig, ax = plt.subplots(1, 1, figsize=(20, 20))
    ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    # tick_params also applies to ticks created after plotting, unlike editing label objects.
    ax.tick_params(axis='both', labelsize=20)

    ax.plot(epochs, train_loss_array, label='Train Loss', linewidth=4.0)
    ax.plot(epochs, validation_loss_array, label='Validation Loss', linewidth=4.0)
    ax.set_title(title, fontsize=30)
    ax.legend(fontsize=20)

    # Report the best (lowest) loss of each curve, ignoring NaN entries.
    for loss_array in (train_loss_array, validation_loss_array):
        valid_losses = loss_array[~numpy.isnan(loss_array)]
        # An empty / all-NaN curve has no minimum; print nan instead of raising.
        print(numpy.min(valid_losses) if valid_losses.size else numpy.nan)

    plt.show()


plot_learning_curve(results_dir_path, 'Learning Curve')

In [None]:
# Configuration for the dataset preview below.
dataset_name = 'TCGA'
tile_size = 256  # tile edge length passed to the dataset
desired_magnification = 10
minimal_tiles_count = 10
test_fold = 1
# Raw string: '\P' is not a valid escape sequence and triggers a warning in a normal
# (or f-) string literal. The runtime value is unchanged.
datasets_base_dir_path = r'D:\Pathology'
negative_examples_count = 2  # number of negative tiles shown next to anchor + positive
dataset_size = 200
batch_size = 4

# desired_downsample = 4
# slide = openslide.open_slide(f'C:\GitHub\WSI_MIL\data\Breast\TCGA\TCGA-3C-AALI-01A-01-TSA.7D4960A7-247F-46EE-8D4A-B55170C23EAA.svs')
# level, adjusted_tile_size = datasets.WSIDistanceDataset.get_best_level_for_downsample(slide=slide, desired_downsample=desired_downsample, tile_size=tile_size)
# tile_size_level_0 = tile_size * desired_downsample
# print(f'adjusted_tile_size = {adjusted_tile_size}')
# print(f'level = {level}')
# print(slide.level_downsamples)
# cols = 15
# fig, axes = plt.subplots(nrows=1, ncols=cols, figsize=(40, 80))
# for i in range(cols):
#     tile = slide.read_region((i*(tile_size_level_0), 4*(tile_size_level_0)), level, (adjusted_tile_size, adjusted_tile_size)).convert('RGB')
#     axes[i].imshow(tile)
#     axes[i].axis('off')

# plt.show()

# desired_downsample = 2
# slide = openslide.open_slide(f'C:\GitHub\WSI_MIL\data\Breast\TCGA\TCGA-BH-A1FG-11B-01-TS1.d72ab8b1-085b-4ede-9837-c8f6800ed175.svs')
# level, adjusted_tile_size = datasets.WSIDistanceDataset.get_best_level_for_downsample(slide=slide, desired_downsample=desired_downsample, tile_size=tile_size)
# tile_size_level_0 = tile_size * desired_downsample
# print(f'adjusted_tile_size = {adjusted_tile_size}')
# print(f'level = {level}')
# print(slide.level_downsamples)
# cols = 15
# fig, axes = plt.subplots(nrows=1, ncols=cols, figsize=(40, 80))
# for i in range(cols):
#     tile = slide.read_region((i*(tile_size_level_0), 4*(tile_size_level_0)), level, (adjusted_tile_size, adjusted_tile_size)).convert('RGB')
#     axes[i].imshow(tile)
#     axes[i].axis('off')

# plt.show()

# Build the training dataset from the configuration values defined above.
train_dataset = datasets.WSIDistanceDataset(
    dataset_size=dataset_size,
    buffer_size=50,
    replace=True,
    num_workers=1,
    dataset_name=dataset_name,
    tile_size=tile_size,
    desired_magnification=desired_magnification,
    minimal_tiles_count=minimal_tiles_count,
    test_fold=test_fold,
    train=True,
    datasets_base_dir_path=datasets_base_dir_path,
    max_size=50,
    inner_radius=2,
    outer_radius=11)

# start()/stop() bracket the dataset's use; presumably start() launches background
# work that stop() must tear down -- confirm in nn.datasets. The try/finally makes
# sure stop() runs even if previewing a batch fails.
train_dataset.start()
try:
    # for i, item in enumerate(train_dataset):
    #     if i == 5:
    #         break

    #     fig, axes = plt.subplots(nrows=1, ncols=2 + negative_examples_count, figsize=(40, 80))

    #     print(item['input'][0].shape)

    #     anchor = transforms.ToPILImage()(item['input'][0]).convert("RGB")
    #     positive = transforms.ToPILImage()(item['input'][1]).convert("RGB")

    #     axes[0].imshow(anchor)
    #     axes[0].axis('off')
    #     axes[0].set_title('Anchor Tile')
    #     axes[1].imshow(positive)
    #     axes[1].axis('off')
    #     axes[1].set_title('Positive Tile')

    #     for i in range(2,negative_examples_count + 2):
    #         print(item['input'][i].shape)
    #         negative = transforms.ToPILImage()(item['input'][i]).convert("RGB")
    #         axes[i].imshow(negative)
    #         axes[i].axis('off')
    #         axes[i].set_title('Negative Tile')

    #     plt.show()

    trainer = trainers.WSIDistanceModelTrainerTest()
    indices = list(range(dataset_size))
    sampler = SubsetRandomSampler(indices)
    data_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, drop_last=False, num_workers=0)

    # Loop invariants: one converter instance and one title per subplot column.
    to_pil_image = transforms.ToPILImage()
    tile_titles = ['Anchor Tile', 'Positive Tile'] + ['Negative Tile'] * negative_examples_count

    for batch_index, batch_data in enumerate(data_loader):
        preprocessed_input_features = trainer._preprocess_batch(batch_data=batch_data)
        # Restore the (batch, tiles-per-tuple, channels, height, width) layout.
        # Assumes the preprocessed tiles are still tile_size x tile_size RGB -- TODO confirm.
        tuples_count, tiles_per_tuple = batch_data['input_features'].shape[:2]
        output = preprocessed_input_features.reshape([tuples_count, tiles_per_tuple, 3, tile_size, tile_size])

        display(HTML(f'<H1>Batch {batch_index}</H1>'))

        # One figure per tuple: anchor, positive, then the negative tiles, left to right.
        for tuple_index in range(output.shape[0]):
            fig, axes = plt.subplots(nrows=1, ncols=2 + negative_examples_count, figsize=(10, 20))

            for tile_index, (tile_axis, tile_title) in enumerate(zip(axes, tile_titles)):
                tile_axis.imshow(to_pil_image(output[tuple_index, tile_index]))
                tile_axis.axis('off')
                tile_axis.set_title(tile_title)

            plt.show()
finally:
    train_dataset.stop()
