# System Evaluation

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib2 import Path
from collections import OrderedDict
import shutil

import numpy as np
import skimage.io as io
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
import pandas as pd

from src.core.classifier import Classifier
from src.core.data_manager import DataManager
from src.core.cell_extractor import Extractor

# Set up the environment
generated_dir = Path("generated")
# Later cells write a PDF and several LaTeX tables into this directory, and nothing
# else in the notebook creates it, so make sure it exists before any of them run.
generated_dir.mkdir(parents=True, exist_ok=True)

# Set to True to extract cells from the fields and label them manually (usually only
# the first time this notebook is run).
# WARNING: the cells guarded by this flag delete and rebuild the assets directory and
# the "labeled_cells" directory.
EXECUTE_CELL_LABELING = False

#### Setup The environment

In [None]:
# Class (directory) name -> integer label.
# Built from a list of pairs rather than a dict literal: a dict literal only preserves
# insertion order on Python 3.7+, so OrderedDict({...}) could silently scramble the
# class order that later cells rely on.
labels_mapping = OrderedDict([('epiteliali', 0),
                              ('neutrofili', 1),
                              ('eosinofili', 2),
                              ('mastcellule', 3),
                              ('linfociti', 4),
                              ('mucipare', 5),
                              ('altro', 6)])

# Project helper constructed with the name of the assets root ("assets").
# The cells below read its assets_path, input_path, cells_path, out_path and
# classes_path attributes.
data_manager = DataManager("assets")

#### Build directory structure

In [None]:
if EXECUTE_CELL_LABELING:
    # Wipe whatever a previous run left behind so the tree is rebuilt from scratch.
    if data_manager.assets_path.exists():
        shutil.rmtree(data_manager.assets_path)

    # Create the root first, then its working sub-directories, in this exact order.
    for attr_name in ("assets_path", "input_path", "cells_path", "out_path"):
        getattr(data_manager, attr_name).mkdir()

    # One output directory per class.
    for class_dir in data_manager.classes_path:
        class_dir.mkdir()
    

## First pass: automatic extraction and classification

### Get fields from database

##### Since there are files with the same name in different folders, we reindex the file names and then copy the field images into the assets/input directory. This step is essential; otherwise fields with duplicate names won't be extracted.

In [None]:
%%time
if EXECUTE_CELL_LABELING:
    fields_dir = Path("../../Datasets/Fields/Cytospin")
    # glob() yields entries in file-system order, which differs between machines and
    # runs. Sorting makes the source -> "img-NNNN.png" numbering reproducible.
    fields = sorted(field for slides_dir in fields_dir.glob("*") for field in slides_dir.glob("*.png"))

    # Give every field a unique, sequential name so that files sharing a name in
    # different slide folders do not collide once copied into a single directory.
    reindexed = [(origin_name, "img-{:04d}.png".format(i)) for i, origin_name in enumerate(fields, start=1)]
    for src, dst_name in reindexed:
        shutil.copy(src, data_manager.input_path / dst_name)

### Extract Cells from fields

In [None]:
%%time
if EXECUTE_CELL_LABELING:
    extractor = Extractor(data_manager)
    # Per the original author's note: reads every field from assets/input and
    # extracts its cells into assets/cells (behaviour of the project's Extractor;
    # not verifiable from this notebook).
    extractor.batch_process()

### Classify cells

In [None]:
%%time
if EXECUTE_CELL_LABELING:
    # The classifier is configured from "config.ini" and shares the same data manager.
    classifier = Classifier("config.ini", data_manager)
    # Per the original author's note: reads every cell from assets/cells and moves it
    # into assets/out/<predicted class> (behaviour of the project's Classifier;
    # not verifiable from this notebook).
    classifier.batch_process()

## Manual labeling

In [None]:
# Directory that holds the manually corrected labels (one sub-folder per class).
labeled_cells_dir_name = "labeled_cells"
labeled_cells_dir = Path(labeled_cells_dir_name)

# WARNING: when the flag is set, any existing labeled_cells directory is deleted and
# replaced by a fresh copy of the classifier output, discarding earlier manual labels.
if EXECUTE_CELL_LABELING:
    # copytree needs a destination that does not exist yet, so remove the old one first.
    if labeled_cells_dir.exists():
        shutil.rmtree(labeled_cells_dir)

    # Start the manual labeling from the classifier's predictions.
    shutil.copytree(data_manager.out_path, labeled_cells_dir)

And now the fun part! You must manually label the cells (with supervision).
When you have your final result, you must copy the labeled_cells directory into the cells dataset.

Make a PDF with a presentation grid of the cells for labeling revision.

In [None]:
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import os 

# Images of one class to put on the revision sheets (example path: change the class
# folder to review another class). The pattern is passed as a plain string.
images = io.imread_collection(str(labeled_cells_dir / 'eosinofili' / '*.png')) ## example path

nrows = 5
ncols = 3
gridsize = nrows * ncols
# Integer ceiling division. `np.ceil(len(images) / gridsize)` silently drops the last,
# partially filled page wherever `/` on two ints is floor division (Python 2).
npages = -(-len(images) // gridsize)

# str(...) keeps the path handling consistent with the other export cells.
with PdfPages(str(generated_dir / 'labeling_revision_grid.pdf')) as pdf:
    for page in range(0, npages):
        # 8.27 x 11.69 inches is an A4 portrait page.
        fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(8.27,11.69), dpi=100)
        low = gridsize * page
        high = min(len(images), (low+gridsize))

        # One cell image per grid slot, titled with its file stem and a blank to fill in by hand.
        for img, file_name, ax in zip(images[low:high], images.files[low:high], axes.ravel()):
            ax.imshow(img)
            ax.set_title("{}: _________".format(Path(file_name).stem), fontdict={'size':10})

        # Hide the axes everywhere, including the unused slots of the last page.
        for ax in axes.ravel():
            ax.set_axis_off()

        plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
        pdf.savefig(fig)
        plt.close(fig)  # release the figure: one is created per page

## Second pass: Classifier Evaluation

In [None]:
def load_labels(path, mapping=None):
    """Collect ``(file name, label)`` pairs from a directory of per-class sub-folders.

    Every sub-directory of ``path`` is treated as a class: its name is looked up in
    ``mapping`` and each ``*.png`` file directly inside it contributes one row.
    Entries of ``path`` that are not directories (e.g. ``.DS_Store``) are ignored.

    Parameters
    ----------
    path : pathlib-style path
        Directory containing one sub-directory per class.
    mapping : dict
        ``{'label name': label value}``; must be provided and non-empty.

    Returns
    -------
    numpy.ndarray
        String array of shape ``(n_files, 2)`` with columns ``[file name, label]``.
        The label is stored as a string because the array has a single dtype.
        The shape is ``(0, 2)`` when no image is found, so column indexing still works.

    Raises
    ------
    ValueError
        If ``mapping`` is ``None`` or empty.
    KeyError
        If a sub-directory name is not a key of ``mapping``.
    """
    if not mapping:
        raise ValueError("mapping argument can't be None! Assign a dictionary of pairs: {'label name': label value}")

    labels = []

    # Sorted traversal makes the result independent of the file-system listing order.
    for dir_class in sorted(path.glob("*")):
        if not dir_class.is_dir():
            continue  # stray files next to the class folders are not classes
        label = mapping[dir_class.name]  # map directory name to label value
        for file in sorted(dir_class.glob("*.png")):
            labels.append((file.name, label))

    # Explicit dtype and reshape keep the result 2-D even when `labels` is empty.
    return np.array(labels, dtype=str).reshape(-1, 2)
    

### Dataset cell distribution

In [None]:
# Class names, in the order defined by the label mapping.
classes = list(labels_mapping)

# Count the PNG files in each class folder of the manually labeled dataset.
number_of_samples = np.asarray([
    sum(1 for _ in (labeled_cells_dir / class_name).glob("*.png"))
    for class_name in classes
])

# Share of each class as a percentage; true_divide forces floating-point division.
percentual = 100 * np.true_divide(number_of_samples, number_of_samples.sum())

dataset_stats_df = pd.DataFrame({'Classe':classes,
                                 'Numero di campioni':number_of_samples,
                                 '%':percentual})

dataset_stats_df

In [None]:
# Export the class distribution table as LaTeX: no index column, two decimals for floats.
distribution_tex = str(generated_dir / "cyt_cells_classes_distribution.tex")
dataset_stats_df.to_latex(distribution_tex, float_format='%1.2f', index=False)

### Load labels and build confusion matrix

In [None]:
# Load (file name, label) pairs: manual ground truth vs. classifier output
true_labels = load_labels(labeled_cells_dir, labels_mapping)
predicted_labels = load_labels(data_manager.out_path, labels_mapping)

# Sort both arrays by file name: confusion_matrix pairs its two arguments by position,
# so row i of each array must refer to the same cell image.
true_labels = true_labels[true_labels[:,0].argsort()]
predicted_labels = predicted_labels[predicted_labels[:,0].argsort()]

# Fail loudly if the two directories do not contain exactly the same files; otherwise
# the labels would be compared across different cells and the matrix would be wrong.
if not np.array_equal(true_labels[:,0], predicted_labels[:,0]):
    raise ValueError("labeled cells and classifier output do not contain the same image files")

cm = confusion_matrix(true_labels[:,1],predicted_labels[:,1])

##### Convert numpy confusion matrix to pandas DataFrame

In [None]:
# The confusion matrix only has rows/columns for labels that actually occur in the
# data. Derive those class names from the labels instead of `del(classes[3])`, which
# hard-coded the missing class and removed one more class every time the cell was re-run.
present_labels = {str(label) for label in true_labels[:, 1]} | {str(label) for label in predicted_labels[:, 1]}
# Labels are stored as strings, so order the names by the string form of their value
# to match the sorted label order used by confusion_matrix.
classes = [name
           for name, value in sorted(labels_mapping.items(), key=lambda item: str(item[1]))
           if str(value) in present_labels]

rows_index = pd.MultiIndex.from_tuples([('Vera',c) for c in classes])
columns_index = pd.MultiIndex.from_tuples([('Predetta',c) for c in classes])
cmf = pd.DataFrame(cm, index = rows_index, columns = columns_index)
cmf

In [None]:
# Export the confusion matrix as LaTeX, keeping the two-level row/column headers.
# The path is converted with str(...) like the class-distribution export, because
# older pandas versions only accept a file name string or a writable buffer.
cmf.to_latex(str(generated_dir / "classifier_confusion_matrix.tex"), multirow=True, multicolumn=True)

### Evaluation Metrics

In [None]:
# Per-class precision, recall, F1 and support, one entry per label found in the data.
precision, recall, f1, support = precision_recall_fscore_support(true_labels[:,1],predicted_labels[:,1])

# Name the rows from the labels that actually occur rather than relying on `classes`
# having been edited by an earlier cell. Labels are stored as strings, so order by the
# string form of the value to match the sorted label order of the metric arrays.
metric_labels = {str(label) for label in true_labels[:, 1]} | {str(label) for label in predicted_labels[:, 1]}
metric_classes = [name
                  for name, value in sorted(labels_mapping.items(), key=lambda item: str(item[1]))
                  if str(value) in metric_labels]

metrics_df = pd.DataFrame({'Precision': precision,
                           'Recall': recall,
                           'F1': f1,
                           'Support': support},
                         index = metric_classes)
metrics_df.index.name = 'Classe'
metrics_df

In [None]:
# Export the metrics table as LaTeX with two decimals.
# The path is converted with str(...) like the class-distribution export, because
# older pandas versions only accept a file name string or a writable buffer.
metrics_df.to_latex(str(generated_dir / "classifier_metrics.tex"), multirow=True, multicolumn=True, float_format='%1.2f')

## Extraction evaluation