In [55]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
import pickle
import json
import torch

from PIL import Image
from matplotlib import patches
from pathlib import Path
from tqdm.autonotebook import tqdm
from typing import Dict, List, Tuple

from extract_features import get_all_annotations

In [57]:
# annotation CSV that is read into `dataset` below
dataset_file = 'annotations/midog_2022_test.csv'
# The image root is machine-specific: allow overriding it through the
# MIDOG_IMG_DIR environment variable and fall back to the original location.
img_dir = os.environ.get('MIDOG_IMG_DIR', '/data/patho/MIDOG2/finalTest/')
# output locations, relative to the notebook's working directory
figure_dir = 'figures/'
result_dir = 'results/'

In [58]:
# read the annotation table
dataset = pd.read_csv(dataset_file)

# keep only the rows assigned to the test split
test_dataset = dataset.query('split == "test"')

# integer code -> tumor type, numbered in the order the types first appear
test_codes = dict(enumerate(test_dataset['tumortype'].unique()))

# gather test samples and labels, with the tumor type as domain column
test_samples = get_all_annotations(
    dataset=test_dataset,
    img_dir_path=img_dir,
    domain_col='tumortype',
)

In [59]:
def load_metrics(filename, metric, test_codes):
    """Load a metrics JSON file and collect one metric value per tumor type.

    The JSON document must contain an ``'aggregates'`` mapping. Every entry
    whose key starts with ``'tumor'`` and ends with ``metric`` is kept. Such a
    key must consist of exactly three ``'_'``-separated parts; the middle part
    is the tumor-type name with all spaces removed, which is translated back
    to its code through ``test_codes``.

    Args:
        filename: Path of the JSON metrics file.
        metric: Metric suffix to select, e.g. ``'AP'``.
        test_codes: Mapping from code to tumor-type name (names may contain
            spaces).

    Returns:
        A tuple ``(metric_dict, all_metric_data)`` where ``metric_dict`` maps
        each code from ``test_codes`` to the selected aggregate value and
        ``all_metric_data`` is the complete parsed JSON document.

    Raises:
        KeyError: If ``'aggregates'`` is missing, or a selected key names a
            tumor type that is not present in ``test_codes``.
        ValueError: If a selected key does not split into exactly three parts.
    """
    # Aggregate keys carry the tumor name without spaces, so index the codes
    # by the space-stripped name.
    code_by_name = {
        ''.join(name.split(' ')): code for code, name in test_codes.items()
    }

    # Context manager guarantees the file handle is closed again.
    with open(filename, 'rb') as fh:
        all_metric_data = json.load(fh)

    metric_dict = {}
    for key, value in all_metric_data['aggregates'].items():
        if key.startswith('tumor') and key.endswith(metric):
            _, tumor, _ = key.split('_')
            metric_dict[code_by_name[tumor]] = value
    return metric_dict, all_metric_data

In [60]:
import utils.constants as constants
import plotly.express as px
import plotly.graph_objects as go 
import itertools

from plotly.subplots import make_subplots


def _load_run_frame(model_name, result_dir, metric_dir, metric, abbrevs):
    """Build the long-format HDV/metric frame for one model run.

    Reads ``<result_dir>/hdv_<model_name>.pkl`` and
    ``<metric_dir>/<model_name>.json`` and returns a frame with the columns
    ``'Tumortype'``, ``metric``, ``'Layer'`` and ``'value'``.
    Relies on the notebook-level ``test_codes`` mapping.
    """
    # load hdv results
    hdv_filename = os.path.join(result_dir, 'hdv_' + model_name + '.pkl')
    # NOTE: unpickling can execute arbitrary code; only load result files that
    # were produced by a trusted run.
    with open(hdv_filename, 'rb') as fh:
        hdv_scores = pickle.load(fh)

    # load metric results
    metrics_filename = os.path.join(metric_dir, model_name + '.json')
    metric_data, _ = load_metrics(metrics_filename, metric=metric, test_codes=test_codes)

    # create long dataframe
    df = pd.DataFrame(hdv_scores)
    # NOTE(review): the original first mapped the index through `test_codes`
    # and then immediately overwrote the column with this mapping; only the
    # effective assignment is kept. Confirm `abbrevs` is keyed by the index.
    df['Tumortype'] = df.index.map(abbrevs)
    df[metric] = df.index.map(metric_data)
    return df.melt(id_vars=['Tumortype', metric], var_name='Layer')


def plot_similarity(
        model_name: str,
        result_dir: str,
        metric_dir: str,
        detector: str='FCOS',
        kind: str='scatter',
        metric: str='AP',
        all_models: bool=False,
        num_models: int = 5,
        all_layers: bool=False,
        return_df: bool=False
        ) -> 'pd.DataFrame | None':
    """Scatter-plot the HDV values against a detection metric, one facet per layer.

    Args:
        model_name: Name used to locate ``hdv_<model_name>.pkl`` in
            ``result_dir`` and ``<model_name>.json`` in ``metric_dir``.
        result_dir: Directory holding the pickled HDV results.
        metric_dir: Directory holding the metric JSON files.
        detector: Currently unused; kept for backward compatibility.
        kind: Currently unused (a scatter plot is always drawn); kept for
            backward compatibility.
        metric: Metric to read from the JSON aggregates and to plot on the
            x axis.
        all_models: If True, pool several runs. The last character of
            ``model_name`` is replaced by the run index ``0 .. num_models-1``
            and a ``'run'`` column records the index.
        num_models: Number of runs to pool when ``all_models`` is True.
        all_layers: Currently unused; kept for backward compatibility.
        return_df: If True, return the prepared frame instead of plotting.

    Returns:
        The long-format DataFrame when ``return_df`` is True, otherwise None
        (the figure is shown as a side effect).

    Raises:
        ValueError: If ``all_models`` is True and ``num_models`` is below 1.
    """
    abbrevs = constants.MIDOG_ABBREVATIONS
    layer_codes = constants.LAYER_CODES

    if all_models:
        if num_models < 1:
            raise ValueError('num_models must be at least 1 when all_models=True')

        # Strip the run suffix once. Re-slicing the already modified name in
        # every iteration produces wrong names as soon as the index has two
        # digits.
        base_name = model_name[:-1]
        run_frames = []
        for i in range(num_models):
            run_df = _load_run_frame(base_name + str(i), result_dir, metric_dir, metric, abbrevs)
            run_df['run'] = i
            run_frames.append(run_df)
        # concatenate once, after all runs have been collected
        df = pd.concat(run_frames)
    else:
        df = _load_run_frame(model_name, result_dir, metric_dir, metric, abbrevs)

    # rename layers
    df['Layer'] = df['Layer'].map(layer_codes)

    # Drop the HAC tumor type and the P1 layer. A boolean mask is used because
    # the index labels repeat across runs after the concat, which makes
    # label-based dropping unreliable.
    keep = (df['Tumortype'] != 'HAC') & (df['Layer'] != 'P1')
    df = df[keep].copy()

    if return_df:
        return df

    fig = px.scatter(
        df, x=metric, y='value',
        color='Tumortype', facet_col='Layer', facet_col_wrap=5, labels={'value': 'HDV'})

    fig.show()

In [65]:
# run identifier; plot_similarity looks up hdv_<model_name>.pkl and <model_name>.json
model_name = 'yolov7_d6_ALL_0'
# directory that holds the per-model metric JSON files
metric_dir = 'results/'

In [66]:
# single run: HDV vs. AP scatter for the model selected above
plot_similarity(
    model_name,
    result_dir='results/reduced_dims100',
    metric_dir=metric_dir,
    kind='scatter',
    all_models=False,
    all_layers=True,
    return_df=False,
)

In [67]:
# pooled view: the same scatter over runs 0-3 of the model
plot_similarity(
    model_name,
    result_dir='results/reduced_dims100',
    metric_dir=metric_dir,
    kind='scatter',
    all_models=True,
    num_models=4,
    all_layers=True,
    return_df=False,
)