In [None]:
import os
import pickle
from pathlib import Path
from collections import OrderedDict
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import colors as mplcolors
from matplotlib.patches import Circle, RegularPolygon
from matplotlib.path import Path as MPLPath
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D

In [None]:
# Named matplotlib colours. The plotting code visible in this notebook passes
# colour names as string literals and does not index this list.
colors = ['yellowgreen', 'teal', 'indigo', 'darkmagenta', 'red', 'darkorange']

# Chondrometrics

In [None]:
# Result tables for the Chondrometrics comparison, one CSV per comparison key.
path_results = '/home/egor/Workspace/proj_scartan/results'
# Exactly one result folder is active; the others are kept for quick switching.
# path_tmp = Path(path_results, 't/csvs_chondr_rigid_lt')
# path_tmp = Path(path_results, 't/csvs_chondr_rigid_dt')
path_tmp = Path(path_results, 't/csvs_chondr_elastic_lt')
# path_tmp = Path(path_results, 't/csvs_chondr_elastic_dt')

dfs = dict()
for csv_key in ('4vs2_000m-012m', '4vs2_000m-024m',
                '34vs12_000m-012m', '34vs12_000m-024m'):
    # The dictionary key doubles as the CSV file stem.
    dfs[csv_key] = pd.read_csv(f"{path_tmp}/{csv_key}.csv")

In [None]:
# Row order of the summary table: the medial (M) codes first, then the
# lateral (L) ones, each listed in the same internal order.
feature_order = [
    # medial
    'cMF.VC', 'MT.VC',
    'cMF.ThCtAB.MEAN', 'cMF.ThCcAB.MEAN', 'MT.ThCtAB.MEAN', 'MT.ThCcAB.MEAN',
    'cMFTC.ThCtAB.MEAN', 'MFTC.ThCtAB.MEAN',
    'ecMF.ThCtAB.MEAN', 'ccMF.ThCtAB.MEAN', 'icMF.ThCtAB.MEAN',
    'aMT.ThCtAB.MEAN', 'eMT.ThCtAB.MEAN', 'cMT.ThCtAB.MEAN',
    'iMT.ThCtAB.MEAN', 'pMT.ThCtAB.MEAN',
    # lateral
    'cLF.VC', 'LT.VC',
    'cLF.ThCtAB.MEAN', 'cLF.ThCcAB.MEAN', 'LT.ThCtAB.MEAN', 'LT.ThCcAB.MEAN',
    'cLFTC.ThCtAB.MEAN', 'LFTC.ThCtAB.MEAN',
    'ecLF.ThCtAB.MEAN', 'ccLF.ThCtAB.MEAN', 'icLF.ThCtAB.MEAN',
    'aLT.ThCtAB.MEAN', 'eLT.ThCtAB.MEAN', 'cLT.ThCtAB.MEAN',
    'iLT.ThCtAB.MEAN', 'pLT.ThCtAB.MEAN',
]

# Comparison to tabulate; uncomment one of the alternatives to inspect it.
# k = '4vs2_000m-012m'
# k = '4vs2_000m-024m'
# k = '34vs12_000m-012m'
k = '34vs12_000m-024m'

# `assign` returns a new frame, so the table stored in `dfs` keeps full precision.
df_tmp = dfs[k].assign(odds_ratio=lambda tbl: tbl["odds_ratio"].round(3))

# One row per feature, one (value, author) column pair per method.
pivoted = df_tmp.pivot(index='feature_code', columns='author',
                       values=['odds_ratio', 'pvalue'])
display(pivoted.loc[feature_order, :])

In [None]:
def _bars_with_stars(axis, table, author, features, shift, bar_width,
                     color, label, *, positions=None, star_color=None,
                     star_lift=0.2, star_size=100, **bar_kwargs):
    """Draw one group of odds-ratio bars and star the significant ones.

    Args:
        axis: Matplotlib axes to draw on.
        table: DataFrame with `author`, `feature_code`, `odds_ratio` and
            `pvalue` columns.
        author: Value of the `author` column that selects the rows to plot.
        features: Feature codes to plot, in plotting order.
        shift: Horizontal offset applied to every bar of the group.
        bar_width: Width of a single bar.
        color: Bar colour.
        label: Legend label of the group.
        positions: x slot of every selected row; defaults to 0..n-1.
        star_color: Colour of the star markers; defaults to `color`.
        star_lift: Vertical gap between a bar top and its star.
        star_size: Marker size of the stars.
        **bar_kwargs: Extra keyword arguments forwarded to `axis.bar`.
    """
    rows = (table[table['author'] == author]
            .set_index('feature_code')
            .loc[list(features), :])

    # Plain arrays are used instead of writing an `x` column into the `.loc`
    # slice, which is a chained assignment on a derived frame.
    if positions is None:
        slots = np.arange(len(rows), dtype=float)
    else:
        slots = np.asarray(positions, dtype=float)
    x = slots + shift
    y = rows['odds_ratio'].to_numpy()

    axis.bar(x, y, width=bar_width, color=color, zorder=100500,
             label=label, **bar_kwargs)

    # A star above the bar marks p < 0.05.
    significant = (rows['pvalue'] < 0.05).to_numpy()
    axis.scatter(x[significant], y[significant] + star_lift,
                 color=color if star_color is None else star_color,
                 marker='*', s=star_size, zorder=100500)


features_lat_vc_thctab = [
    'cLF.VC', 'LT.VC',
    'cLF.ThCtAB.MEAN', 'LT.ThCtAB.MEAN', 'cLFTC.ThCtAB.MEAN', 'LFTC.ThCtAB.MEAN',
    'ecLF.ThCtAB.MEAN', 'ccLF.ThCtAB.MEAN', 'icLF.ThCtAB.MEAN',
    'aLT.ThCtAB.MEAN', 'eLT.ThCtAB.MEAN', 'cLT.ThCtAB.MEAN', 'iLT.ThCtAB.MEAN', 'pLT.ThCtAB.MEAN',
]
features_med_vc_thctab = [
    'cMF.VC', 'MT.VC',
    'cMF.ThCtAB.MEAN', 'MT.ThCtAB.MEAN', 'cMFTC.ThCtAB.MEAN', 'MFTC.ThCtAB.MEAN',
    'ecMF.ThCtAB.MEAN', 'ccMF.ThCtAB.MEAN', 'icMF.ThCtAB.MEAN',
    'aMT.ThCtAB.MEAN', 'eMT.ThCtAB.MEAN', 'cMT.ThCtAB.MEAN', 'iMT.ThCtAB.MEAN', 'pMT.ThCtAB.MEAN',
]
features_lat_thccab = [
    'cLF.ThCcAB.MEAN', 'LT.ThCcAB.MEAN',
]
features_med_thccab = [
    'cMF.ThCcAB.MEAN', 'MT.ThCcAB.MEAN',
]

matplotlib.rcParams.update({'font.size': 14})

ks = [
    '4vs2_000m-012m',
    '4vs2_000m-024m',
    '34vs12_000m-012m',
    '34vs12_000m-024m',
]
for k in ks:
    df = dfs[k]

    # One figure per side of the joint.
    for side in ['med', 'lat']:
        if side == 'med':
            features_vc_thctab = features_med_vc_thctab
            features_thccab = features_med_thccab
        else:
            features_vc_thctab = features_lat_vc_thctab
            features_thccab = features_lat_thccab

        # Each ThCcAB bar shares the x slot of the ThCtAB bar of the same
        # region (slots 2 and 3 with the lists above).
        thccab_slots = [
            features_vc_thctab.index(code.replace('ThCcAB', 'ThCtAB'))
            for code in features_thccab
        ]

        fig, ax = plt.subplots(figsize=(9, 4.2), nrows=1, ncols=1)
        width = 0.25

        _bars_with_stars(ax, df, 'ours', features_vc_thctab, -width, width,
                         'darkorange', 'Automatic (our), cAB')
        _bars_with_stars(ax, df, 'chondr', features_vc_thctab, 0.0, width,
                         'teal', 'Semi-automatic (Chondrometrics), tAB')
        _bars_with_stars(ax, df, 'chondr', features_thccab, width, width,
                         'yellowgreen', 'Semi-automatic (Chondrometrics), cAB',
                         positions=thccab_slots)

        # Tick labels are the feature codes with the thickness suffixes shortened.
        variables = [e.replace('ThCtAB', 'ThC').replace('.MEAN', '')
                     for e in features_vc_thctab]
        ax.set_xticks(range(len(variables)))
        ax.set_xticklabels(variables, rotation=30)

        ax.grid(axis='y')
        ax.set_ylabel('Odds ratio')
        ax.set_xlabel('Morphological feature')
        ax.set_ylim((0, 6.7))

        fig.tight_layout()
        fig.savefig(Path(path_tmp, f"{k}_{side}.tiff"), dpi=600)
        plt.show()

        # Stand-alone legend figure. The file is rewritten on every pass with
        # the same handles and labels.
        fig_leg = plt.figure(figsize=(13, 2))
        ax_leg = fig_leg.add_subplot(111)
        ax_leg.legend(*ax.get_legend_handles_labels(), loc='center',
                      ncol=3, mode="expand")
        ax_leg.axis('off')

        fig_leg.tight_layout()
        fig_leg.savefig(Path(path_tmp, "legend.tiff"), dpi=600)
        plt.show()

        # Release both figures so they do not pile up across iterations.
        plt.close(fig)
        plt.close(fig_leg)

# Biomediq

In [None]:
# Result tables for the Biomediq comparison; the key doubles as the file stem.
path_tmp = '/home/egor/Workspace/proj_scartan/results/t/csvs_biomediq_'

dfs = dict()
for csv_key in ('4vs2_000m-012m', '4vs2_000m-024m',
                '34vs12_000m-012m', '34vs12_000m-024m'):
    dfs[csv_key] = pd.read_csv(f"{path_tmp}/{csv_key}.csv")

In [None]:
# Row order of the summary table.
feature_order = ['F.VC', 'LF.VC', 'MF.VC', 'LT.VC', 'MT.VC', 'P.VC', 'LM.VC', 'MM.VC']

# Comparison to tabulate; uncomment one of the alternatives to inspect it.
# k = '4vs2_000m-012m'
# k = '4vs2_000m-024m'
# k = '34vs12_000m-012m'
k = '34vs12_000m-024m'

# `assign` returns a new frame, so the table stored in `dfs` keeps full precision.
df_tmp = dfs[k].assign(odds_ratio=lambda tbl: tbl["odds_ratio"].round(3))

# One row per feature, one (value, author) column pair per method.
pivoted = df_tmp.pivot(index='feature_code', columns='author',
                       values=['odds_ratio', 'pvalue'])
display(pivoted.loc[feature_order, :])

In [None]:
def _bars_with_stars(axis, table, author, features, shift, bar_width,
                     color, label, *, positions=None, star_color=None,
                     star_lift=0.2, star_size=100, **bar_kwargs):
    """Draw one group of odds-ratio bars and star the significant ones.

    Args:
        axis: Matplotlib axes to draw on.
        table: DataFrame with `author`, `feature_code`, `odds_ratio` and
            `pvalue` columns.
        author: Value of the `author` column that selects the rows to plot.
        features: Feature codes to plot, in plotting order.
        shift: Horizontal offset applied to every bar of the group.
        bar_width: Width of a single bar.
        color: Bar colour.
        label: Legend label of the group.
        positions: x slot of every selected row; defaults to 0..n-1.
        star_color: Colour of the star markers; defaults to `color`.
        star_lift: Vertical gap between a bar top and its star.
        star_size: Marker size of the stars.
        **bar_kwargs: Extra keyword arguments forwarded to `axis.bar`.
    """
    rows = (table[table['author'] == author]
            .set_index('feature_code')
            .loc[list(features), :])

    # Plain arrays are used instead of writing an `x` column into the `.loc`
    # slice, which is a chained assignment on a derived frame.
    if positions is None:
        slots = np.arange(len(rows), dtype=float)
    else:
        slots = np.asarray(positions, dtype=float)
    x = slots + shift
    y = rows['odds_ratio'].to_numpy()

    axis.bar(x, y, width=bar_width, color=color, zorder=100500,
             label=label, **bar_kwargs)

    # A star above the bar marks p < 0.05.
    significant = (rows['pvalue'] < 0.05).to_numpy()
    axis.scatter(x[significant], y[significant] + star_lift,
                 color=color if star_color is None else star_color,
                 marker='*', s=star_size, zorder=100500)


matplotlib.rcParams.update({'font.size': 14})

features_vc = [
    'F.VC', 'LF.VC', 'MF.VC',
    'LT.VC', 'MT.VC',
    'P.VC', 'LM.VC', 'MM.VC',
]

N = len(features_vc)

ks = [
    '4vs2_000m-012m',
    '4vs2_000m-024m',
    '34vs12_000m-012m',
    '34vs12_000m-024m',
]
for k in ks:
    df = dfs[k]

    fig, ax = plt.subplots(figsize=(6, 4.2), nrows=1, ncols=1)
    width = 0.25

    # Two bars per feature, centred on the tick.
    _bars_with_stars(ax, df, 'ours', features_vc, -width / 2, width,
                     'darkorange', 'Automatic (our)')
    _bars_with_stars(ax, df, 'biomediq', features_vc, width / 2, width,
                     'darkmagenta', 'Automatic (Biomediq)')

    variables = list(features_vc)
    ax.set_xticks(range(len(variables)))
    ax.set_xticklabels(variables, rotation=30)

    ax.grid(axis='y')
    ax.set_ylabel('Odds ratio')
    ax.set_xlabel('Morphological feature')
    ax.set_ylim((0, 4.5))

    fig.tight_layout()
    fig.savefig(Path(path_tmp, f"{k}.tiff"), dpi=600)

    # Stand-alone legend figure. The file is rewritten on every pass with the
    # same handles and labels.
    fig_leg = plt.figure(figsize=(6, 2))
    ax_leg = fig_leg.add_subplot(111)
    ax_leg.legend(*ax.get_legend_handles_labels(), loc='center',
                  ncol=2, mode="expand")
    ax_leg.axis('off')

    fig_leg.tight_layout()
    fig_leg.savefig(Path(path_tmp, "legend.tiff"), dpi=600)

    # A single show displays the bar chart and its legend together.
    plt.show()

    # Release both figures so they do not pile up across iterations.
    plt.close(fig)
    plt.close(fig_leg)

# Supplemental. Ablation thickness

In [None]:
path_results = '/home/egor/Workspace/proj_scartan/results'

# The output folder does not depend on the thickness variant.
path_out = Path(path_results, 't/supplemental_thickness')
path_out.mkdir(exist_ok=True)

# Tables are keyed by (comparison, thickness variant).
dfs = dict()
for thickness in ("lt", "dt"):
    # path_tmp = Path(path_results, f't/csvs_chondr_rigid_{thickness}')
    path_tmp = Path(path_results, f't/csvs_chondr_elastic_{thickness}')

    for csv_key in ('4vs2_000m-012m', '4vs2_000m-024m',
                    '34vs12_000m-012m', '34vs12_000m-024m'):
        dfs[(csv_key, thickness)] = pd.read_csv(f"{path_tmp}/{csv_key}.csv")

In [None]:
def _bars_with_stars(axis, table, author, features, shift, bar_width,
                     color, label, *, positions=None, star_color=None,
                     star_lift=0.2, star_size=100, **bar_kwargs):
    """Draw one group of odds-ratio bars and star the significant ones.

    Args:
        axis: Matplotlib axes to draw on.
        table: DataFrame with `author`, `feature_code`, `odds_ratio` and
            `pvalue` columns.
        author: Value of the `author` column that selects the rows to plot.
        features: Feature codes to plot, in plotting order.
        shift: Horizontal offset applied to every bar of the group.
        bar_width: Width of a single bar.
        color: Bar colour.
        label: Legend label of the group.
        positions: x slot of every selected row; defaults to 0..n-1.
        star_color: Colour of the star markers; defaults to `color`.
        star_lift: Vertical gap between a bar top and its star.
        star_size: Marker size of the stars.
        **bar_kwargs: Extra keyword arguments forwarded to `axis.bar`.
    """
    rows = (table[table['author'] == author]
            .set_index('feature_code')
            .loc[list(features), :])

    # Plain arrays are used instead of writing an `x` column into the `.loc`
    # slice, which is a chained assignment on a derived frame.
    if positions is None:
        slots = np.arange(len(rows), dtype=float)
    else:
        slots = np.asarray(positions, dtype=float)
    x = slots + shift
    y = rows['odds_ratio'].to_numpy()

    axis.bar(x, y, width=bar_width, color=color, zorder=100500,
             label=label, **bar_kwargs)

    # A star above the bar marks p < 0.05.
    significant = (rows['pvalue'] < 0.05).to_numpy()
    axis.scatter(x[significant], y[significant] + star_lift,
                 color=color if star_color is None else star_color,
                 marker='*', s=star_size, zorder=100500)


features_lat_vc_thctab = [
    'cLF.VC', 'LT.VC',
    'cLF.ThCtAB.MEAN', 'LT.ThCtAB.MEAN', 'cLFTC.ThCtAB.MEAN', 'LFTC.ThCtAB.MEAN',
    'ecLF.ThCtAB.MEAN', 'ccLF.ThCtAB.MEAN', 'icLF.ThCtAB.MEAN',
    'aLT.ThCtAB.MEAN', 'eLT.ThCtAB.MEAN', 'cLT.ThCtAB.MEAN', 'iLT.ThCtAB.MEAN', 'pLT.ThCtAB.MEAN',
]
features_med_vc_thctab = [
    'cMF.VC', 'MT.VC',
    'cMF.ThCtAB.MEAN', 'MT.ThCtAB.MEAN', 'cMFTC.ThCtAB.MEAN', 'MFTC.ThCtAB.MEAN',
    'ecMF.ThCtAB.MEAN', 'ccMF.ThCtAB.MEAN', 'icMF.ThCtAB.MEAN',
    'aMT.ThCtAB.MEAN', 'eMT.ThCtAB.MEAN', 'cMT.ThCtAB.MEAN', 'iMT.ThCtAB.MEAN', 'pMT.ThCtAB.MEAN',
]
features_lat_thccab = [
    'cLF.ThCcAB.MEAN', 'LT.ThCcAB.MEAN',
]
features_med_thccab = [
    'cMF.ThCcAB.MEAN', 'MT.ThCcAB.MEAN',
]

matplotlib.rcParams.update({'font.size': 14})

ks = [
    '4vs2_000m-012m',
    '4vs2_000m-024m',
    '34vs12_000m-012m',
    '34vs12_000m-024m',
]
for k in ks:
    df_lt = dfs[(k, "lt")]
    df_dt = dfs[(k, "dt")]

    # One figure per side of the joint.
    for side in ['med', 'lat']:
        if side == 'med':
            features_vc_thctab = features_med_vc_thctab
            features_thccab = features_med_thccab
        else:
            features_vc_thctab = features_lat_vc_thctab
            features_thccab = features_lat_thccab

        # Each ThCcAB bar shares the x slot of the ThCtAB bar of the same
        # region (slots 2 and 3 with the lists above).
        thccab_slots = [
            features_vc_thctab.index(code.replace('ThCcAB', 'ThCtAB'))
            for code in features_thccab
        ]

        fig, ax = plt.subplots(figsize=(9, 4.2), nrows=1, ncols=1)
        width = 0.2

        # Four groups side by side, centred on the tick.
        _bars_with_stars(ax, df_lt, 'ours', features_vc_thctab,
                         -1.5 * width, width, 'darkorange',
                         'Automatic (our), local thickness, cAB')
        _bars_with_stars(ax, df_dt, 'ours', features_vc_thctab,
                         -0.5 * width, width, 'indigo',
                         'Automatic (our), distance transform, cAB')
        # Both reference groups are read from the "lt" table.
        _bars_with_stars(ax, df_lt, 'chondr', features_vc_thctab,
                         0.5 * width, width, 'teal',
                         'Semi-automatic (Chondrometrics), tAB')
        # NOTE(review): this group used star size 90 while the others use 100;
        # kept as found - confirm whether that is intended.
        _bars_with_stars(ax, df_lt, 'chondr', features_thccab,
                         1.5 * width, width, 'yellowgreen',
                         'Semi-automatic (Chondrometrics), cAB',
                         positions=thccab_slots, star_size=90)

        # Tick labels are the feature codes with the thickness suffixes shortened.
        variables = [e.replace('ThCtAB', 'ThC').replace('.MEAN', '')
                     for e in features_vc_thctab]
        ax.set_xticks(range(len(variables)))
        ax.set_xticklabels(variables, rotation=30)

        ax.grid(axis='y')
        ax.set_ylabel('Odds ratio')
        ax.set_xlabel('Morphological feature')
        ax.set_ylim((0, 6.7))

        fig.tight_layout()
        fig.savefig(Path(path_out, f"{k}_{side}.png"), dpi=300)
        plt.show()

        # Stand-alone legend figure. The file is rewritten on every pass with
        # the same handles and labels.
        fig_leg = plt.figure(figsize=(10, 2))
        ax_leg = fig_leg.add_subplot(111)
        ax_leg.legend(*ax.get_legend_handles_labels(), loc='center',
                      ncol=2, mode="expand")
        ax_leg.axis('off')

        fig_leg.tight_layout()
        fig_leg.savefig(Path(path_out, "legend.png"), dpi=300)
        plt.show()

        # Release both figures so they do not pile up across iterations.
        plt.close(fig)
        plt.close(fig_leg)

# Supplemental. KL-wise ORs

In [None]:
path_results = '/home/egor/Workspace/proj_scartan/results'

path_tmp = Path(path_results, 't/csvs_chondr_elastic_lt')
path_out = Path(path_results, 't/supplemental_klwise_ors')
path_out.mkdir(exist_ok=True)

# Tables are keyed "<comparison>_<visits>-<kl>", which is also the file stem.
dfs = dict()
for cmp in ("4vs2", "34vs12"):
    for visit_range in ("000m-012m", "000m-024m"):
        stem = f"{cmp}_{visit_range}"
        for kl in ("all", "1", "2", "3"):
            key = f"{stem}-{kl}"
            dfs[key] = pd.read_csv(f"{path_tmp}/{key}.csv")

In [None]:
# Row order of the summary table: the medial (M) codes first, then the
# lateral (L) ones.
feature_order = [
    'cMF.VC', 'MT.VC',
    'cMF.ThCtAB.MEAN', 'cMF.ThCcAB.MEAN',
    'MT.ThCtAB.MEAN', 'MT.ThCcAB.MEAN',
    'cMFTC.ThCtAB.MEAN', 'MFTC.ThCtAB.MEAN',
    'ecMF.ThCtAB.MEAN', 'ccMF.ThCtAB.MEAN', 'icMF.ThCtAB.MEAN',
    'aMT.ThCtAB.MEAN', 'eMT.ThCtAB.MEAN', 'cMT.ThCtAB.MEAN',
    'iMT.ThCtAB.MEAN', 'pMT.ThCtAB.MEAN',

    'cLF.VC', 'LT.VC',
    'cLF.ThCtAB.MEAN', 'cLF.ThCcAB.MEAN',
    'LT.ThCtAB.MEAN', 'LT.ThCcAB.MEAN',
    'cLFTC.ThCtAB.MEAN', 'LFTC.ThCtAB.MEAN',
    'ecLF.ThCtAB.MEAN', 'ccLF.ThCtAB.MEAN', 'icLF.ThCtAB.MEAN',
    'aLT.ThCtAB.MEAN', 'eLT.ThCtAB.MEAN', 'cLT.ThCtAB.MEAN',
    'iLT.ThCtAB.MEAN', 'pLT.ThCtAB.MEAN',
]

# The KL-wise tables are keyed "<comparison>_<visits>-<kl>" with kl in
# {"all", "1", "2", "3"}; a key without the KL suffix is not in `dfs`.
# k = '4vs2_000m-012m-all'
# k = '4vs2_000m-024m-all'
# k = '34vs12_000m-012m-all'
k = '34vs12_000m-024m-all'

df_tmp = dfs[k].copy()
df_tmp["odds_ratio"] = df_tmp["odds_ratio"].round(3)

# One row per feature, one (value, author) column pair per method.
display(df_tmp
        .pivot(index='feature_code', columns='author',
               values=['odds_ratio', 'pvalue'])
        .loc[feature_order, :])

In [None]:
def _bars_with_stars(axis, table, author, features, shift, bar_width,
                     color, label, *, positions=None, star_color=None,
                     star_lift=0.2, star_size=100, **bar_kwargs):
    """Draw one group of odds-ratio bars and star the significant ones.

    Args:
        axis: Matplotlib axes to draw on.
        table: DataFrame with `author`, `feature_code`, `odds_ratio` and
            `pvalue` columns.
        author: Value of the `author` column that selects the rows to plot.
        features: Feature codes to plot, in plotting order.
        shift: Horizontal offset applied to every bar of the group.
        bar_width: Width of a single bar.
        color: Bar colour.
        label: Legend label of the group.
        positions: x slot of every selected row; defaults to 0..n-1.
        star_color: Colour of the star markers; defaults to `color`.
        star_lift: Vertical gap between a bar top and its star.
        star_size: Marker size of the stars.
        **bar_kwargs: Extra keyword arguments forwarded to `axis.bar`.
    """
    rows = (table[table['author'] == author]
            .set_index('feature_code')
            .loc[list(features), :])

    # Plain arrays are used instead of writing an `x` column into the `.loc`
    # slice, which is a chained assignment on a derived frame.
    if positions is None:
        slots = np.arange(len(rows), dtype=float)
    else:
        slots = np.asarray(positions, dtype=float)
    x = slots + shift
    y = rows['odds_ratio'].to_numpy()

    axis.bar(x, y, width=bar_width, color=color, zorder=100500,
             label=label, **bar_kwargs)

    # A star above the bar marks p < 0.05.
    significant = (rows['pvalue'] < 0.05).to_numpy()
    axis.scatter(x[significant], y[significant] + star_lift,
                 color=color if star_color is None else star_color,
                 marker='*', s=star_size, zorder=100500)


features_lat_vc_thctab = [
    'cLF.VC', 'LT.VC',
    'cLF.ThCtAB.MEAN', 'LT.ThCtAB.MEAN', 'cLFTC.ThCtAB.MEAN', 'LFTC.ThCtAB.MEAN',
    'ecLF.ThCtAB.MEAN', 'ccLF.ThCtAB.MEAN', 'icLF.ThCtAB.MEAN',
    'aLT.ThCtAB.MEAN', 'eLT.ThCtAB.MEAN', 'cLT.ThCtAB.MEAN', 'iLT.ThCtAB.MEAN', 'pLT.ThCtAB.MEAN',
]
features_med_vc_thctab = [
    'cMF.VC', 'MT.VC',
    'cMF.ThCtAB.MEAN', 'MT.ThCtAB.MEAN', 'cMFTC.ThCtAB.MEAN', 'MFTC.ThCtAB.MEAN',
    'ecMF.ThCtAB.MEAN', 'ccMF.ThCtAB.MEAN', 'icMF.ThCtAB.MEAN',
    'aMT.ThCtAB.MEAN', 'eMT.ThCtAB.MEAN', 'cMT.ThCtAB.MEAN', 'iMT.ThCtAB.MEAN', 'pMT.ThCtAB.MEAN',
]
features_lat_thccab = [
    'cLF.ThCcAB.MEAN', 'LT.ThCcAB.MEAN',
]
features_med_thccab = [
    'cMF.ThCcAB.MEAN', 'MT.ThCcAB.MEAN',
]

matplotlib.rcParams.update({'font.size': 14})

ks = [
    '4vs2_000m-012m',
    '4vs2_000m-024m',
    '34vs12_000m-012m',
    '34vs12_000m-024m',
]
for k_p in ks:
    for kl in ("all", "1", "2", "3"):
        # Table key: comparison plus the KL subset suffix.
        k = f"{k_p}-{kl}"
        df = dfs[k]

        # One figure per side of the joint.
        for side in ['med', 'lat']:
            if side == 'med':
                features_vc_thctab = features_med_vc_thctab
                features_thccab = features_med_thccab
            else:
                features_vc_thctab = features_lat_vc_thctab
                features_thccab = features_lat_thccab

            # Each ThCcAB bar shares the x slot of the ThCtAB bar of the same
            # region (slots 2 and 3 with the lists above).
            thccab_slots = [
                features_vc_thctab.index(code.replace('ThCcAB', 'ThCtAB'))
                for code in features_thccab
            ]

            fig, ax = plt.subplots(figsize=(9, 4.2), nrows=1, ncols=1)
            width = 0.25

            _bars_with_stars(ax, df, 'ours', features_vc_thctab, -width, width,
                             'darkorange', 'Automatic (our), cAB')
            _bars_with_stars(ax, df, 'chondr', features_vc_thctab, 0.0, width,
                             'teal', 'Semi-automatic (Chondrometrics), tAB')
            _bars_with_stars(ax, df, 'chondr', features_thccab, width, width,
                             'yellowgreen',
                             'Semi-automatic (Chondrometrics), cAB',
                             positions=thccab_slots)

            # Tick labels are the feature codes with the thickness suffixes
            # shortened.
            variables = [e.replace('ThCtAB', 'ThC').replace('.MEAN', '')
                         for e in features_vc_thctab]
            ax.set_xticks(range(len(variables)))
            ax.set_xticklabels(variables, rotation=30)

            ax.grid(axis='y')
            ax.set_ylabel('Odds ratio')
            ax.set_xlabel('Morphological feature')
            ax.set_ylim((0, 6.5))

            fig.tight_layout()
            fig.savefig(Path(path_out, f"{k}_{side}.png"), dpi=300)
            plt.show()

            # Stand-alone legend figure. The file is rewritten on every pass
            # with the same handles and labels.
            fig_leg = plt.figure(figsize=(13, 2))
            ax_leg = fig_leg.add_subplot(111)
            ax_leg.legend(*ax.get_legend_handles_labels(), loc='center',
                          ncol=3, mode="expand")
            ax_leg.axis('off')

            fig_leg.tight_layout()
            fig_leg.savefig(Path(path_out, "legend.png"), dpi=300)
            plt.show()

            # This cell creates 64 figures in total; release each pair as
            # soon as it has been saved and shown.
            plt.close(fig)
            plt.close(fig_leg)

In [None]:
def _bars_with_stars(axis, table, author, features, shift, bar_width,
                     color, label, *, positions=None, star_color=None,
                     star_lift=0.2, star_size=100, **bar_kwargs):
    """Draw one group of odds-ratio bars and star the significant ones.

    Args:
        axis: Matplotlib axes to draw on.
        table: DataFrame with `author`, `feature_code`, `odds_ratio` and
            `pvalue` columns.
        author: Value of the `author` column that selects the rows to plot.
        features: Feature codes to plot, in plotting order.
        shift: Horizontal offset applied to every bar of the group.
        bar_width: Width of a single bar.
        color: Bar colour.
        label: Legend label of the group.
        positions: x slot of every selected row; defaults to 0..n-1.
        star_color: Colour of the star markers; defaults to `color`.
        star_lift: Vertical gap between a bar top and its star.
        star_size: Marker size of the stars.
        **bar_kwargs: Extra keyword arguments forwarded to `axis.bar`.
    """
    rows = (table[table['author'] == author]
            .set_index('feature_code')
            .loc[list(features), :])

    # Plain arrays are used instead of writing an `x` column into the `.loc`
    # slice, which is a chained assignment on a derived frame.
    if positions is None:
        slots = np.arange(len(rows), dtype=float)
    else:
        slots = np.asarray(positions, dtype=float)
    x = slots + shift
    y = rows['odds_ratio'].to_numpy()

    axis.bar(x, y, width=bar_width, color=color, zorder=100500,
             label=label, **bar_kwargs)

    # A star above the bar marks p < 0.05.
    significant = (rows['pvalue'] < 0.05).to_numpy()
    axis.scatter(x[significant], y[significant] + star_lift,
                 color=color if star_color is None else star_color,
                 marker='*', s=star_size, zorder=100500)


features_vc_thctab = [
    'cMF.VC', 'MT.VC',
    'cMF.ThCtAB.MEAN', 'MT.ThCtAB.MEAN', 'cMFTC.ThCtAB.MEAN', 'MFTC.ThCtAB.MEAN',
    'ecMF.ThCtAB.MEAN', 'ccMF.ThCtAB.MEAN', 'icMF.ThCtAB.MEAN',
    'aMT.ThCtAB.MEAN', 'eMT.ThCtAB.MEAN', 'cMT.ThCtAB.MEAN', 'iMT.ThCtAB.MEAN', 'pMT.ThCtAB.MEAN',
]
# Only used in the output file name; the list above holds the medial codes.
side = "med"

matplotlib.rcParams.update({'font.size': 14})

ks_base = [
#     '4vs2_000m-012m',
#     '4vs2_000m-024m',
    '34vs12_000m-012m',
    '34vs12_000m-024m',
]

# Per-KL bar placement (in units of the bar width) and colour: the three
# "ours" bars sit left of the tick, the three Chondrometrics bars right of it.
offset_ours_by_kl = {"1": -2.8, "2": -1.7, "3": -0.6}
offset_chondr_by_kl = {"1": 0.6, "2": 1.7, "3": 2.8}
color_ours_by_kl = {"1": "gold", "2": "orange", "3": "darkorange"}
color_chondr_by_kl = {"1": "paleturquoise", "2": "lightseagreen", "3": "teal"}

for k_base in ks_base:
    fig, ax = plt.subplots(figsize=(14, 4), nrows=1, ncols=1)
    width = 0.11

    for kl in ("1", "2", "3"):
        k = f"{k_base}-{kl}"
        df_sel = dfs[k]

        # Stars keep one fixed colour per method, whatever the bar shade.
        _bars_with_stars(ax, df_sel, 'ours', features_vc_thctab,
                         offset_ours_by_kl[kl] * width, width,
                         color_ours_by_kl[kl],
                         f'Automatic (our), cAB, KL{kl}',
                         star_color='darkorange', star_lift=0.5, alpha=0.8)
        _bars_with_stars(ax, df_sel, 'chondr', features_vc_thctab,
                         offset_chondr_by_kl[kl] * width, width,
                         color_chondr_by_kl[kl],
                         f'Semi-automatic (Chondrometrics), tAB, KL{kl}',
                         star_color='teal', star_lift=0.5, alpha=0.8)

    # Axis decoration is done once per figure. `grid(axis='y')` without an
    # explicit visibility toggles the grid, so repeating it inside the KL loop
    # only left the grid on for an odd number of passes; request it explicitly.
    variables = [e.replace('ThCtAB', 'ThC').replace('.MEAN', '')
                 for e in features_vc_thctab]
    ax.set_xticks(range(len(variables)))
    ax.set_xticklabels(variables, rotation=30)

    ax.grid(True, axis='y')
    ax.set_ylabel('Odds ratio')
    ax.set_xlabel('Morphological feature')
    ax.set_ylim((0, 11.3))

    fig.tight_layout()
    fig.savefig(Path(path_out, f"{k_base}-klwise_{side}.png"), dpi=300)
    plt.show()

    # Stand-alone legend figure. Handles arrive interleaved (ours, chondr)
    # per KL; regroup them so each method fills one legend column.
    fig_leg = plt.figure(figsize=(9, 2))
    ax_leg = fig_leg.add_subplot(111)

    tmp_h, tmp_l = ax.get_legend_handles_labels()
    ax_leg.legend(tmp_h[0::2] + tmp_h[1::2],
                  tmp_l[0::2] + tmp_l[1::2],
                  loc='center',
                  ncol=2, mode="expand")
    ax_leg.axis('off')

    fig_leg.tight_layout()
    fig_leg.savefig(Path(path_out, "legend.png"), dpi=300)
    plt.show()

    # Release both figures so they do not pile up across iterations.
    plt.close(fig)
    plt.close(fig_leg)

# Rev. KL-wise profiling

In [None]:
def _parse_mean_std(cells):
    """Split the cells of a metric column into lists of means and stds.

    Every cell is split on single spaces: the first token is the mean, and the
    second token with its first and last character dropped is the std
    (presumably a "<mean> (<std>)" layout - confirm against the CSV).

    Args:
        cells: pandas Series of strings.

    Returns:
        Tuple `(means, stds)` of two equally long lists of floats.
    """
    means, stds = [], []
    for cell in cells.tolist():
        tokens = cell.split(" ")
        means.append(float(tokens[0]))
        stds.append(float(tokens[1][1:-1]))
    return means, stds


dfs = dict()
path_data = ('/home/egor/Workspace/proj_scartan'
             '/results/t/tables_t23_KL-wise.csv')
path_out = "/home/egor/"

df_vis = pd.read_csv(path_data)

matplotlib.rcParams.update({'font.size': 14})

# (column prefix, y-axis limits, y-axis label). Raw strings keep the mathtext
# backslash from being read as an invalid Python escape sequence.
ks = [
    ("DSC", (0.55, 1.05), "DSC"),
    ("ASSD", (0.0, 2.2), r"ASSD, $\it{mm}$"),
    ("HD", (0.0, 10.0), r"HD, $\it{mm}$"),
]

# (column suffix, bar offset in units of the bar width, colour, legend label)
kl_groups = [
    ("ALL", -1.7, 'indigo', 'all'),
    ("KL2", -0.6, 'gold', 'KL2'),
    ("KL3", 0.6, 'orange', 'KL3'),
    ("KL4", 1.7, 'darkorange', 'KL4'),
]

for k in ks:
    fig, ax = plt.subplots(figsize=(14, 4))
    width = 0.18

    data_x = df_vis["Tissue_subregion"]
    x = np.arange(0, len(data_x), step=1)

    # One bar group per KL subset, with the std as the error bar.
    for suffix, slot, color, label in kl_groups:
        y_means, y_stds = _parse_mean_std(df_vis[f"{k[0]}_{suffix}"])
        ax.bar(x + slot * width, y_means, yerr=y_stds, width=width, alpha=0.8,
               color=color, zorder=100500, label=label)

    variables = data_x
    ax.set_xticks(range(len(variables)))
    ax.set_xticklabels(variables, rotation=30)

    ax.grid(axis='y')

    ax.set_ylabel(k[2])
    ax.set_xlabel('Tissue / sub-region')
    ax.set_xlim((-1., x.size))
    ax.set_ylim(k[1])

    fig.tight_layout()
    fig.savefig(Path(path_out, f"{k[0]}.png"), dpi=300)
    plt.show()

    # Stand-alone legend figure. The file is rewritten on every pass with the
    # same handles and labels.
    fig_leg = plt.figure(figsize=(6, 2))
    ax_leg = fig_leg.add_subplot(111)
    ax_leg.legend(*ax.get_legend_handles_labels(), loc='center',
                  ncol=4, mode="expand")
    ax_leg.axis('off')

    fig_leg.tight_layout()
    fig_leg.savefig(Path(path_out, "legend.png"), dpi=300)
    plt.show()

    # Release both figures so they do not pile up across iterations.
    plt.close(fig)
    plt.close(fig_leg)

In [None]:
dfs = dict()
path_data = ('/home/egor/Workspace/proj_scartan'
             '/results/t/tables_t23_KL-wise.csv')
path_out = "/home/egor/"

df_vis = pd.read_csv(path_data)

# (column prefix, y-axis limits, y-axis label); only the prefix is used in
# this cell. Raw strings keep the mathtext backslash from being read as an
# invalid Python escape sequence.
ks = [
    ("DSC", (0.55, 1.05), "DSC"),
    ("ASSD", (0.0, 2.2), r"ASSD, $\it{mm}$"),
    ("HD", (0.0, 10.0), r"HD, $\it{mm}$"),
]

for k in ks:
    data_x = df_vis["Tissue_subregion"]
    x = np.arange(0, len(data_x), step=1)

    # Parse each cell on single spaces: the first token is the mean, the
    # second token without its first and last character is the std.
    y_means = dict()
    y_stds = dict()

    for kl in ("ALL", "KL2", "KL3", "KL4"):
        data_y = df_vis[f"{k[0]}_{kl}"]
        y_means[kl] = np.array([float(e.split(" ")[0]) for e in data_y.tolist()])
        y_stds[kl] = np.array([float(e.split(" ")[1][1:-1]) for e in data_y.tolist()])

    # Append, per metric, a copy of the sub-region names and the KL4 - KL2
    # difference of the means to the table itself.
    df_vis[f"{k[0]}_sub-reg"] = data_x
    df_vis[f"{k[0]}_diff_means"] = y_means["KL4"] - y_means["KL2"]

display(df_vis)

# Rev. Etc

In [None]:
%reload_ext autoreload
%autoreload 2

import os
import re
import shutil
import pickle
import argparse
from pathlib import Path
from glob import glob
from collections import defaultdict
import numpy as np
from tqdm import tqdm
from scipy import stats
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import sklearn.linear_model
import statsmodels.api as sm
plt.style.use('default')
from IPython.display import display

from scartan.datasets.meta_oai import (side_code_to_str,
                                        prefix_var_to_visit_month,
                                        release_to_visit_month)
from scartan.various import (bland_altman_plot, cohen_d,
                             cohen_d_var, r2, linreg)
from scartan.datasets.oai._assessments import (
    read_compose_asmts_mri as read_compose_asmts,
    preproc_asmts_biomediq,
    preproc_asmts_chondrometrics,
    read_info_proj_22,
    preproc_info_proj_22)
from scartan.datasets.oai._constants import (prefix_var_to_visit_month,
                                             release_to_visit_month)

In [None]:
# Load clinical data

def read_compose_clinical(paths):
    """Read the clinical tables and keep the per-visit columns used here.

    Args:
        paths: File paths handed to `read_compose_asmts` unchanged.

    Returns:
        DataFrame with the columns `patient` (str), `PREFIX_VAR`,
        `prefix_var` (same values as `PREFIX_VAR`), `visit_month`, `visit`
        (int), `AGE` and `P01BMI`.
    """
    table = read_compose_asmts(paths)

    # Upper-case every column name, then rename the ID column to `patient`.
    table.columns = table.columns.str.upper()
    table = table.rename(columns={"ID": "patient"}).astype({"patient": str})

    prefixes = table["PREFIX_VAR"].tolist()
    months = [prefix_var_to_visit_month[prefix] for prefix in prefixes]

    table.loc[:, "visit_month"] = months
    table.loc[:, "prefix_var"] = list(prefixes)
    # Drop the last character of the month label (presumably a trailing
    # unit letter - confirm) and keep the number.
    table.loc[:, "visit"] = [int(month[:-1]) for month in months]

    kept_columns = [
        "patient", "PREFIX_VAR", "prefix_var", "visit_month", "visit",
        "AGE", "P01BMI",
    ]
    return table.loc[:, kept_columns]


dir_clinical = Path("/home/egor/MedData/OAI_general/OAI_CompleteData_ASCII")
# Two-character suffix per file; sorted for a stable read order.
paths_clinical = sorted(dir_clinical.glob("AllClinical??.txt"))

df_clinical = read_compose_clinical(paths_clinical)
print(df_clinical.head())

In [None]:
# Distribution of KLs in train/test subsets

# NOTE(review): `sources` is not defined in the visible part of this notebook;
# presumably it maps a dataset name to its "trainval_df" / "test_df" frames -
# confirm where it is created.
for data in ("oai_imo",):
    for subset in ("trainval_df", "test_df"):
        print(subset)

        # Keep one row per (patient, release) pair.
        tmp_df = (sources[data][subset]
                  .copy()
                  .drop_duplicates(subset=["patient", "release"]))
        tmp_df.loc[:, "visit_month"] = [release_to_visit_month[r]
                                        for r in tmp_df["release"].tolist()]

        for column in ("patient", "release", "KL"):
            print(tmp_df[column].tolist())

        # The merged frame is built but not used below; the counts come from
        # `tmp_df` itself.
        tmp_merge = tmp_df.merge(df_clinical, on=["patient", "prefix_var"], how="left")
        # print(tmp_merge)
        print(tmp_df["KL"].value_counts())

# ---

In [None]:
# Age, sex, BMI, KL of the atlases

# One tuple per atlas: (patient, release, age, sex, BMI, KL).
atlases_chondr = [
    ("9040390", "0.C.2", 48, "Male", 25.9, 2),
    ("9200458", "0.C.2", 72, "Male", 27.0, 1),
    ("9382271", "0.C.2", 49, "Female", 31.0, 2),
    ("9402139", "3.E.1", 78, "Male", 27.7, 1),
    ("9626069", "0.C.2", 62, "Female", 38.3, 3),
]
atlases_biomediq = [
    ("9040390", "0.C.2", 48, "Male", 25.9, 2),
    ("9200458", "0.C.2", 72, "Male", 27.0, 1),
    ("9382271", "0.C.2", 49, "Female", 31.0, 2),
    ("9626069", "0.C.2", 62, "Female", 38.3, 3),
    ("9905156", "0.C.2", 61, "Female", 29.0, 2),
]

# patients = ["9040390", "9200458", "9382271", "9402139", "9626069", "9905156"]
# df_tmp = df_clinical[df_clinical["patient"].isin(patients) & df_clinical["patient"]]
# display(df_tmp.head(n=30))

# Transpose the Chondrometrics tuples into columns and pick age and BMI.
_, _, _ages, _, _bmis, _ = zip(*atlases_chondr)
tmp_age = list(_ages)
tmp_bmi = list(_bmis)

# Mean and (population) standard deviation of each.
for _values in (tmp_age, tmp_bmi):
    print(np.mean(_values), np.std(_values))

# ---

In [None]:
# Overlap between IMO and FBC

PATH_META_OAI_IMO = ('/home/egor/Workspace/proj_scartan'
                     '/data/91_OAI_iMorphics_full_meta/meta_dynamic.csv')

PATH_MRI_AS_ROOT = '/home/egor/MedData/OAI_general/MRI_Assessment_ASCII'
PATH_MRI_AS_SQ = Path(PATH_MRI_AS_ROOT, 'Semi-Quant')
PATH_MRI_AS_Q = Path(PATH_MRI_AS_ROOT, 'Quant')

# Assessment files, sorted for a stable read order.
fpaths_mri_as_biomediq = sorted(PATH_MRI_AS_Q.glob('kmri_fnih_qcart_biomediq??.txt'))
fpaths_mri_as_chondr = sorted(PATH_MRI_AS_Q.glob('kmri_qcart_eckstein??.txt'))

fpath_proj_22_info = ('/home/egor/MedData/OAI_general/'
                      'OAI_CompleteData_ASCII/Clinical_FNIH.txt')


df_proj_22 = read_info_proj_22(fpath_proj_22_info)
df_proj_22_proc = preproc_info_proj_22(df_proj_22)
display(df_proj_22_proc.head())

# The reader is imported under the alias `read_compose_asmts`; the original
# name `read_compose_asmts_mri` is not bound in this notebook.
df_chondr = read_compose_asmts(fpaths_mri_as_chondr, verbose=True)
df_chondr_proc = preproc_asmts_chondrometrics(df_chondr, projects=('22', ))
display(df_chondr_proc.head())

df_trainval = pd.read_csv("/home/egor/Workspace/proj_scartan/rev/trainval_df.csv")
df_test = pd.read_csv("/home/egor/Workspace/proj_scartan/rev/test_df.csv")

# Tag every row with its subset before stacking the two frames.
df_trainval.loc[:, "subset"] = "trainval"
df_test.loc[:, "subset"] = "test"
df_imo = pd.concat([df_trainval, df_test], axis=0, ignore_index=True)
display(df_imo.head())

# Make the patient column a string in both frames.
df_imo = df_imo.astype({"patient": str})
df_chondr_proc = df_chondr_proc.astype({"patient": str})

In [None]:
# Rows present in both frames (matched on patient, visit prefix and side),
# extended with the project-22 info columns where available.
df_overlap = df_imo.merge(df_chondr_proc, on=["patient", "prefix_var", "side"], how="inner")
df_overlap = df_overlap.merge(df_proj_22_proc, on=["patient", "side"], how="left")


# display(df_overlap)
print(len(df_overlap))
print(df_overlap["patient"].tolist())
print(df_overlap["subset"].tolist())
print(df_overlap["GROUPTYPE"].value_counts())

for subset in ("trainval", "test"):
    t = df_overlap[df_overlap["subset"] == subset]
    unique_patients = pd.unique(t["patient"])
    print()
    print(f"Subset {subset} has overlap of:")
    print(f"- {len(t)} scans")
    # This line counts distinct patients, so it is labelled as such.
    print(f"- {len(unique_patients)} patients")
    print(unique_patients.tolist())