In [None]:
from IPython.display import display
import numpy as np
import pandas as pd
import scipy.stats
import seaborn
import xarray as xr

In [None]:
import plot
import util

## Load Data ##

In [None]:
# Load the SFA model output and the MRI feature / MRI factor datasets fully
# into memory.
sfa = xr.open_dataset("../models/sfa/sfa.nc").load()
mri_features = xr.open_dataset("../data/processed/mri-features.nc").load()
mri_factors = xr.open_dataset("../data/processed/mri-features-fa.nc").load()

# Every dataset must list the same cases in the same order as `sfa`.
# The raw coordinate values are compared on purpose: `a['case'] == b['case']`
# on xarray objects first aligns both sides on the shared 'case' index
# (inner join), so it only ever compares matching labels and cannot fail.
assert np.array_equal(mri_features['case'].values, sfa['case'].values), \
    "mri_features and sfa do not contain the same cases in the same order"
assert np.array_equal(mri_factors['case'].values, sfa['case'].values), \
    "mri_factors and sfa do not contain the same cases in the same order"

In [None]:
# Give the SFA factor dimension a specific name and label its entries
# GF1, GF2, ... in their existing order.
sfa = sfa.rename({'factor': 'gexp_factor'})
sfa['gexp_factor'] = [
    "GF{}".format(number)
    for number in range(1, len(sfa['gexp_factor']) + 1)
]

In [None]:
# Give the MRI factor dimension a specific name and label its entries
# IF1, IF2, ... in their existing order.
mri_factors = mri_factors.rename({'factor': 'mri_factor'})
mri_factors['mri_factor'] = [
    "IF{}".format(number)
    for number in range(1, len(mri_factors['mri_factor']) + 1)
]

## Correlation Factors—MRI features ##

Compute the correlation between all MRI features (except for the patient number `case`, `Comment` and `MultiFocal`) and the SFA factors (`fm_cor['correlation']`). We also compute the nominal p-value (`fm_cor['nominal_p']`) and the Bonferroni-adjusted p-value (`fm_cor['p']`).

In [None]:
# Keep every variable except the case identifier and the two annotation
# variables. An order-preserving filter is used instead of a set difference:
# set iteration order for strings changes between interpreter runs, which
# would shuffle the 'cad_feature' axis on every kernel restart.
numeric_mri_features = [
    name
    for name in mri_features.keys()
    if name not in {'case', 'Comment', 'MultiFocal'}
]
# Stack the selected variables into one DataArray along a new 'cad_feature' dimension.
mri_features_da = mri_features[numeric_mri_features].to_array('cad_feature')

In [None]:
# Correlate every MRI feature with every SFA factor across cases, using the
# default method of util.cor.
fm_cor = util.cor(mri_features_da, sfa['factors'], 'case')
# Bonferroni adjustment: scale the nominal p-values by the total number of
# tests and cap the result at 1.
fm_cor['p'] = np.fmin(
    1,
    fm_cor['nominal_p'] * fm_cor['nominal_p'].size,
)

Heatmap of correlations. All of them are very low.

In [None]:
# All feature-factor correlations, rows ordered by the row dendrogram.
plot.heatmap(
    fm_cor['correlation'],
    cmap='coolwarm',
    row_dendrogram=True,
)

Heatmap of correlation with nominal p-values < 0.05. This is without multiple testing correction.

In [None]:
# Same heatmap, with the mask set wherever the nominal p-value exceeds 0.05.
plot.heatmap(
    fm_cor['correlation'],
    mask=fm_cor['nominal_p'] > 0.05,
    cmap='coolwarm',
    row_dendrogram=True,
)

None of the correlations are significant after multiple testing correction.

In [None]:
# Smallest Bonferroni-adjusted p-value over all feature-factor pairs.
fm_cor['p'].values.min()

In [None]:
# Same feature-factor analysis, using Spearman correlation.
fm_rcor = util.cor(
    mri_features_da,
    sfa['factors'],
    'case',
    method='spearman',
)
# Bonferroni adjustment: scale the nominal p-values by the total number of
# tests and cap the result at 1.
fm_rcor['p'] = np.fmin(
    1,
    fm_rcor['nominal_p'] * fm_rcor['nominal_p'].size,
)

Heatmap of Spearman rank correlations. All of them are very low.

In [None]:
# All Spearman feature-factor correlations, rows ordered by the row dendrogram.
plot.heatmap(
    fm_rcor['correlation'],
    cmap='coolwarm',
    row_dendrogram=True,
)

Heatmap of correlation with nominal p-values < 0.05. This is without multiple testing correction.

In [None]:
# Same heatmap, with the mask set wherever the nominal p-value exceeds 0.05.
plot.heatmap(
    fm_rcor['correlation'],
    mask=fm_rcor['nominal_p'] > 0.05,
    cmap='coolwarm',
    row_dendrogram=True,
)

None of the correlations are significant after multiple testing correction.

In [None]:
# Smallest Bonferroni-adjusted p-value over all Spearman feature-factor pairs.
fm_rcor['p'].values.min()

## Factor-Factor Correlation ##

In [None]:
# Reindex the SFA factor values to match the indexes they share with the
# MRI factor values.
sfa_da = sfa['factors'].reindex_like(
    mri_factors['factors'],
)

In [None]:
# Correlate every MRI factor with every SFA factor across cases, using the
# default method of util.cor.
ff_cor = util.cor(mri_factors['factors'], sfa_da, 'case')
# Bonferroni adjustment: scale the nominal p-values by the total number of
# tests and cap the result at 1.
ff_cor['p'] = np.fmin(
    1,
    ff_cor['nominal_p'] * ff_cor['nominal_p'].size,
)

Heatmap of correlations. All of them are low.

In [None]:
# All factor-factor correlations, with dendrograms on both axes.
plot.heatmap(
    ff_cor['correlation'],
    cmap='coolwarm',
    row_dendrogram=True,
    col_dendrogram=True,
)

Heatmap of correlations with Bonferroni-adjusted p-values < 0.05 (the mask below uses the adjusted `ff_cor['p']`, i.e. this is after multiple testing correction).

In [None]:
# Same heatmap, with the mask set wherever the Bonferroni-adjusted p-value
# exceeds 0.05.
plot.heatmap(
    ff_cor['correlation'],
    mask=ff_cor['p'] > 0.05,
    cmap='coolwarm',
    row_dendrogram=True,
    col_dendrogram=True,
)

In [None]:
# Same factor-factor analysis, using Spearman correlation.
ff_rcor = util.cor(mri_factors['factors'], sfa_da, 'case', method='spearman')
# Bonferroni adjustment: scale the nominal p-values by the total number of
# tests and cap the result at 1. `.size` counts every entry; `len()` would
# only return the length of the first dimension and under-correct. np.fmin
# is used to match the other three correlation cells.
ff_rcor['p'] = np.fmin(1, ff_rcor['nominal_p'] * ff_rcor['nominal_p'].size)

Heatmap of correlations. All of them are on the low side.

In [None]:
# All Spearman factor-factor correlations, with dendrograms on both axes.
plot.heatmap(
    ff_rcor['correlation'],
    cmap='coolwarm',
    row_dendrogram=True,
    col_dendrogram=True,
)

Heatmap of Spearman correlations with Bonferroni-adjusted p-values < 0.05.

In [None]:
# Same heatmap, with the mask set wherever the Bonferroni-adjusted p-value
# exceeds 0.05.
plot.heatmap(
    ff_rcor['correlation'],
    mask=ff_rcor['p'] > 0.05,
    cmap='coolwarm',
    row_dendrogram=True,
    col_dendrogram=True,
)

In [None]:
# Per-case values of gene-expression factor GF8 against MRI factor IF1.
plot.scatter(
    sfa_da.sel(gexp_factor='GF8'),
    mri_factors['factors'].sel(mri_factor='IF1'),
)

In [None]:
# Bivariate density estimate for the same GF8 / IF1 pair.
with plot.subplots() as (fig, ax):
    seaborn.kdeplot(
        sfa_da.sel(gexp_factor='GF8'),
        mri_factors['factors'].sel(mri_factor='IF1'),
        ax=ax,
    )

In [None]:
# Per-case values of gene-expression factor GF1 against MRI factor IF7 ...
plot.scatter(
    sfa_da.sel(gexp_factor='GF1'),
    mri_factors['factors'].sel(mri_factor='IF7'),
)
# ... followed by a shaded bivariate density estimate of the same pair.
with plot.subplots() as (fig, ax):
    seaborn.kdeplot(
        sfa_da.sel(gexp_factor='GF1'),
        mri_factors['factors'].sel(mri_factor='IF7'),
        ax=ax,
        shade=True,
        gridsize=250,
    )

In [None]:
# Per-case values of gene-expression factor GF1 against MRI factor IF1 ...
plot.scatter(
    sfa_da.sel(gexp_factor='GF1'),
    mri_factors['factors'].sel(mri_factor='IF1'),
)
# ... followed by a shaded bivariate density estimate of the same pair.
with plot.subplots() as (fig, ax):
    seaborn.kdeplot(
        sfa_da.sel(gexp_factor='GF1'),
        mri_factors['factors'].sel(mri_factor='IF1'),
        ax=ax,
        shade=True,
        gridsize=250,
    )