In [None]:
from IPython.display import display
import numpy as np
import scipy.stats
import xarray as xr

In [None]:
import plot

## Load Data ##

In [None]:
# Open the SFA model output. Later cells read its `case` and `factor`
# coordinates and its `factors` variable.
sfa = xr.open_dataset('../models/sfa.nc')
display(sfa)

In [None]:
# Open the processed MRI feature table.
mri = xr.open_dataset('../data/processed/mri-features.nc')

# The correlation analysis pairs MRI and SFA values by position (through
# `.values`), so both datasets must list the same cases in the same order.
# Compare the raw arrays: an `==` between the xarray objects would first align
# them on the `case` index (inner join) and could therefore pass even when
# cases are missing or reordered.
assert np.array_equal(mri['case'].values, sfa['case'].values), \
    "MRI and SFA datasets must contain the same cases in the same order"
display(mri)

## Correlation Factors—MRI features ##

Compute the Pearson correlation between all MRI features (except for patient number, `Comment` and `MultiFocal`) and the SFA factors (`factor_feature_cor`). We also compute the nominal p-value (`factor_feature_cor_nom_p`) and the Bonferroni-adjusted p-value (`factor_feature_cor_p`).

In [None]:
# All variables of the MRI dataset except the case index and the two excluded
# columns. Sorted so the feature order is reproducible across kernel restarts
# (iteration order of a set of strings is not stable between sessions).
mri_features = sorted(set(mri.keys()) - {'case', 'Comment', 'MultiFocal'})

# Result containers, indexed by (factor, mri_feature) and pre-filled with NaN.
factor_feature_cor = xr.DataArray(
    np.full((sfa['factor'].shape[0], len(mri_features)), np.nan),
    dims=['factor', 'mri_feature'],
    coords={
        'factor': sfa['factor'],
        'mri_feature': mri_features,
    },
)
factor_feature_cor.name = "Pearson correlation"
factor_feature_cor_nom_p = factor_feature_cor.copy()

for feature in factor_feature_cor['mri_feature']:
    feature = feature.item()
    # The feature values do not depend on the factor, so fetch them once.
    mri_v = mri[feature].values
    for factor in factor_feature_cor['factor']:
        factor = factor.item()
        factor_v = sfa['factors'].loc[{'factor': factor}].values

        # Use only the cases where both the factor and the feature are known.
        not_nan = np.logical_not(np.isnan(factor_v) | np.isnan(mri_v))
        c, p = scipy.stats.pearsonr(mri_v[not_nan], factor_v[not_nan])

        factor_feature_cor.loc[{'factor': factor, 'mri_feature': feature}] = c
        factor_feature_cor_nom_p.loc[{'factor': factor, 'mri_feature': feature}] = p

# Bonferroni correction: multiply each nominal p-value by the number of tests
# performed (one per factor/feature pair) and cap the result at 1.
n_tests = factor_feature_cor_nom_p.shape[0] * factor_feature_cor_nom_p.shape[1]
factor_feature_cor_p = np.fmin(factor_feature_cor_nom_p * n_tests, 1.0)

# Tick labels for the heatmaps: the factor labels without their first seven
# characters (presumably a fixed prefix -- TODO confirm against the labels).
short_factor_name = [f.item()[7:] for f in sfa['factor']]

Heatmap of correlations. All of them are very low.

In [None]:
# Features as rows, factors as columns; rows are ordered by a dendrogram.
plot.heatmap(
    factor_feature_cor.T,
    cmap='coolwarm',
    row_dendrogram=True,
    xticklabels=short_factor_name,
)

Heatmap of the correlations with a nominal p-value of at most 0.05. This is without multiple testing correction.

In [None]:
# Same heatmap, hiding every cell whose nominal p-value exceeds 0.05.
plot.heatmap(
    factor_feature_cor.T,
    mask=factor_feature_cor_nom_p.T > 0.05,
    cmap='coolwarm',
    row_dendrogram=True,
    xticklabels=short_factor_name,
)

None of the correlations are significant after multiple testing correction.

In [None]:
# Same heatmap, hiding every cell whose adjusted p-value exceeds 0.25.
plot.heatmap(
    factor_feature_cor.T,
    mask=factor_feature_cor_p.T > 0.25,
    cmap='coolwarm',
    row_dendrogram=True,
    xticklabels=short_factor_name,
)