In [None]:
from fishjaw.util import files

# Segmentation outputs: greyscale images and their masks sit in sibling
# sub-folders of the script output directory.
in_dir = files.script_out_dir() / "jaw_segmentations"
img_in_dir, mask_in_dir = in_dir / "imgs", in_dir / "masks"

# Sort both listings so they come out in a deterministic order
img_paths = sorted(img_in_dir.glob("*.tif"))
mask_paths = sorted(mask_in_dir.glob("*.tif"))

In [None]:
%%capture
from fishjaw.inference import read

# The exclusion flags below are computed from the image paths and then applied
# to the mask paths by position, so the two listings must pair up one-to-one.
# zip() would otherwise silently truncate to the shorter listing.
if len(img_paths) != len(mask_paths):
    raise ValueError(
        f"Found {len(img_paths)} images but {len(mask_paths)} masks; "
        "cannot pair them up by position"
    )

# Exclude the contrast enhanced and bad segmentations
# One flag per image path; True means that fish is to be left out.
exclude = [
    read.is_excluded(
        read.fish_number(f), exclude_train_data=False, exclude_unknown_age=False
    )
    for f in img_paths
]

# Apply the same flags to both listings so they stay aligned
mask_paths = [m for m, e in zip(mask_paths, exclude) if not e]
img_paths = [i for i, e in zip(img_paths, exclude) if not e]

In [None]:
# Read in the masks
import tifffile
from tqdm.notebook import tqdm

# One array per mask file, in the same order as mask_paths
masks = [tifffile.imread(mask_path) for mask_path in tqdm(mask_paths)]

In [None]:
# Load the greyscale images: one array per file, in the same order as img_paths
imgs = [tifffile.imread(img_path) for img_path in tqdm(img_paths)]

In [None]:
# Look up the metadata record for each image via the fish number
# parsed from its path; the order follows img_paths.
metadata = [
    read.metadata(read.fish_number(img_path))
    for img_path in img_paths
]

In [None]:
from radiomics import featureextractor
import SimpleITK as sitk
import pandas as pd
import numpy as np

# Extraction settings are read from a YAML file next to the notebook
params_file = "radiomics_config.yaml"
extractor = featureextractor.RadiomicsFeatureExtractor(params_file)

# The three lists are paired by position; zip() would silently truncate to
# the shortest one, so fail loudly if they have drifted apart.
if not len(imgs) == len(masks) == len(metadata):
    raise ValueError(
        f"Mismatched inputs: {len(imgs)} images, {len(masks)} masks, "
        f"{len(metadata)} metadata records"
    )

# Materialised as a list (rather than a bare zip) so tqdm can show a total
cases = list(zip(imgs, masks, metadata))

# One dict of features per fish; turned into a DataFrame in a later cell
features_list = []
for img_array, mask_array, mdata in tqdm(cases):
    # Convert numpy arrays to SimpleITK images
    img = sitk.GetImageFromArray(img_array)
    mask = sitk.GetImageFromArray(mask_array.astype(np.uint8))

    # Image and mask are given the same spacing.
    # NOTE(review): SimpleITK spacing is ordered (x, y, z) whereas arrays passed
    # to GetImageFromArray are indexed (z, y, x) - confirm that voxel_size is in
    # the order SimpleITK expects.
    img.SetSpacing(mdata.voxel_size)
    mask.SetSpacing(mdata.voxel_size)

    # Extract features
    result = extractor.execute(img, mask)

    # Keep only numeric features
    result_clean = {}
    for key, value in result.items():
        if not isinstance(value, (int, float, np.ndarray)):
            continue
        # Unwrap single-element arrays to plain scalars so the resulting
        # DataFrame columns are numeric rather than holding array objects.
        if isinstance(value, np.ndarray) and value.size == 1:
            value = value.item()
        result_clean[key] = value
    result_clean["ID"] = mdata.n

    features_list.append(result_clean)

In [None]:
# Assemble one row per case from the extracted feature dicts, indexed by ID
features_df = pd.DataFrame(data=features_list).set_index(keys="ID")

# Show the table dimensions, then preview the first few rows
print(features_df.shape)
features_df.head()

In [None]:
# Persist the feature table so later cells can reload it from disk
features_df.to_csv(path_or_buf="features.csv")

In [None]:
import pandas as pd
# Reload the saved feature table, using the first CSV column as the index
features_df = pd.read_csv(filepath_or_buffer="features.csv", index_col=0)

# Preview the first few rows
features_df.head()

In [None]:
"""
Add a column describing the mutation status (wt/het/hom/mosaic)
"""
from fishjaw.inference import feature_selection

# Rebinds features_df to whatever add_metadata_cols returns.
# NOTE(review): later cells index the result with features_df["Features"],
# so the helper presumably returns a frame with grouped (two-level) columns -
# confirm against fishjaw.inference.feature_selection.
# NOTE(review): the input name is overwritten, so re-running this cell passes
# the already-augmented frame back in - confirm the helper tolerates that.
features_df = feature_selection.add_metadata_cols(features_df)
# Preview the first few rows
features_df.head()

In [None]:
"""
Remove features with zero variance
"""

# Names (second column level) of the columns under "Features" whose variance
# is exactly zero. A NaN variance compares unequal to 0, so such columns are
# not flagged here.
null_variance_cols = features_df["Features"].columns[features_df["Features"].var() == 0]

# Drop by full ("Features", name) key rather than by second-level label alone,
# so that a same-named column under another top-level group is left untouched.
# Rebinding instead of mutating in place avoids altering the frame that
# earlier cells displayed.
features_df = features_df.drop(
    columns=[("Features", col) for col in null_variance_cols]
)

print("Dropped:\n\t", ", ".join(null_variance_cols))
features_df.head()

In [None]:
"""
Show variance of the others
"""

In [None]:
"""
Plot correlations
"""

In [None]:
"""
Drop highly correlated features
"""

In [None]:
"""
Z-normalise the remaining features
"""

In [None]:
import seaborn as sns
# Grid of pairwise plots over the columns of the feature table
sns.pairplot(data=features_df)

In [None]:
"""
PCA and biplot to get an idea of what good descriptors might be
"""