## Filter relevant features

Features were extracted and the dataframes were [combined](02-combine-dataframes.ipynb). Not all features extracted with SimpleITK are useful for the downstream process. Relevant features will be filtered and kept for subsequent analysis.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Define the directory where the feature tables are stored.
# The trailing slash is required: file names are appended to this string
# with plain concatenation when reading and writing the CSV files.
path = "../../measurements/wt-postnatal-development/"

In [3]:
# Read the concatenated measurements table into a DataFrame and display it
measurements = pd.read_csv(f"{path}02-measurements-concatenated.csv")
measurements

Unnamed: 0,label,maximum,mean,median,minimum,sigma,sum,variance,bbox_0,bbox_1,...,label.1,surface_area,volume,bbox_volume,convex_volume,sphericity,solidity,perimeter_2d,major_axis_length_2d,minor_axis_length_2d
0,1,803.0,314.092437,282.884766,186.0,99.098496,261639.0,9820.511878,4,290,...,1,820.528198,833.0,3456,1525,0.521780,0.546230,57.153073,27.467594,6.307231
1,2,1845.0,620.114058,466.576172,174.0,407.194822,467566.0,165807.623096,5,281,...,2,417.350677,754.0,1254,865,0.959910,0.871676,27.791138,10.665223,6.330765
2,3,564.0,274.831858,253.494141,154.0,91.856545,62112.0,8437.624936,21,275,...,3,209.240112,226.0,416,274,0.857521,0.824818,19.421265,7.894590,4.064904
3,4,540.0,285.008439,268.189453,153.0,89.769054,67547.0,8058.482979,34,277,...,4,227.670990,237.0,480,284,0.813471,0.834507,19.421265,8.565180,3.459917
4,5,264.0,234.200000,238.798828,202.0,20.192821,5855.0,407.750000,45,279,...,5,80.413963,25.0,72,33,0.514177,0.757576,8.599912,3.126944,2.309401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18703,174,685.0,346.677288,326.101562,187.0,92.460071,424333.0,8548.864700,1324,341,...,174,1265.061035,1224.0,6732,3094,0.437415,0.395604,93.084047,25.469903,16.027167
18704,175,609.0,337.340530,326.101562,188.0,81.507414,343750.0,6643.458582,1351,348,...,175,978.182373,1019.0,5250,2200,0.500626,0.463182,67.974426,28.804014,11.992024
18705,176,347.0,255.431818,252.820312,193.0,37.513655,11239.0,1407.274313,1365,349,...,176,70.511078,44.0,90,54,0.854795,0.814815,12.392149,5.022825,2.508233
18706,177,345.0,247.479915,245.492188,176.0,32.455513,117058.0,1053.360295,1367,352,...,177,638.787354,473.0,3465,1333,0.459588,0.354839,59.374514,28.204786,8.149047


In [4]:
# List every column available in the loaded table
measurements.columns

Index(['label', 'maximum', 'mean', 'median', 'minimum', 'sigma', 'sum',
       'variance', 'bbox_0', 'bbox_1', 'bbox_2', 'bbox_3', 'bbox_4', 'bbox_5',
       'centroid_0', 'centroid_1', 'centroid_2', 'elongation',
       'feret_diameter', 'flatness', 'roundness',
       'equivalent_ellipsoid_diameter_0', 'equivalent_ellipsoid_diameter_1',
       'equivalent_ellipsoid_diameter_2', 'equivalent_spherical_perimeter',
       'equivalent_spherical_radius', 'number_of_pixels',
       'number_of_pixels_on_border', 'perimeter', 'perimeter_on_border',
       'perimeter_on_border_ratio', 'principal_axes0', 'principal_axes1',
       'principal_axes2', 'principal_axes3', 'principal_axes4',
       'principal_axes5', 'principal_axes6', 'principal_axes7',
       'principal_axes8', 'principal_moments0', 'principal_moments1',
       'principal_moments2', 'age', 'biol_repl', 'image_id', 'label.1',
       'surface_area', 'volume', 'bbox_volume', 'convex_volume', 'sphericity',
       'solidity', 'perimeter

### Filter relevant features

Apply a filter to keep relevant features.

In [5]:
# Keep only the columns needed downstream. The order of the names below is
# the column order of the resulting table; every column not listed here
# (e.g. bbox_0..bbox_2, centroid_*, principal_axes*, label.1) is dropped.
measurements_filtered = measurements[
    [
        "label",
        # summary statistics
        "maximum", "mean", "median", "minimum", "sigma", "sum", "variance",
        # only bbox_3..bbox_5 are selected from the bbox_* columns
        "bbox_3", "bbox_4", "bbox_5",
        # shape descriptors
        "elongation", "feret_diameter", "flatness",
        "equivalent_ellipsoid_diameter_0",
        "equivalent_ellipsoid_diameter_1",
        "equivalent_ellipsoid_diameter_2",
        "equivalent_spherical_perimeter",
        "equivalent_spherical_radius",
        # volume / surface based features
        "volume", "surface_area", "bbox_volume", "convex_volume",
        "sphericity", "solidity",
        # *_2d features
        "perimeter_2d", "major_axis_length_2d", "minor_axis_length_2d",
        # grouping columns, kept last
        "age", "biol_repl", "image_id",
    ]
]
measurements_filtered

Unnamed: 0,label,maximum,mean,median,minimum,sigma,sum,variance,bbox_3,bbox_4,...,bbox_volume,convex_volume,sphericity,solidity,perimeter_2d,major_axis_length_2d,minor_axis_length_2d,age,biol_repl,image_id
0,1,803.0,314.092437,282.884766,186.0,99.098496,261639.0,9820.511878,24,9,...,3456,1525,0.521780,0.546230,57.153073,27.467594,6.307231,8,1,0
1,2,1845.0,620.114058,466.576172,174.0,407.194822,467566.0,165807.623096,11,6,...,1254,865,0.959910,0.871676,27.791138,10.665223,6.330765,8,1,0
2,3,564.0,274.831858,253.494141,154.0,91.856545,62112.0,8437.624936,8,4,...,416,274,0.857521,0.824818,19.421265,7.894590,4.064904,8,1,0
3,4,540.0,285.008439,268.189453,153.0,89.769054,67547.0,8058.482979,8,4,...,480,284,0.813471,0.834507,19.421265,8.565180,3.459917,8,1,0
4,5,264.0,234.200000,238.798828,202.0,20.192821,5855.0,407.750000,3,3,...,72,33,0.514177,0.757576,8.599912,3.126944,2.309401,8,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18703,174,685.0,346.677288,326.101562,187.0,92.460071,424333.0,8548.864700,17,22,...,6732,3094,0.437415,0.395604,93.084047,25.469903,16.027167,24,28,83
18704,175,609.0,337.340530,326.101562,188.0,81.507414,343750.0,6643.458582,15,25,...,5250,2200,0.500626,0.463182,67.974426,28.804014,11.992024,24,28,83
18705,176,347.0,255.431818,252.820312,193.0,37.513655,11239.0,1407.274313,5,3,...,90,54,0.854795,0.814815,12.392149,5.022825,2.508233,24,28,83
18706,177,345.0,247.479915,245.492188,176.0,32.455513,117058.0,1053.360295,21,15,...,3465,1333,0.459588,0.354839,59.374514,28.204786,8.149047,24,28,83


In [6]:
# Write the reduced table into the same directory as the input file;
# the row index is not written to the CSV.
measurements_filtered.to_csv(f"{path}03-relevant-features.csv", index=False)