# Shape, Statistical, and Histogram Features

In [1]:
import pandas as pd
import numpy as np
import nibabel as nib
from scipy import ndimage as nd
from scipy import stats

### Load images

In [2]:
# Read the (file, label) table; the CSV has no header row, so names are supplied here.
df = pd.read_csv('../data/ATR_GT_Training.csv', header=None, names=['file','label'])
# Drop the single-quote characters embedded in the file identifiers.
df['file'] = [name.replace("'", '') for name in df['file']]
# Open the NIfTI image that corresponds to each identifier and keep the image object per row.
df['img'] = [nib.load('../data/' + str(name) + '.nii.gz') for name in df['file']]

### Assign feature names

In [3]:
# Ten histogram-bin column names: hist0 .. hist9.
# NOTE(review): the feature-extraction cell's histogram call requests 4 bins,
# not 10 — confirm which bin count is intended.
hist_labels = ['hist%d' % k for k in range(10)]
# Eighteen scalar feature names followed by the histogram-bin names.
new_features = [
    'x', 'y', 'z', 'xyz', 'xcom', 'ycom', 'zcom',
    'max', 'xmax', 'ymax', 'zmax',
    'mean', 'median', 'std', 'variance', 'skew', 'kurtosis', 'entropy',
] + hist_labels
# Reindexing on the extended column list appends the new names as columns
# after the existing ones.
df = df.reindex(columns=list(df.columns) + new_features)

### Generate and save features

In [4]:
# Histogram settings applied to the non-zero intensities of every volume.
# NOTE(review): 4 bins are produced here, while the "Assign feature names" cell
# declares ten histogram columns (hist0..hist9) — confirm which is intended.
HIST_MIN, HIST_MAX, HIST_BINS = 0, 2700, 4

# One row per image, in the same order as df.img.
features_shape, features_stats, features_histogram = [], [], []

for i, img in enumerate(df.img):

    # caching='unchanged' stops nibabel from keeping the decoded array on the
    # image object. Every image stays referenced from df.img, so the default
    # cache would accumulate all decoded volumes in memory over the loop.
    data = img.get_fdata(caching='unchanged')
    nonzero = data[np.nonzero(data)]

    # Shape Features
    x, y, z = img.shape  # unpacking raises if the image is not 3-D
    num_pixels = x*y*z
    max_dim = np.max(img.shape)
    mid_dim = np.median(img.shape)
    min_dim = np.min(img.shape)

    if nonzero.size:
        # Histogram of the non-zero intensities
        histogram = nd.histogram(nonzero, HIST_MIN, HIST_MAX, HIST_BINS)

        # Statistical Features (all computed on non-zero intensities only)
        mean = nd.mean(nonzero)
        median = nd.median(nonzero)
        max_ = np.max(nonzero)
        std = nd.standard_deviation(nonzero)
        var = nd.variance(nonzero)
        skew = stats.skew(nonzero, axis=None)
        kurtosis = stats.kurtosis(nonzero, axis=None)
        entropy = stats.entropy(histogram)
        stats_row = [mean, median, max_, std, var, skew, kurtosis, entropy]
    else:
        # An all-zero volume has no intensities to summarise; np.max would
        # raise on the empty array. Emit a placeholder row instead so the
        # feature arrays stay aligned with df row-for-row.
        print('Warning: image ' + str(i) + ' has no non-zero voxels; statistics set to NaN.')
        histogram = np.zeros(HIST_BINS, dtype=int)
        stats_row = [np.nan] * 8

    features_shape.append([x, y, z, num_pixels, max_dim, mid_dim, min_dim])
    features_stats.append(stats_row)
    features_histogram.append(list(histogram))

    # i is 0-based, so i + 1 images are complete at this point.
    done = i + 1
    if done % 100 == 0:
        print('Finished ' + str(done) + ' images.')

print('Finished Feature Extraction')
np.save('features_shape.npy',    np.array(features_shape))
np.save('features_stats.npy',    np.array(features_stats))
np.save('features_histogram.npy',np.array(features_histogram))

Finished 100 images.
Finished 200 images.
Finished 300 images.
Finished 400 images.
Finished 500 images.
Finished 600 images.
Finished 700 images.
Finished 800 images.
Finished 900 images.
Finished 1000 images.
Finished 1100 images.
Finished 1200 images.
Finished 1300 images.
Finished 1400 images.
Finished Feature Extraction


In [7]:
# Report the table size: rows are images, columns are reported as features.
# NOTE(review): the column count includes every column of df, not only the
# names added as features — confirm that is the intended figure.
n_images, n_columns = df.shape
print(n_images, 'images')
print(n_columns, 'features')

1472 images
31 features
