In [57]:
# special functions for using pyradiomics
from SimpleITK import GetImageFromArray
import radiomics
from radiomics.featureextractor import RadiomicsFeatureExtractor # This module is used for interaction with pyradiomic
import logging
logging.getLogger('radiomics').setLevel(logging.CRITICAL + 1)  # this tool makes a whole TON of log noise

In [58]:
texture_extractor = RadiomicsFeatureExtractor(verbose=False)
texture_extractor.disableAllFeatures()
_text_feat = {ckey: [] for ckey in texture_extractor.featureClassNames}
texture_extractor.enableFeaturesByName(**_text_feat)

print('Extraction parameters:\n\t', texture_extractor.settings)
print('Enabled filters:\n\t', texture_extractor.enabledImagetypes)
print('Enabled features:\n\t', texture_extractor.enabledFeatures)

Extraction parameters:
	 {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': False, 'normalizeScale': 1, 'removeOutliers': None, 'resampledPixelSpacing': None, 'interpolator': 'sitkBSpline', 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True, 'verbose': False}
Enabled filters:
	 {'Original': {}}
Enabled features:
	 {'firstorder': [], 'glcm': [], 'gldm': [], 'glrlm': [], 'glszm': [], 'ngtdm': [], 'shape': [], 'shape2D': []}


In [59]:
import numpy as np # for manipulating 3d images
import pandas as pd # for reading and writing tables
import h5py # for reading the image files
import skimage # for image processing and visualizations
import sklearn # for machine learning and statistical models
import os # help us load files and deal with paths
from pathlib import Path # help manage files

In [60]:
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
plt.rcParams["figure.figsize"] = (8, 8)
plt.rcParams["figure.dpi"] = 125
plt.rcParams["font.size"] = 14
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['DejaVu Sans']
plt.style.use('ggplot')
sns.set_style("whitegrid", {'axes.grid': False})

In [61]:
from pathlib import Path
import pandas as pd

# Set the directory for your dataset
data_dir = Path('C:\\Users\\abinr\\Documents\\Alzheimer_s_Dataset\\Alzheimer_s Dataset\\val(jpg)\\VeryMildDemented')

# Create a DataFrame with paths to all .jpg files in the data directory
all_df = pd.DataFrame({'jpg_path': list(data_dir.glob('*.jpg'))})  # Updated to look only in the specified directory

# Extract the file ID (filename without extension)
all_df['file_id'] = all_df['jpg_path'].map(lambda x: x.stem)

# Since all images are in one folder, you might not need to extract the training group
# If you still need to identify a group or category, you can modify this part accordingly
# For now, we assume no training group is needed, but you can add a placeholder or specific logic if required

# Extract scan ID from the file ID, ensuring it's a number or setting it to None
all_df['scan_id'] = all_df['file_id'].map(lambda x: int(x.split('_')[-1]) if x.split('_')[-1].isdigit() else None)

# Display the first 5 rows of the DataFrame
all_df.head(5)


Unnamed: 0,jpg_path,file_id,scan_id
0,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,26 (44),
1,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,26 (45),
2,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,26 (46),
3,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,26 (47),
4,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,26 (48),


In [62]:
from PIL import Image
import numpy as np

# Assuming all_df is your DataFrame and has been defined in a previous cell

# Take the first row as a sample scan
sample_scan = all_df.iloc[0]
print(sample_scan)

# Define a function to read image data from a JPEG file
def read_scan(jpg_path):
    with Image.open(jpg_path) as img:
        return np.array(img)

# Use 'jpg_path' to get the path of the JPEG file and read the image data
image_data = read_scan(sample_scan['jpg_path'])
print('Image Shape:', image_data.shape)


jpg_path    C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...
file_id                                               26 (44)
scan_id                                                   NaN
Name: 0, dtype: object
Image Shape: (208, 176)


In [63]:
%%time
results = texture_extractor.execute(GetImageFromArray(image_data),
                            GetImageFromArray((image_data>0).astype(np.uint8)))

CPU times: total: 15.6 ms
Wall time: 120 ms


  highBound = maximum + 2 * binWidth


In [64]:
pd.DataFrame([results]).T

Unnamed: 0,0
diagnostics_Versions_PyRadiomics,3.1.0a2.post14+gaab3c6f
diagnostics_Versions_Numpy,2.1.0
diagnostics_Versions_SimpleITK,2.4.0
diagnostics_Versions_PyWavelet,1.7.0
diagnostics_Versions_Python,3.12.3
...,...
original_ngtdm_Busyness,52.6863469514088
original_ngtdm_Coarseness,0.005208425093138252
original_ngtdm_Complexity,0.020462178133448285
original_ngtdm_Contrast,0.0012676518150322835


In [65]:
def calc_radiomics(in_image_data):
    return texture_extractor.execute(GetImageFromArray(in_image_data),
                            GetImageFromArray((in_image_data>0).astype(np.uint8)))

In [66]:
from radiomics import featureextractor
import SimpleITK as sitk

# Initialize feature extractor
params = {}
extractor = featureextractor.RadiomicsFeatureExtractor(params)

# Define a function to extract radiomics features
def calc_radiomics(image_path):
    image = sitk.ReadImage(image_path)
    mask = sitk.OtsuThreshold(image, 0, 1, 200)  # Dummy mask; replace with your actual mask
    features = extractor.execute(image, mask)
    return {key: features[key] for key in features.keys()}

# Define the function to read the image
def read_scan(jpg_path):
    return sitk.ReadImage(jpg_path)

# Assuming all_df is defined and contains a 'jpg_path' column
all_df['radiomics'] = all_df['jpg_path'].map(lambda c_filename: calc_radiomics(c_filename))




  highBound = maximum + 2 * binWidth
  binEdges = numpy.arange(lowBound, highBound, binWidth)


In [67]:
import pandas as pd

# Collecting radiomics data in a list
radiomics_data_list = []
for _, row in all_df.iterrows():
    radiomics_data = row['radiomics'] if isinstance(row['radiomics'], dict) else {}
    radiomics_data_list.append(radiomics_data)

# Creating a DataFrame from the list of dictionaries
radiomics_df = pd.DataFrame(radiomics_data_list)

# Concatenate the original all_df (without the 'radiomics' column) with the radiomics_df
full_df = pd.concat([all_df.drop('radiomics', axis=1).reset_index(drop=True), radiomics_df.reset_index(drop=True)], axis=1)

print(full_df.shape, 'data prepared')



# export the whole table
full_df.to_csv('VeryMildDemented.csv', index=False)
full_df.sample(3)


(448, 127) data prepared


Unnamed: 0,jpg_path,file_id,scan_id,diagnostics_Versions_PyRadiomics,diagnostics_Versions_Numpy,diagnostics_Versions_SimpleITK,diagnostics_Versions_PyWavelet,diagnostics_Versions_Python,diagnostics_Configuration_Settings,diagnostics_Configuration_EnabledImageTypes,...,original_glszm_SmallAreaHighGrayLevelEmphasis,original_glszm_SmallAreaLowGrayLevelEmphasis,original_glszm_ZoneEntropy,original_glszm_ZonePercentage,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength
214,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,29 (52),,3.1.0a2.post14+gaab3c6f,2.1.0,2.4.0,1.7.0,3.12.3,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},...,4.16407903507106,0.1026637120194999,6.229716700525164,0.0880702610392778,3585.2519343262657,27.747323839717943,0.0012749944602157,7.0396573621713205,0.0558368566400364,0.0227117984780978
81,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,27 (59),,3.1.0a2.post14+gaab3c6f,2.1.0,2.4.0,1.7.0,3.12.3,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},...,5.201718757352339,0.1748972314519604,5.6773383054479245,0.1247742571006402,3469.309001038781,21.95041999825655,0.0012636283541068,10.024320880414963,0.0393390720440246,0.025766833992888
232,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,29 (69),,3.1.0a2.post14+gaab3c6f,2.1.0,2.4.0,1.7.0,3.12.3,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},...,5.370114976054773,0.1466631910583658,6.180994570417277,0.0831425828841467,8368.081835289886,22.10558419930649,0.0011779160455197,9.310551022873383,0.0349519126604516,0.0253119373423319


In [68]:
full_df = pd.DataFrame([dict(**c_row.pop('radiomics'), **c_row) for _, c_row in all_df.iterrows()])
print(full_df.shape, 'data prepared')
first_cols = all_df.columns[:-1].tolist()
full_df = full_df[first_cols + [c_col for c_col in full_df.columns
                                if c_col not in first_cols]]
# export the whole table
full_df.to_csv('VeryMildDemented.csv', index=False)
full_df.sample(3)



(448, 127) data prepared


Unnamed: 0,jpg_path,file_id,scan_id,diagnostics_Versions_PyRadiomics,diagnostics_Versions_Numpy,diagnostics_Versions_SimpleITK,diagnostics_Versions_PyWavelet,diagnostics_Versions_Python,diagnostics_Configuration_Settings,diagnostics_Configuration_EnabledImageTypes,...,original_glszm_SmallAreaHighGrayLevelEmphasis,original_glszm_SmallAreaLowGrayLevelEmphasis,original_glszm_ZoneEntropy,original_glszm_ZonePercentage,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength
194,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,29 (34),,3.1.0a2.post14+gaab3c6f,2.1.0,2.4.0,1.7.0,3.12.3,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},...,4.314923560150515,0.1752610767473512,6.134110431808175,0.0715908258225251,6861.261019832818,19.94113616838724,0.0013098481365568,8.489313164958231,0.0345562408944552,0.0280750329836458
320,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,31 (21),,3.1.0a2.post14+gaab3c6f,2.1.0,2.4.0,1.7.0,3.12.3,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},...,6.137833176347266,0.0736927769854471,6.077927533562462,0.0876859218263576,7140.057417068341,18.75379029790985,0.0013594199567138,9.703243056679558,0.0419330773442984,0.0311584961460867
430,C:\Users\abinr\Documents\Alzheimer_s_Dataset\A...,32 (58),,3.1.0a2.post14+gaab3c6f,2.1.0,2.4.0,1.7.0,3.12.3,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},...,4.957149080401636,0.2190207346256964,5.999466885050311,0.0674967680479009,27766.598025128445,20.50996058608317,0.0012665099355235,8.041014522163358,0.0317552970706342,0.0272815508999843


In [69]:
import pandas as pd

# Assuming all_df has been defined and prepared earlier

# Create the full DataFrame
full_df = pd.DataFrame([dict(**c_row.pop('radiomics'), **c_row) for _, c_row in all_df.iterrows()])
print(full_df.shape, 'data prepared')

# Define first columns excluding the last one
first_cols = all_df.columns[:-1].tolist()
# Reorder the DataFrame
full_df = full_df[first_cols + [c_col for c_col in full_df.columns if c_col not in first_cols]]

# Export the DataFrame to an Excel file
# Change this to your desired path
output_path = 'C:\\Users\\abinr\\Documents\\Alzheimer_s_Dataset\\Alzheimer_s Dataset\\val(jpg)\\VeryMildDemented.xlsx'
full_df.to_excel(output_path, index=False)


# Display a sample of the DataFrame
# print(full_df.sample(3))


(448, 127) data prepared


In [70]:
value_feature_names = [c_col for c_col in full_df.columns if (c_col.startswith('original') and '_shape_' not in c_col)]
print(np.random.choice(value_feature_names, 3), 'of', len(value_feature_names))

['original_ngtdm_Contrast' 'original_glcm_MCC'
 'original_glrlm_RunLengthNonUniformityNormalized'] of 102
