In [2]:
import os
import nibabel as nib
from scipy.ndimage import gaussian_filter

def apply_gaussian_filter(input_dir,output_dir, sigma):
    
    nii_image = nib.load(input_dir)
    
    im_data = nii_image.get_fdata()
    
    smoth_data = gaussian_filter(im_data, sigma)
    
    smoth_data[smoth_data != 0] = 0
    
    mod_nii_image = nib.Nifti1Image(smoth_data, nii_image.affine)
    
    nib.save(mod_nii_image, output_dir)
    

#define var
input_dir = 'ATR_data'
output_dir = 'ATR_smoth'
sigma = 10.65

for filename in os.listdir(input_dir):
        if filename.endswith('.nii.gz'):
            input_path = os.path.join(input_dir, filename)
            output_path = os.path.join(output_dir, f'smoth_{filename}')

            apply_gaussian_filter(input_path, output_path , sigma)

In [9]:
import os
import nibabel as nib
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.model_selection import KFold
from scipy import ndimage as nd
from scipy import stats

def extract_features(image_path, num_bins=100):
    nii_image =nib.load(image_path)
    
    image_data = nii_image.get_fdata()
    
    flat_data = image_data.ravel()
    
    hist, _ = np.histogram(flat_data, bins=num_bins)
    
    histo_norm = hist / hist.sum()
    
    return histo_norm

def extract_stat_features(image_path):
    features = []
    
    nii_image =nib.load(image_path)
    
    image_data = nii_image.get_fdata()
    
    non_zero = image_data[np.nonzero(image_data)]
    
    mean = np.mean(non_zero)
    
    median = nd.median(non_zero)
    
    #maximum = np.max(non_zero)
    
    std = nd.standard_deviation(non_zero)
    
    var = nd.variance(non_zero)
    
    skew = stats.skew(non_zero,axis=None)
    
    kurtosis = stats.kurtosis(non_zero,axis=None)
    
    features.append([mean,median,std,var,skew,kurtosis])
    
    return features

#define directorys

input_dir = 'ATR_smoth'
excel_file = 'ATR_training.xlsx'
num_bins = 50

#read labels
labels_df = pd.read_excel(excel_file)
labels = labels_df['label'].values

# Extract histo features for all images

hist_list = []
stat_list = []
file_list = sorted(os.listdir(input_dir))
for filename in file_list:
    if filename.endswith('.nii.gz'):
        input_path = os.path.join(input_dir, filename)
        hist_features = extract_features(input_path, num_bins)
        stat_features = extract_stat_features(input_path)
        hist_list.append(hist_features)
        stat_list.append(stat_features)
        
        
np.save('features_stats.npy', np.array(stat_list))
np.save('features_histogram.npy', np.array(hist_list))

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  vals_c = vals - vals.mean()
  return sum_c_sq / np.asanyarray(count).astype(float)
  mean = a.mean(axis, keepdims=True)
  ret = um.true_divide(
  mean = a.mean(axis, keepdims=True)


In [1]:
import numpy as np
feature_stats = np.load('stat_features.npy')
feature_histogram = np.load('features_histogram.npy')
features = np.concatenate((feature_stats,feature_histogram),axis=1)

In [2]:
import pandas as pd
import time
import nibabel as nb
from nilearn import plotting as plot
from nilearn import image as img
import matplotlib.pyplot as plt
import numpy as np
from scipy import ndimage as nd
from scipy import stats

df = pd.read_excel("ATR_GT_Training_og.xlsx",header = None, names=['File Name','Label'])
df['File Name'] = df['File Name'].map(lambda x: x.lstrip("'").rstrip("'"))
df['Image'] = df['File Name'].map(lambda x: nb.load('ATR_data/' + str(x) + '.nii.gz'))
#feature_labels = df.to_numpy()

In [3]:
features = np.concatenate((feature_stats,feature_histogram),axis=1)

X_train = features[:1005]
y_train = df.Label[:1005]


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.model_selection import KFold
#standarize the features 

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)

#Cross Validation function for each classifier
def cross_val(clf,X,y,clf_string, cv=5):
    scores = cross_val_score(clf,X, y, cv=cv)
    print('Clf: {}\nAccuracy Mean: {:0.2f}\nStandard Deviation: {:0.2f}'.format(clf_string, scores.mean(), scores.std()))


#Testing on multiple models
clfs = []
#C = 5 is good optimization parameter to keep the misclassification rate lower and keep the hyperplane classifying point correcly
svm = SVC(kernel='linear', C=5)
clfs.append([svm,'Support Vector Machine'])
lr = LogisticRegression(random_state = 0, solver = 'lbfgs',multi_class='multinomial')
clfs.append([lr,'Logistic Regression'])
ada = AdaBoostClassifier(n_estimators=100)
clfs.append([ada,'AdaBoost'])
knn = KNeighborsClassifier(n_neighbors = 1, leaf_size = 1, p=1)
clfs.append([knn,'K-Neighbors'])

for clf, clf_str in clfs:
    cross_val(clf,X_scaled,y_train,clf_str)

Clf: Support Vector Machine
Accuracy Mean: 0.74
Standard Deviation: 0.01
Clf: Logistic Regression
Accuracy Mean: 0.72
Standard Deviation: 0.02
Clf: AdaBoost
Accuracy Mean: 0.64
Standard Deviation: 0.03
Clf: K-Neighbors
Accuracy Mean: 0.81
Standard Deviation: 0.02


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


SVC(C=5, kernel='linear')

In [7]:
#Predictions using the KNN classifier model
X_test_truth = features[-467:, :]

knn.fit(X_train,y_train)
y_pred = knn.predict(X_test_truth)

np_df = df.to_numpy()
np_test = np_df[-467:, :]

df_test = pd.DataFrame(np_test, columns = ['file_name','label','image'])
df_test['label'] = y_pred

df_test.to_excel('predictions_new.xlsx')

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [8]:
print(y_pred)

[0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 1. 0. 3. 0. 0. 0. 2. 0. 3. 0. 0. 0. 0. 3.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 2. 0. 3. 0. 0.
 0. 1. 0. 0. 0. 0. 0. 2. 0. 0. 2. 0. 0. 0. 1. 3. 0. 0. 0. 1. 0. 0. 0. 0.
 2. 0. 0. 0. 0. 0. 0. 3. 0. 0. 0. 1. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2.
 0. 0. 0. 0. 0. 0. 0. 0. 3. 0. 0. 0. 1. 0. 3. 0. 0. 0. 0. 0. 0. 0. 0. 2.
 0. 1. 3. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 3. 0. 0. 0. 0. 3. 0. 3.
 2. 0. 2. 0. 0. 3. 0. 2. 2. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 2. 3.
 0. 0. 0. 2. 0. 1. 0. 0. 0. 0. 0. 0. 2. 1. 3. 0. 0. 2. 0. 3. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0. 0. 3. 0. 0. 0. 0. 0. 0. 0. 0. 2.
 0. 0. 0. 0. 2. 0. 2. 0. 0. 0. 0. 2. 0. 1. 3. 2. 0. 1. 1. 0. 1. 0. 0. 0.
 0. 0. 2. 0. 3. 0. 0. 0. 0. 0. 1. 0. 3. 2. 0. 0. 0. 1. 0. 0. 3. 1. 0. 0.
 0. 0. 3. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 2. 0. 0.
 0. 3. 0. 0. 0. 2. 3. 0. 2. 0. 1. 0. 2. 0. 0. 0. 0. 3. 0. 0. 0. 0. 1. 0.
 0. 0. 0. 3. 0. 0. 0. 0. 2. 0. 1. 2. 2. 0. 0. 0. 0.