In [31]:
import os

import numpy as np
from keras.utils import image_dataset_from_directory
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler

In [8]:
# Root folder of the image dataset; the "Training" and "Testing" sub-folders
# are read from it. Override it per machine with the MRI_DATA_DIR environment
# variable (e.g. "/home/layth/git/mri_analysis/archive") instead of editing
# this cell -- previously a second pair of assignments silently overwrote the
# first one.
DATA_ROOT = os.environ.get(
    "MRI_DATA_DIR",
    "/Users/studentuser/WORKING_BASE/mri_analysis/archive",
)
dataset_training_path = os.path.join(DATA_ROOT, "Training")
dataset_testing_path = os.path.join(DATA_ROOT, "Testing")

RESIZE = 150                 # side length (pixels) used when resizing images
FLAT = RESIZE * RESIZE * 3   # length of one flattened 3-channel image
# Label index -> class name.
# NOTE(review): presumably mirrors the label order produced by the loader -- confirm.
classes = {0: 'glioma_tumor', 1: 'meningioma_tumor', 2: 'no_tumor', 3: 'pitutary_tumor'}

In [21]:
# Read both image folders as batched datasets, then merge the batches into
# plain numpy arrays.
#   image_size: every image is resized to RESIZE x RESIZE
#   batch_size: number of images yielded per batch
#   color_mode: 'grayscale' -> one channel per image
#   seed:       handed to the loader (see the Keras docs for how it is used)
def _load_split(directory):
    """Return the batched grayscale dataset read from `directory`."""
    return image_dataset_from_directory(directory=directory,
                                        image_size=(RESIZE, RESIZE),
                                        batch_size=32,
                                        color_mode='grayscale',
                                        seed=42)


def _stack_batches(dataset):
    """Iterate `dataset` once and return (all images, all labels) as arrays."""
    image_batches = []
    label_batches = []
    for images, labels in dataset:
        image_batches.append(images.numpy())
        label_batches.append(labels.numpy())
    return np.concatenate(image_batches), np.concatenate(label_batches)


train_data = _load_split(dataset_training_path)
testing_data = _load_split(dataset_testing_path)

# The datasets yield batches; 'melt' them into one array per split.
X_train, y_train = _stack_batches(train_data)
X_test, y_test = _stack_batches(testing_data)

# Training split: rescale pixels, then build the two reshaped views.
X_train = X_train / 255
X_train_no_color = X_train.reshape(len(X_train), RESIZE, RESIZE)  # drop the channel axis
X_train_flat = X_train.reshape(len(X_train), -1)                  # one row vector per image

# Test split: same treatment.
X_test = X_test / 255
X_test_no_color = X_test.reshape(len(X_test), RESIZE, RESIZE)     # drop the channel axis
X_test_flat = X_test.reshape(len(X_test), -1)                     # one row vector per image

print()
print(f"Flattening (if rgb) after resize: {RESIZE} * {RESIZE} * 3 = {FLAT}")
print(f"Flattening (if greyscale) after resize: {RESIZE} * {RESIZE} = {RESIZE * RESIZE}")
print()
print(f"Shape of X_train: {np.shape(X_train)}")
print(f"Shape of X_train_flat: {np.shape(X_train_flat)}")
print(f"Shape of X_train_no_color: {np.shape(X_train_no_color)}")
print(f"Shape of y_train: {np.shape(y_train)}")
print()
print(f"Shape of X_test: {np.shape(X_test)}")
print(f"Shape of X_test_flat: {np.shape(X_test_flat)}")
print(f"Shape of X_test_no_color: {np.shape(X_test_no_color)}")
# The label below says "y_val" but the array printed is y_test.
print(f"Shape of y_val: {np.shape(y_test)}")

Found 2870 files belonging to 4 classes.
Found 394 files belonging to 4 classes.

Flattening (if rgb) after resize: 150 * 150 * 3 = 67500
Flattening (if greyscale) after resize: 150 * 150 = 22500

Shape of X_train: (2870, 150, 150, 1)
Shape of X_train_flat: (2870, 22500)
Shape of X_train_no_color: (2870, 150, 150)
Shape of y_train: (2870,)

Shape of X_test: (394, 150, 150, 1)
Shape of X_test_flat: (394, 22500)
Shape of X_test_no_color: (394, 150, 150)
Shape of y_val: (394,)


## KNN

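- testing with flattened, non-scaled (no `MinMaxScaler`) RGB data. Caveat: the loading cell in this notebook sets `color_mode='grayscale'`, so a fresh top-to-bottom run feeds grayscale pixels into this cell.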

In [10]:
# Sweep the neighbour count k = 1..10 on the flattened pixel vectors and
# report accuracy and confusion matrix on the training and test splits.
# NOTE: the "(R^2)" in the printed labels is a misnomer -- the number shown is
# the classification accuracy returned by accuracy_score.
for k in range(1, 11):
    neigh = KNeighborsClassifier(n_neighbors=k).fit(X_train_flat, y_train)

    print(f"k: {k}")

    # Predict both splits first, then derive the metrics from the predictions.
    ypred_train = neigh.predict(X_train_flat)
    ypred_test = neigh.predict(X_test_flat)

    accuracy_train = accuracy_score(y_train, ypred_train)
    accuracy_test = accuracy_score(y_test, ypred_test)
    conf_matrix_train = confusion_matrix(y_train, ypred_train)
    conf_matrix_test = confusion_matrix(y_test, ypred_test)

    print('Accuracy for training data (R^2): ', accuracy_train)
    print('Confusion matrix for training data:\n', conf_matrix_train, '\n')

    print('Accuracy for test data (R^2): ', accuracy_test, '\n')
    print('Confusion matrix for test data:\n', conf_matrix_test, '\n')
    print()

k: 1
Accuracy for training data (R^2):  1.0
Confusion matrix for training data:
 [[826   0   0   0]
 [  0 822   0   0]
 [  0   0 395   0]
 [  0   0   0 827]] 

Accuracy for test data (R^2):  0.7614213197969543 

Confusion matrix for test data:
 [[ 30  22  37  11]
 [  0 112   3   0]
 [  0   0 105   0]
 [ 10   1  10  53]] 


k: 2
Accuracy for training data (R^2):  0.9564459930313589
Confusion matrix for training data:
 [[826   0   0   0]
 [ 61 761   0   0]
 [ 28  26 341   0]
 [  2   3   5 817]] 

Accuracy for test data (R^2):  0.6725888324873096 

Confusion matrix for test data:
 [[ 36  28  29   7]
 [  9 104   2   0]
 [  5   6  94   0]
 [ 12   6  25  31]] 


k: 3
Accuracy for training data (R^2):  0.9411149825783972
Confusion matrix for training data:
 [[810  15   0   1]
 [ 40 735  32  15]
 [ 32  11 336  16]
 [  3   2   2 820]] 

Accuracy for test data (R^2):  0.6116751269035533 

Confusion matrix for test data:
 [[32 12 42 14]
 [10 69 22 14]
 [ 8  1 94  2]
 [11  2 15 46]] 


k: 4
Accura

## KNN

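- flattened RGB data, scaled with `MinMaxScaler`. Caveat: the loading cell in this notebook sets `color_mode='grayscale'`, so a fresh top-to-bottom run feeds grayscale pixels into these cells.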

In [13]:
# Learn the per-feature min/max from the training pixels and rescale the
# training matrix in the same step.
scaler = MinMaxScaler()
X = scaler.fit_transform(X_train_flat)

In [15]:
# Rescale the test split with the scaler that was fitted on the training
# data, so both splits share the same per-feature transformation.
# (A separate scaler fitted on the test data used to be created here; it was
# never used and has been removed.)
X2 = scaler.transform(X_test_flat)

In [16]:
# Sweep the neighbour count k = 1..5 on the MinMax-scaled matrices
# (X = scaled training split, X2 = scaled test split).
# NOTE: the "(R^2)" in the printed labels is a misnomer -- the number shown is
# the classification accuracy returned by accuracy_score.
for k in range(1, 6):
    neigh = KNeighborsClassifier(n_neighbors=k).fit(X, y_train)

    print(f"k: {k}")

    # Predict both splits first, then derive the metrics from the predictions.
    ypred_train = neigh.predict(X)
    ypred_test = neigh.predict(X2)

    accuracy_train = accuracy_score(y_train, ypred_train)
    accuracy_test = accuracy_score(y_test, ypred_test)
    conf_matrix_train = confusion_matrix(y_train, ypred_train)
    conf_matrix_test = confusion_matrix(y_test, ypred_test)

    print('Accuracy for training data (R^2): ', accuracy_train)
    print('Confusion matrix for training data:\n', conf_matrix_train, '\n')

    print('Accuracy for test data (R^2): ', accuracy_test, '\n')
    print('Confusion matrix for test data:\n', conf_matrix_test, '\n')
    print()

k: 1
Accuracy for training data (R^2):  1.0
Confusion matrix for training data:
 [[826   0   0   0]
 [  0 822   0   0]
 [  0   0 395   0]
 [  0   0   0 827]] 

Accuracy for test data (R^2):  0.7944162436548223 

Confusion matrix for test data:
 [[ 32  22  35  11]
 [  0 113   2   0]
 [  0   0 105   0]
 [ 10   0   1  63]] 


k: 2
Accuracy for training data (R^2):  0.9571428571428572
Confusion matrix for training data:
 [[826   0   0   0]
 [ 61 761   0   0]
 [ 28  24 343   0]
 [  2   3   5 817]] 

Accuracy for test data (R^2):  0.7030456852791879 

Confusion matrix for test data:
 [[ 39  27  27   7]
 [  9 104   2   0]
 [  6   5  94   0]
 [ 12   6  16  40]] 


k: 3
Accuracy for training data (R^2):  0.9435540069686411
Confusion matrix for training data:
 [[810  15   0   1]
 [ 37 739  30  16]
 [ 32   9 339  15]
 [  2   2   3 820]] 

Accuracy for test data (R^2):  0.6269035532994924 

Confusion matrix for test data:
 [[32 12 41 15]
 [10 70 20 15]
 [ 8  1 94  2]
 [11  2 10 51]] 


k: 4
Accura

## KNN
- flattened, non-scaled (no `MinMaxScaler`) grayscale data

In [22]:
# Sweep the neighbour count k = 1..5 on the flattened pixel vectors and
# report accuracy and confusion matrix on the training and test splits.
# NOTE: the "(R^2)" in the printed labels is a misnomer -- the number shown is
# the classification accuracy returned by accuracy_score.
for k in range(1, 6):
    neigh = KNeighborsClassifier(n_neighbors=k).fit(X_train_flat, y_train)

    print(f"k: {k}")

    # Predict both splits first, then derive the metrics from the predictions.
    ypred_train = neigh.predict(X_train_flat)
    ypred_test = neigh.predict(X_test_flat)

    accuracy_train = accuracy_score(y_train, ypred_train)
    accuracy_test = accuracy_score(y_test, ypred_test)
    conf_matrix_train = confusion_matrix(y_train, ypred_train)
    conf_matrix_test = confusion_matrix(y_test, ypred_test)

    print('Accuracy for training data (R^2): ', accuracy_train)
    print('Confusion matrix for training data:\n', conf_matrix_train, '\n')

    print('Accuracy for test data (R^2): ', accuracy_test, '\n')
    print('Confusion matrix for test data:\n', conf_matrix_test, '\n')
    print()

k: 1
Accuracy for training data (R^2):  1.0
Confusion matrix for training data:
 [[826   0   0   0]
 [  0 822   0   0]
 [  0   0 395   0]
 [  0   0   0 827]] 

Accuracy for test data (R^2):  0.7614213197969543 

Confusion matrix for test data:
 [[ 30  22  37  11]
 [  0 112   3   0]
 [  0   0 105   0]
 [ 10   1  10  53]] 


k: 2
Accuracy for training data (R^2):  0.9564459930313589
Confusion matrix for training data:
 [[826   0   0   0]
 [ 61 761   0   0]
 [ 28  26 341   0]
 [  2   3   5 817]] 

Accuracy for test data (R^2):  0.6725888324873096 

Confusion matrix for test data:
 [[ 36  28  29   7]
 [  9 104   2   0]
 [  5   6  94   0]
 [ 12   6  25  31]] 


k: 3
Accuracy for training data (R^2):  0.9411149825783972
Confusion matrix for training data:
 [[810  15   0   1]
 [ 40 735  32  15]
 [ 32  11 336  16]
 [  3   2   2 820]] 

Accuracy for test data (R^2):  0.6116751269035533 

Confusion matrix for test data:
 [[32 12 42 14]
 [10 69 22 14]
 [ 8  1 94  2]
 [11  2 15 46]] 


k: 4
Accura

## KNN
- flattened grayscale data, scaled with `MinMaxScaler`

In [24]:
# Fit the min-max scaler on the training split only, then apply that same
# fitted scaler to both splits so they share one per-feature transformation.
# (A second scaler fitted on the test data used to be created here; it was
# never used and has been removed.)
scaler = MinMaxScaler().fit(X_train_flat)
X = scaler.transform(X_train_flat)   # scaled training features
X2 = scaler.transform(X_test_flat)   # scaled test features

In [26]:
# Sweep the neighbour count k = 1..5 on the MinMax-scaled matrices
# (X = scaled training split, X2 = scaled test split).
# NOTE: the "(R^2)" in the printed labels is a misnomer -- the number shown is
# the classification accuracy returned by accuracy_score.
for k in range(1, 6):
    neigh = KNeighborsClassifier(n_neighbors=k).fit(X, y_train)

    print(f"k: {k}")

    # Predict both splits first, then derive the metrics from the predictions.
    ypred_train = neigh.predict(X)
    ypred_test = neigh.predict(X2)

    accuracy_train = accuracy_score(y_train, ypred_train)
    accuracy_test = accuracy_score(y_test, ypred_test)
    conf_matrix_train = confusion_matrix(y_train, ypred_train)
    conf_matrix_test = confusion_matrix(y_test, ypred_test)

    print('Accuracy for training data (R^2): ', accuracy_train)
    print('Confusion matrix for training data:\n', conf_matrix_train, '\n')

    print('Accuracy for test data (R^2): ', accuracy_test, '\n')
    print('Confusion matrix for test data:\n', conf_matrix_test, '\n')
    print()

k: 1
Accuracy for training data (R^2):  1.0
Confusion matrix for training data:
 [[826   0   0   0]
 [  0 822   0   0]
 [  0   0 395   0]
 [  0   0   0 827]] 

Accuracy for test data (R^2):  0.7944162436548223 

Confusion matrix for test data:
 [[ 32  22  35  11]
 [  0 113   2   0]
 [  0   0 105   0]
 [ 10   0   1  63]] 


k: 2
Accuracy for training data (R^2):  0.9571428571428572
Confusion matrix for training data:
 [[826   0   0   0]
 [ 61 761   0   0]
 [ 28  24 343   0]
 [  2   3   5 817]] 

Accuracy for test data (R^2):  0.7030456852791879 

Confusion matrix for test data:
 [[ 39  27  27   7]
 [  9 104   2   0]
 [  6   5  94   0]
 [ 12   6  16  40]] 


k: 3
Accuracy for training data (R^2):  0.9435540069686411
Confusion matrix for training data:
 [[810  15   0   1]
 [ 37 739  30  16]
 [ 32   9 339  15]
 [  2   2   3 820]] 

Accuracy for test data (R^2):  0.6269035532994924 

Confusion matrix for test data:
 [[32 12 41 15]
 [10 70 20 15]
 [ 8  1 94  2]
 [11  2 10 51]] 


k: 4
Accura

## KNN
- PCA then KNN

In [47]:
# Sanity check before PCA: value range first, then shape, for both
# flattened splits (training, then test).
for flat_split in (X_train_flat, X_test_flat):
    print(flat_split.max(), flat_split.min())
for flat_split in (X_train_flat, X_test_flat):
    print(flat_split.shape)

1.0 0.0
1.0 0.0
(2870, 22500)
(394, 22500)


In [52]:
# Upper bound on the number of PCA components to try. Despite the name (and
# the "K=" label printed below), this is NOT the KNN neighbour count, which is
# fixed at 5 for every run.
k = 10

for i in range(2, k + 1):
    # random_state pins the result when PCA uses a randomized SVD solver, so
    # re-running the cell reproduces the same scores.
    pca = PCA(n_components=i, random_state=42)
    # Fit the projection on the training split only, then apply it to the test split.
    pca_train = pca.fit_transform(X_train_flat)
    pca_test = pca.transform(X_test_flat)
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(pca_train, y_train)

    # "K" here is the number of PCA components (i).
    print(f"For K={i}")
    print("Training Score: ", knn.score(pca_train, y_train))
    print("testing Score: ", knn.score(pca_test, y_test))

For K=2
Training Score:  0.7174216027874565
testing Score:  0.5126903553299492
For K=3
Training Score:  0.7540069686411149
testing Score:  0.4873096446700508
For K=4
Training Score:  0.7909407665505227
testing Score:  0.5076142131979695
For K=5
Training Score:  0.8132404181184669
testing Score:  0.5431472081218274
For K=6
Training Score:  0.8254355400696864
testing Score:  0.5406091370558376
For K=7
Training Score:  0.8337979094076655
testing Score:  0.5177664974619289
For K=8
Training Score:  0.8484320557491289
testing Score:  0.4949238578680203
For K=9
Training Score:  0.8644599303135888
testing Score:  0.5152284263959391
For K=10
Training Score:  0.8665505226480836
testing Score:  0.5126903553299492
