<a href="https://colab.research.google.com/github/NvdSuni/Thesis-code-complete/blob/main/X_ray_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and loading data

In [None]:
# Mount Google Drive at /content/drive; every dataset and output path used
# in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import matplotlib.pyplot as plt

import numpy as np
from PIL import Image
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model


In [None]:
# Report the versions of the core libraries, one per line.
print(
    f"numpy: {np.__version__}",
    f"PIL (Pillow): {Image.__version__}",
    f"tensorflow: {tf.__version__}",
    sep="\n",
)

In [None]:
!pip install vit-keras
!pip install timm

# Preprocessing

In [None]:
def get_image_paths(data_dir):
    """Collect the paths of all files stored in the class subfolders of a directory.

    ``data_dir`` is expected to contain one subdirectory per class, each of
    which holds the files belonging to that class. Entries of ``data_dir``
    that are not directories (stray files) and entries of a class folder
    that are not regular files (nested folders) are skipped instead of
    raising or being returned as images.

    Args:
        data_dir: Directory whose immediate subdirectories are class folders.

    Returns:
        list[str]: Paths of the form ``data_dir/<class>/<file>``, ordered by
        class folder name and then by file name, so that the order is
        reproducible across runs (``os.listdir`` gives no ordering guarantee).

    Raises:
        OSError: Propagated from ``os.listdir`` if ``data_dir`` cannot be listed.
    """
    image_paths = []
    for class_dir in sorted(os.listdir(data_dir)):
        class_path = os.path.join(data_dir, class_dir)
        # A plain file next to the class folders would make os.listdir fail below.
        if not os.path.isdir(class_path):
            continue
        for image_file in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_file)
            if os.path.isfile(image_path):
                image_paths.append(image_path)
    return image_paths

# All three splits of the kneeKL299 dataset are read from one shared root.
# val/test previously pointed at a different Drive root
# ('/content/drive/My Drive/Master Thesis/...') than train; they now use the
# same root as train, which is also the root the kneeKL224 cell reads from.
# NOTE(review): confirm this is where the kneeKL299 folders live on Drive.
kl299_root = '/content/drive/My Drive/Tilburg University/Thesis/Master Thesis/KneeXrayData/ClsKLData/kneeKL299'
train_image_dir = kl299_root + '/train'
val_image_dir = kl299_root + '/val'
test_image_dir = kl299_root + '/test'

# File paths of every image in each split.
train_image_paths = get_image_paths(train_image_dir)
val_image_paths = get_image_paths(val_image_dir)
test_image_paths = get_image_paths(test_image_dir)

# Width of the one-hot label vectors produced by create_labels.
num_classes = 5

def create_labels(image_paths):
    """One-hot encode the class of each image, read from its parent folder name.

    The name of the folder that directly contains an image is interpreted as
    its integer class id. The folder name is extracted with ``os.path`` so it
    works with whatever separator ``os.path.join`` used to build the paths,
    rather than assuming ``"/"``.

    Args:
        image_paths: Iterable of paths of the form ``.../<class id>/<file>``.

    Returns:
        Array produced by ``to_categorical`` with one row per path and
        ``num_classes`` (module-level) columns.

    Raises:
        ValueError: If a parent folder name is not an integer.
    """
    class_ids = [
        int(os.path.basename(os.path.dirname(image_path)))
        for image_path in image_paths
    ]
    labels = np.array(class_ids)
    labels = to_categorical(labels, num_classes)
    return labels

# One-hot labels per split, in the same order as the corresponding path lists.
train_labels, val_labels, test_labels = (
    create_labels(split_paths)
    for split_paths in (train_image_paths, val_image_paths, test_image_paths)
)


image_size = (224, 224)


def _load_scaled_images(paths, size):
    """Open every image, resize it to ``size`` (Lanczos) and divide by 255.0.

    Returns a list with one NumPy array per path, in input order.
    """
    scaled = []
    for path in paths:
        with Image.open(path) as opened:
            resized = opened.resize(size, Image.LANCZOS)
            scaled.append(np.array(resized) / 255.0)
    return scaled


#Manual preprocessing for the training, validation and test sets
preprocessed_train_data = _load_scaled_images(train_image_paths, image_size)
preprocessed_val_data = _load_scaled_images(val_image_paths, image_size)
preprocessed_test_data = _load_scaled_images(test_image_paths, image_size)

#Save preprocessed data
# The files are written to the folder that the loading cell further down
# reads them from ('.../Tilburg University/Master Thesis/KneeXrayData').
# They were previously written under '/content/drive/My Drive/Master Thesis/',
# so a fresh run could not find them again.
# NOTE(review): confirm this is the intended Drive folder.
preprocessed_dir = '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData'
for file_stem, values in (
    ('preprocessed_train_data', preprocessed_train_data),
    ('preprocessed_val_data', preprocessed_val_data),
    ('preprocessed_test_data', preprocessed_test_data),
    ('preprocessed_train_labels', train_labels),
    ('preprocessed_val_labels', val_labels),
    ('preprocessed_test_labels', test_labels),
):
    # np.asarray stacks the per-image lists and leaves existing arrays uncopied.
    np.save(os.path.join(preprocessed_dir, file_stem + '.npy'), np.asarray(values))


In [None]:
# Locations of the three kneeKL224 splits on the mounted Drive.
train_image_dir, val_image_dir, test_image_dir = (
    '/content/drive/My Drive/Tilburg University/Thesis/Master Thesis/KneeXrayData/ClsKLData/kneeKL224/train',
    '/content/drive/My Drive/Tilburg University/Thesis/Master Thesis/KneeXrayData/ClsKLData/kneeKL224/val',
    '/content/drive/My Drive/Tilburg University/Thesis/Master Thesis/KneeXrayData/ClsKLData/kneeKL224/test',
)

# File paths of every image in each split (train, then val, then test).
train_image_paths, val_image_paths, test_image_paths = [
    get_image_paths(split_dir)
    for split_dir in (train_image_dir, val_image_dir, test_image_dir)
]

# Width of the one-hot label vectors produced by create_labels.
num_classes = 5

def create_labels(image_paths):
    """One-hot encode the class of each image, read from its parent folder name.

    The name of the folder that directly contains an image is interpreted as
    its integer class id. The folder name is extracted with ``os.path`` so it
    works with whatever separator ``os.path.join`` used to build the paths,
    rather than assuming ``"/"``.

    Args:
        image_paths: Iterable of paths of the form ``.../<class id>/<file>``.

    Returns:
        Array produced by ``to_categorical`` with one row per path and
        ``num_classes`` (module-level) columns.

    Raises:
        ValueError: If a parent folder name is not an integer.
    """
    class_ids = [
        int(os.path.basename(os.path.dirname(image_path)))
        for image_path in image_paths
    ]
    labels = np.array(class_ids)
    labels = to_categorical(labels, num_classes)
    return labels

# One-hot labels per kneeKL224 split, in the same order as the path lists.
train_labels_224, val_labels_224, test_labels_224 = (
    create_labels(split_paths)
    for split_paths in (train_image_paths, val_image_paths, test_image_paths)
)

image_size = (224, 224)

#Manual preprocessing of the training, validation and test sets:
#each image is resized to image_size (Lanczos) and divided by 255.0
scaled_splits_224 = {}
for split_name, split_paths in (
    ("train", train_image_paths),
    ("val", val_image_paths),
    ("test", test_image_paths),
):
    split_images = []
    for path in split_paths:
        with Image.open(path) as opened:
            resized = opened.resize(image_size, Image.LANCZOS)
            split_images.append(np.array(resized) / 255.0)
    scaled_splits_224[split_name] = split_images

preprocessed_train_data_224 = scaled_splits_224["train"]
preprocessed_val_data_224 = scaled_splits_224["val"]
preprocessed_test_data_224 = scaled_splits_224["test"]

#Save preprocessed kneeKL224 images and labels, one .npy file per array
npy_targets_224 = (
    ('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_data_224.npy', preprocessed_train_data_224),
    ('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_data_224.npy', preprocessed_val_data_224),
    ('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_data_224.npy', preprocessed_test_data_224),
    ('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_labels_224.npy', train_labels_224),
    ('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_labels_224.npy', val_labels_224),
    ('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_labels_224.npy', test_labels_224),
)
for npy_path, values in npy_targets_224:
    np.save(npy_path, np.array(values))

In [None]:
# Reload the preprocessed images and labels (files without the _224 suffix) from Drive.
(
    preprocessed_train_data,
    preprocessed_val_data,
    preprocessed_test_data,
    train_labels,
    val_labels,
    test_labels,
) = [
    np.load(npy_path)
    for npy_path in (
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_data.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_data.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_data.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_labels.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_labels.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_labels.npy',
    )
]

In [None]:
# Reload the preprocessed kneeKL224 images and labels (the *_224.npy files) from Drive.
kl224_npy_paths = (
    '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_data_224.npy',
    '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_data_224.npy',
    '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_data_224.npy',
    '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_labels_224.npy',
    '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_labels_224.npy',
    '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_labels_224.npy',
)
(
    preprocessed_train_data_224,
    preprocessed_val_data_224,
    preprocessed_test_data_224,
    train_labels_224,
    val_labels_224,
    test_labels_224,
) = map(np.load, kl224_npy_paths)

In [None]:
#Combine the two preprocessed image sets split by split; within each result
#the non-suffixed arrays come first, followed by the *_224 arrays
#(np.concatenate joins along axis 0 by default)
preprocessed_train_data_combined = np.concatenate([preprocessed_train_data, preprocessed_train_data_224])
preprocessed_val_data_combined = np.concatenate([preprocessed_val_data, preprocessed_val_data_224])
preprocessed_test_data_combined = np.concatenate([preprocessed_test_data, preprocessed_test_data_224])

#Combine the corresponding labels in the same order so rows stay aligned with the images
train_labels_combined = np.concatenate([train_labels, train_labels_224])
val_labels_combined = np.concatenate([val_labels, val_labels_224])
test_labels_combined = np.concatenate([test_labels, test_labels_224])


In [None]:
# Load the "complete" image arrays and their labels from Drive.
# NOTE(review): the three *_complete.npy data files are the same files that
# the later saving cell writes the standardized arrays to, and no visible
# cell writes the label files under 'Combined data' - confirm the intended
# execution order on a fresh runtime.
(
    preprocessed_train_data,
    preprocessed_val_data,
    preprocessed_test_data,
    train_labels,
    val_labels,
    test_labels,
) = [
    np.load(npy_path)
    for npy_path in (
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_data_complete.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_data_complete.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_data_complete.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/train_labels_complete_Xray.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/val_labels_complete_Xray.npy',
        '/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/test_labels_complete_Xray.npy',
    )
]

In [None]:
import numpy as np

#Statistics are taken from the training data only, reducing over the first
#three axes, and are then reused unchanged for the validation and test data
reduce_axes = (0, 1, 2)
mean = np.mean(preprocessed_train_data, axis=reduce_axes)
std = np.std(preprocessed_train_data, axis=reduce_axes)


def _standardize(split_data):
    """Subtract the training mean and divide by the training standard deviation."""
    return (split_data - mean) / std


#Standardize the data
preprocessed_train_data_standardized = _standardize(preprocessed_train_data)
preprocessed_val_data_standardized = _standardize(preprocessed_val_data)
preprocessed_test_data_standardized = _standardize(preprocessed_test_data)


In [None]:
# Persist the standardized splits. The arrays are passed to np.save directly:
# they are already NumPy arrays, and wrapping them in np.array() would first
# create a full in-memory copy of each one.
# NOTE(review): these are the same *_complete.npy files that an earlier cell
# loads its input from - confirm that overwriting them is intended.
np.save('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_train_data_complete.npy', preprocessed_train_data_standardized)
np.save('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_val_data_complete.npy', preprocessed_val_data_standardized)
np.save('/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/preprocessed_test_data_complete.npy', preprocessed_test_data_standardized)

# Feature extraction

In [None]:
num_classes = 5

#Model: two convolution layers (max pooling after the first), flattened into
#a dense head with dropout and a softmax output over num_classes
xray_cnn_layers = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 1), name="COV1"),
    MaxPooling2D((2, 2)),
    # "COV2" is looked up by name when the feature extractor is built.
    Conv2D(64, (3, 3), activation='relu', name="COV2"),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax', name="output_layer"),
]
basic_model_xray = Sequential(xray_cnn_layers)

basic_model_xray.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

basic_model_xray.summary()

#Training: 20 epochs, with the validation split evaluated after each epoch
fit_options = dict(
    epochs=20,
    validation_data=(preprocessed_val_data, val_labels),
    verbose=1,
)
history = basic_model_xray.fit(preprocessed_train_data, train_labels, **fit_options)

#Evaluation on the held-out test split
evaluation_results = basic_model_xray.evaluate(preprocessed_test_data, test_labels)
print("Evaluation results on the test dataset:", evaluation_results)

In [None]:
#Build a model that maps the trained network's input to the output of its "COV2" layer
cov2_output = basic_model_xray.get_layer("COV2").output
cnn_feature_extractor_xray = Model(
    inputs=basic_model_xray.input,
    outputs=cov2_output,
    name="cnn_feature_extractor_xray2.0",
)

#Save the CNN feature extractor model
extractor_save_path = '/content/drive/My Drive/Tilburg University/Master Thesis/KneeXrayData/cnn_feature_extractor_xray2.0'
cnn_feature_extractor_xray.save(extractor_save_path)


In [None]:
# Split the training images into two consecutive parts so features can be
# extracted and saved per part.
# NOTE(review): 5785 is a hard-coded split point; its origin is not shown here.
train_split_index = 5785
first_part, second_part = slice(None, train_split_index), slice(train_split_index, None)
preprocessed_train_data1 = preprocessed_train_data[first_part]
preprocessed_train_data2 = preprocessed_train_data[second_part]

In [None]:
batch_size = 32

# Extract "COV2" features for the first part of the training images.
# A single predict() call is used instead of calling predict() once per
# 32-sample slice in a Python loop: predict() already processes its input in
# chunks of `batch_size` and concatenates the per-batch outputs, and it is
# not intended to be called repeatedly on small slices.
cnn_features_X_train_Xray1 = cnn_feature_extractor_xray.predict(
    preprocessed_train_data1, batch_size=batch_size
)

np.save('/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/cnn_features_X_train_Xray1.npy', cnn_features_X_train_Xray1)

In [None]:
# Extract "COV2" features for the second part of the training images.
# A single predict() call is used instead of calling predict() once per
# slice in a Python loop: predict() already processes its input in chunks of
# `batch_size` and concatenates the per-batch outputs.
cnn_features_X_train_Xray2 = cnn_feature_extractor_xray.predict(
    preprocessed_train_data2, batch_size=batch_size
)

np.save('/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/cnn_features_X_train_Xray2.npy', cnn_features_X_train_Xray2)

In [None]:
# Extract "COV2" features for the validation images.
# A single predict() call is used instead of calling predict() once per
# slice in a Python loop: predict() already processes its input in chunks of
# `batch_size` and concatenates the per-batch outputs.
cnn_features_X_val_Xray = cnn_feature_extractor_xray.predict(
    preprocessed_val_data, batch_size=batch_size
)

np.save('/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/cnn_features_X_val_Xray.npy', cnn_features_X_val_Xray)

In [None]:
# Extract "COV2" features for the test images.
# A single predict() call is used instead of calling predict() once per
# slice in a Python loop: predict() already processes its input in chunks of
# `batch_size` and concatenates the per-batch outputs.
cnn_features_X_test_Xray = cnn_feature_extractor_xray.predict(
    preprocessed_test_data, batch_size=batch_size
)

np.save('/content/drive/My Drive/Tilburg University/Master Thesis/Combined data/cnn_features_X_test_Xray.npy', cnn_features_X_test_Xray)

# Visualisation

In [None]:
# Column-wise sum of the training label matrix gives one count per grade
# (presumably the labels are one-hot rows - verify against the loaded file).
grade_counts = np.sum(train_labels, axis=0)

# Bar plot of the per-grade sample counts, drawn on an explicit figure/axes pair
grades = ["Grade " + str(grade_index) for grade_index in range(len(grade_counts))]
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(grades, grade_counts, color="#b6d7a8", label="Grade Counts")
ax.set_ylabel('Number of samples')
ax.set_xlabel("Grades")
ax.legend()
plt.show()