In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
import cv2
import random
import sklearn.model_selection as model_selection
import logging
from keras.applications.vgg16 import VGG16
from keras.layers import Input, Flatten, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from sklearn.model_selection import train_test_split
import cv2
import random

img_width, img_height = 150, 150
input_shape = (img_width, img_height, 3)
batch_size = 128
epochs = 10
num_classes = 5

# train_datagen = ImageDataGenerator(
#     rescale=1. / 255,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True,
#     validation_split=0.2
# )  

def prepare_file_paths(categories, base_dir):
    file_paths = []
    labels = []
    
    label_mapping = {'BlotGel': 0, 'FACS': 1, 'Macroscopy': 2, 'Microscopy': 3, 'Noneoftheabove':4}

    for category in categories:
        path = os.path.join(base_dir, f"{category}")
        category_label = label_mapping[category]
        
        for img in os.listdir(path):
            file_paths.append(os.path.join(path, img))
            labels.append(category_label)

    return file_paths, labels

def match_class_counts(paths, labels):
    class_counts = {class_name: labels.count(class_name) for class_name in set(labels)}
    print(class_counts)
    for key, value in class_counts.items():
        print(f"{key} -> {value}")
    return paths, labels, class_counts

def create_generator_with_balanced_augmentation(paths, labels, class_counts, batch_size):
    num_samples = len(paths)
    steps = num_samples // batch_size
    remaining_paths = num_samples % batch_size

    majority_class = max(class_counts, key=class_counts.get)
    oversampling_images = {class_name: ((class_counts[majority_class] - class_counts[class_name]) // steps) for class_name in class_counts}

    datagen = ImageDataGenerator(
        brightness_range=[0.8, 1.2],
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True
    )

    while True:
        for i in range(steps + 1):
            if i != steps:
                batch_paths = paths[i * batch_size: (i + 1) * batch_size]
                batch_labels = labels[i * batch_size: (i + 1) * batch_size]
            else:
                batch_paths = paths[i * batch_size: (i * batch_size) + remaining_paths]
                batch_labels = labels[i * batch_size: (i * batch_size) + remaining_paths]

            images = []
            for path in batch_paths:
                try:
                    rawdata = cv2.imread(path)
                    resized_image = cv2.resize(rawdata, (img_width, img_height))
                    images.append(resized_image)
                except Exception as e:
                    print("Error")
                    break
            
            if i != steps:
                class_indices = {label: [] for label in oversampling_images.keys()}
                for idx, label in enumerate(batch_labels):
                    class_indices[label].append(idx)

                for label in oversampling_images.keys():
                    num_to_augment = oversampling_images[label]
                    if num_to_augment != 0:
                        if len(class_indices[label]) <= num_to_augment:
                            num_to_augment = len(class_indices[label])
                        indices_to_augment = random.sample(class_indices[label], num_to_augment)
                        images_path_to_augment = [batch_paths[idx] for idx in indices_to_augment]
                        labels_of_images = [batch_labels[idx] for idx in indices_to_augment]

                        augmented_images_per_label = []
                        for path in images_path_to_augment:
                            try:
                                rawdata = cv2.imread(path)
                                augmented_image = datagen.random_transform(rawdata)
                                augmented_image = np.clip(augmented_image, 0, 255)
                                new_data = cv2.resize(augmented_image, (img_width, img_height))
                                augmented_images_per_label.append(new_data)
                            except Exception as e:
                                print(f"Error processing image: {e}")

                        images.extend(augmented_images_per_label)
                        batch_labels.extend(labels_of_images)
            
            batch_images = np.array(images).reshape(-1, img_width, img_height, 3)
            batch_images = batch_images / 255.0

            batch_labels = np.array(batch_labels)
            
            yield batch_images, batch_labels


def create_generator(paths, labels, batch_size):
    num_samples = len(paths)
    steps = num_samples // batch_size
    remaining_paths = num_samples % batch_size
    while True:
        for i in range(steps+1):
            if i != steps:
                batch_paths = paths[i * batch_size: (i + 1) * batch_size]
                batch_labels = labels[i * batch_size: (i + 1) * batch_size]
            else:
                batch_paths = paths[i * batch_size: (i * batch_size)+remaining_paths]
                batch_labels = labels[i * batch_size: (i * batch_size)+remaining_paths]

            images = []
            for path in batch_paths:
                try:
                    rawdata = cv2.imread(path)
                    resized_image = cv2.resize(rawdata, (img_width, img_height))
                    images.append(resized_image)
                except Exception as e:
                    pass

            batch_images = np.array(images).reshape(-1, img_width, img_height, 3)
            batch_images = batch_images / 255.0

            yield batch_images, np.array(batch_labels)

# Categories
categories = ["BlotGel", "FACS", "Macroscopy", "Microscopy", "Noneoftheabove"]
base_dir = "./classified_images/"

# Prepare file paths and labels
file_paths, labels = prepare_file_paths(categories, base_dir)

# Split each view's data into train, validation, and test sets
train_paths, test_paths, train_labels, test_labels = train_test_split(file_paths, labels, test_size=0.15, random_state=42)
train_paths, val_paths, train_labels, val_labels = train_test_split(train_paths, train_labels, test_size=0.15, random_state=42)

# Match class counts for each view
train_paths, train_labels, class_counts = match_class_counts(train_paths, train_labels)

# Create generators for train, validation, and test data for each view with augmentation based on the highest count
batch_size = 128
train_generator = create_generator_with_balanced_augmentation(train_paths, train_labels, class_counts, batch_size)
val_generator = create_generator(val_paths, val_labels, batch_size)
test_generator = create_generator(test_paths, test_labels, batch_size)


{0: 19859, 1: 775, 2: 856, 3: 13033, 4: 749}
0 -> 19859
1 -> 775
2 -> 856
3 -> 13033
4 -> 749


In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, Callback
from tensorflow.keras import regularizers
import numpy as np
import warnings

# Function to create a CNN model with regularized layers
def create_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(Dense(5, activation='softmax'))  # 4 classes for AD, LMCI, EMCI, CN

    return model

# Function to train model with callbacks
def train_model_with_callbacks(model, train_paths, val_paths, train_generator, val_generator, model_name, batch_size, num_epochs):
    initial_learning_rate = 0.001
    decay_factor = 0.5
    patience_epochs = 5
    min_accuracy_increase = 0.01  # Minimum increase in validation accuracy to reset patience
    current_patience = 0
    best_val_accuracy = 0.0  # Track the best validation accuracy
    # List to store learning rates and epochs
    learning_rates = []
    epochs_list = []

    def lr_decay(epoch, current_lr):
        nonlocal current_patience, best_val_accuracy

        # Monitor the validation accuracy after the first epoch
        if epoch > 0:
            val_accuracy = model.history.history['val_sparse_categorical_accuracy'][-1]
            print(model.history.history.keys())
            print(f'Epoch {epoch}, Validation Accuracy: {val_accuracy}')

            if val_accuracy - best_val_accuracy > min_accuracy_increase:
                # If there is a significant increase in validation accuracy, reset patience
                current_patience = 0
                best_val_accuracy = val_accuracy
            elif current_patience >= patience_epochs:
                # If patience is exhausted, decay the learning rate
                new_lr = current_lr * decay_factor
                current_patience = 0
                if best_val_accuracy < val_accuracy:
                    best_val_accuracy = val_accuracy
                epochs_list.append(epoch)  # Store the epoch
                learning_rates.append(new_lr)  # Store the learning rate
                print(f'Reducing learning rate to: {new_lr}')
                return new_lr
            else:
                # Increment patience if there is no improvement in validation accuracy
                current_patience += 1

        return current_lr


    lr_scheduler = LearningRateScheduler(lambda epoch, lr: lr_decay(epoch, lr), verbose=1)
    best_model_path = f'{model_name}_cnn_best.h5'
    checkpoint_acc = ModelCheckpoint(best_model_path, monitor='val_sparse_categorical_accuracy', save_best_only=True, mode='max', verbose=1)

    try:
        model.load_weights(best_model_path)
        print("Loaded the best model")
    except Exception as e:
        print("No best model found. Training from scratch.")
        print(e)

    steps_per_epoch = len(train_paths) // batch_size
    validation_steps = len(val_paths) // batch_size

    # Check for remaining samples and add an extra step if necessary
    if len(train_paths) % batch_size != 0:
        steps_per_epoch += 1

    if len(val_paths) % batch_size != 0:
        validation_steps += 1

    history = model.fit(train_generator, epochs=num_epochs, steps_per_epoch=steps_per_epoch, validation_data=val_generator, validation_steps=validation_steps, callbacks=[checkpoint_acc, lr_scheduler])

    # Plot learning rate vs epochs
    plt.plot(epochs_list, learning_rates, marker='o', linestyle='-', color='b')
    plt.title('Learning Rate vs Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Learning Rate')
    plt.grid(True)
    plt.show()

    return history

# # Function to train model with callbacks
# def train_model_with_callbacks(model, train_paths, val_paths, train_generator, val_generator, model_name, batch_size, num_epochs):
#     initial_learning_rate = 0.001
#     decay_factor = 0.5
#     patience_epochs = 5
#     current_patience = 0

#     def lr_decay(epoch, current_lr):
#         nonlocal current_patience

#         if current_patience >= patience_epochs:
#             new_lr = current_lr * decay_factor
#             current_patience = 0
#             print(f'Reducing learning rate to: {new_lr}')
#             return new_lr
#         else:
#             current_patience += 1
#             return current_lr

#     lr_scheduler = LearningRateScheduler(lr_decay, verbose=1)
#     best_model_path = '/kaggle/working/' + f'{model_name}_best.h5'
#     checkpoint_acc = ModelCheckpoint(best_model_path, monitor='val_accuracy', save_best_only=True, mode='max', verbose=1)

#     try:
#         model.load_weights(best_model_path)
#         print("Loaded the best model")
#     except Exception as e:
#         print("No best model found. Training from scratch.")
#         print(e)

#     # Compile the model
#     #model.compile(optimizer=Adam(learning_rate=initial_learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

#     steps_per_epoch = len(train_paths) // batch_size
#     validation_steps = len(val_paths) // batch_size

#     # Check for remaining samples and add an extra step if necessary
#     if len(train_paths) % batch_size != 0:
#         steps_per_epoch += 1

#     if len(val_paths) % batch_size != 0:
#         validation_steps += 1

# #     steps_per_epoch = len(train_paths) // batch_size + 1
# #     validation_steps = len(val_paths) // batch_size + 1

#     history = model.fit(train_generator, epochs=num_epochs, steps_per_epoch=steps_per_epoch, validation_data=val_generator, validation_steps=validation_steps, callbacks=[checkpoint_acc, lr_scheduler])

#     return history

# Rest of the code remains the same...
batch_size = 128
num_epochs = 10  # Set an appropriate number of epochs

model = create_cnn_model(input_shape)

model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

# Train and save the best models for each view
history = train_model_with_callbacks(model, train_paths, val_paths, train_generator, val_generator, 'minded', batch_size, num_epochs)



No best model found. Training from scratch.
[Errno 2] Unable to open file (unable to open file: name = 'minded_cnn_best.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

Epoch 1: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 1/10
Epoch 1: val_sparse_categorical_accuracy improved from -inf to 0.29157, saving model to minded_cnn_best.h5
dict_keys(['loss', 'sparse_categorical_accuracy', 'val_loss', 'val_sparse_categorical_accuracy', 'lr'])
Epoch 1, Validation Accuracy: 0.29156625270843506

Epoch 2: LearningRateScheduler setting learning rate to 0.0010000000474974513.
Epoch 2/10


  saving_api.save_model(




KeyboardInterrupt: 

In [None]:
# Calculate the number of steps based on the test paths and batch size 
num_steps = len(test_paths) // batch_size
# Check for remaining samples and add an extra step if necessary
if len(test_paths) % batch_size != 0:
    num_steps += 1


In [None]:
# CODE for confusion matrix and classification details:

from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import seaborn as sns  
# Load the models
# axial_best_model = load_model('/kaggle/working/axial_best_acc.h5')
# sagittal_best_model = load_model('/kaggle/working/sagittal_best_acc.h5')
# coronal_best_model = load_model('/kaggle/working/coronal_best_acc.h5')

# Function to plot a confusion matrix
def plot_confusion_matrix(confusion_matrix, categories):
    plt.figure(figsize=(8, 6))
    sns.heatmap(confusion_matrix, annot=True, cmap='Blues', fmt='g', xticklabels=categories, yticklabels=categories)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.title('Confusion Matrix')
    plt.show()

# Axial Model
#num_steps_axial = len(axial_test_paths) // batch_size
predictions = model.predict(test_generator, steps=num_steps)
predicted_labels = np.argmax(predictions, axis=1)
confusion = confusion_matrix(test_labels, predicted_labels)
classification_report = classification_report(test_labels, predicted_labels, target_names=categories)

print("Classification Report:")
print(classification_report)

# Plotting Confusion Matrix
plot_confusion_matrix(confusion, categories)
