**Create Symbolic Links and Split Data**

In [None]:
import os
import sys
import random
import shutil

def delete_symbolic_links(target_dir):
    """
    Remove every symbolic link located directly inside ``target_dir``.

    Entries that are not symbolic links are left untouched and reported on
    stdout. When ``target_dir`` does not exist a message is printed and
    nothing else happens. Sub-directories are not descended into.
    """
    if os.path.exists(target_dir):
        for entry_name in os.listdir(target_dir):
            entry_path = os.path.join(target_dir, entry_name)

            # Anything that is not a link is kept and only reported
            if not os.path.islink(entry_path):
                print(f"Skipped (not a link): {entry_path}")
                continue

            os.unlink(entry_path)
    else:
        print(f"Directory {target_dir} does not exist.")

def split_data_and_create_links(source_dirs, train_dir, val_dir, test_dir, positive, split_ratio=(0.6, 0.2, 0.2), seed=None):
    """
    Randomly split the files of each source directory into train/val/test
    sets and expose them through symbolic links.

    For every directory in ``source_dirs`` the regular files it contains are
    shuffled and partitioned according to ``split_ratio``. Each file is then
    symlinked, by absolute path, into ``<split_dir>/positive`` or
    ``<split_dir>/negative`` depending on ``positive``.

    Args:
        source_dirs: Iterable of directories holding the original files.
        train_dir: Root directory of the training split.
        val_dir: Root directory of the validation split.
        test_dir: Root directory of the test split.
        positive: Truthy to link into the ``positive`` sub-directory,
            falsy to link into ``negative``.
        split_ratio: ``(train, val, test)`` fractions. Only the first two are
            used to compute the boundaries; whatever remains goes to test.
        seed: Optional seed for the shuffle. When ``None`` the module-level
            ``random`` generator is used, so a prior ``random.seed(...)``
            call is still honoured.

    Notes:
        * Source directories that do not exist are reported and skipped.
        * Entries that are not regular files (e.g. sub-directories) are
          ignored *before* the split so they do not distort the ratios.
        * A name that already exists in a target directory is left alone, so
          when two source directories contain the same filename only the
          first one gets linked.
    """
    def create_dir_and_symlink_files(file_list, source_dir, target_dir):
        """Symlink each name in ``file_list`` from ``source_dir`` into ``target_dir``."""
        # The link target has to be absolute: a relative target is resolved
        # against the directory that holds the link, not the current working
        # directory, which would leave the link dangling.
        source_dir = os.path.abspath(source_dir)
        os.makedirs(target_dir, exist_ok=True)

        for filename in file_list:
            source_file = os.path.join(source_dir, filename)
            target_file = os.path.join(target_dir, filename)

            if not os.path.isfile(source_file):
                continue

            # A dangling link (islink but not exists) left by an earlier run
            # would make os.symlink raise FileExistsError; replace it instead.
            if os.path.islink(target_file) and not os.path.exists(target_file):
                os.unlink(target_file)

            # lexists also sees links, so valid existing entries are kept
            if not os.path.lexists(target_file):
                os.symlink(source_file, target_file)

    # A private generator when a seed is given; otherwise the shared module
    # (which exposes the same ``shuffle`` function).
    rng = random.Random(seed) if seed is not None else random
    pos_or_neg = 'positive' if positive else 'negative'

    # Process each source directory
    for source_dir in source_dirs:
        if not os.path.isdir(source_dir):
            print(f"Source directory {source_dir} does not exist.")
            continue

        # Keep regular files only, in sorted order: os.listdir returns entries
        # in arbitrary order, so sorting is required for a seeded shuffle to
        # be repeatable.
        filenames = sorted(
            name for name in os.listdir(source_dir)
            if os.path.isfile(os.path.join(source_dir, name))
        )
        rng.shuffle(filenames)

        # Calculate split indices
        total_files = len(filenames)
        train_end = int(total_files * split_ratio[0])
        val_end = train_end + int(total_files * split_ratio[1])

        # Split filenames into training, validation, and test sets
        train_filenames = filenames[:train_end]
        val_filenames = filenames[train_end:val_end]
        test_filenames = filenames[val_end:]

        # Create the class sub-directories and link the files into them
        create_dir_and_symlink_files(train_filenames, source_dir, os.path.join(train_dir, pos_or_neg))
        create_dir_and_symlink_files(val_filenames, source_dir, os.path.join(val_dir, pos_or_neg))
        create_dir_and_symlink_files(test_filenames, source_dir, os.path.join(test_dir, pos_or_neg))

# Seed Python's RNG before shuffling. Without it every re-run of this cell
# moves files between train/val/test, so a model trained earlier could later be
# evaluated on images it was trained on.
random.seed(42)

# Root directory of each split; every root holds a 'positive' and a 'negative' class folder
split_roots = {'train': '../data/train', 'val': '../data/val', 'test': '../data/test'}

# Delete old symbolic links so the new split starts from empty class folders
for split_root in split_roots.values():
    for class_name in ('positive', 'negative'):
        delete_symbolic_links(f"{split_root}/{class_name}")

# Source directories
pos_image_paths = ['../all_brain_images/manual_label', '../all_brain_images/tumor_dataset']
neg_image_paths = ['../all_non_brain/manual_label', '../all_non_brain/ct_scans', '../all_non_brain/rand_geographic']

# Split data and create new directories with symbolic links
split_data_and_create_links(pos_image_paths, split_roots['train'], split_roots['val'], split_roots['test'], positive=True)
split_data_and_create_links(neg_image_paths, split_roots['train'], split_roots['val'], split_roots['test'], positive=False)


**Train Model**

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import preprocess_input

# Paths to the train, validation, and test directories
train_dir = os.path.normpath('../data/train')
val_dir = os.path.normpath('../data/val')
test_dir = os.path.normpath('../data/test')

# Training images: VGG16 preprocessing plus random rotation, shifts and
# horizontal flips for augmentation
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)

# Validation and test images: VGG16 preprocessing only, no augmentation
val_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Arguments shared by all three directory iterators
flow_kwargs = dict(target_size=(224, 224), batch_size=32, class_mode='binary')

# Data generators for the train, validation, and test sets
train_generator = train_datagen.flow_from_directory(train_dir, **flow_kwargs)
validation_generator = val_test_datagen.flow_from_directory(val_dir, **flow_kwargs)
# The test iterator is not shuffled, so its batches follow the file order
test_generator = val_test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout
from keras.applications import VGG16

# Where the trained model is written. The directory is created up front so a
# missing '../models' folder cannot make the save fail after training has
# already finished.
model_path = '../models/brain_img_classifier_1.h5'
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Load VGG16 model, pre-trained on ImageNet data, without its classifier head
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the layers of the base model so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

model = Sequential([
    base_model,            # Base model (VGG16)
    Flatten(),             # Flatten the output
    Dense(256, activation='relu'),  # A dense layer
    Dropout(0.5),          # Dropout for regularization
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

num_epochs = 10  # Set the number of epochs

# len(generator) is the number of batches per pass, including the final
# partial batch. `samples // batch_size` would drop that batch and evaluates
# to 0 steps when a split holds fewer images than one batch.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=num_epochs
)

test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test accuracy: {test_accuracy}, Test loss: {test_loss}")

model.save(model_path)
