In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.layers import Dense
from tensorflow.keras.preprocessing import image
from tensorflow.keras.callbacks import EarlyStopping
from PIL import Image
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.xception import Xception
from sklearn.metrics import classification_report
from tensorflow import keras
import pickle
import logging
import cv2

# Make sure the output folder for saved models is present
os.makedirs('models', exist_ok=True)

# Logging setup: INFO and above, written to a log file and echoed to the console
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.FileHandler("brain_stroke_cnn.log"), logging.StreamHandler()],
    level=logging.INFO,
)

# Module-level logger used by the rest of this script
logger = logging.getLogger(__name__)

# Locate the dataset root directory among a few known folder names
def verify_and_fix_paths():
    """Return the first known dataset folder that exists, or None.

    Candidate folder names are checked in order, relative to the current
    working directory. Despite its name, this function does not rewrite or
    re-encode any path, and it does not assign the module-level path
    constants; the caller is expected to use the returned value.

    Returns:
    --------
    str or None
        The first candidate that is an existing directory, or None when
        none of them is.
    """
    logger.info("Verifying dataset paths...")

    # List of potential dataset locations. A './'-prefixed variant would
    # resolve to the same directory as the bare name, so only bare names
    # are listed.
    potential_paths = (
        'Brain_Stroke_CT-SCAN_image',
        'Brain_Stroke_CT_SCAN_image',  # Try alternative dash vs underscore
        'Dataset',
    )

    # Find first valid dataset path. isdir (rather than exists) so that a
    # regular file that happens to carry one of these names is not accepted.
    for path in potential_paths:
        if os.path.isdir(path):
            logger.info("Found dataset at: %s", path)
            return path

    logger.error("Could not find a valid dataset path")
    return None

# Resolve the dataset root; abort early when no candidate folder was found
print("Setting up paths...")
DATASET_PATH = verify_and_fix_paths()
if DATASET_PATH is None:
    raise FileNotFoundError("Could not find dataset directory. Please check your dataset path.")

# The three split folders live directly under the dataset root
TRAIN_PATH, TEST_PATH, VALID_PATH = (
    os.path.join(DATASET_PATH, split_dir) for split_dir in ('Train', 'Test', 'Validation')
)

# Report (but do not fail on) split folders that are missing
print("Verifying training, testing, and validation paths...")
for name, path in zip(('Training', 'Testing', 'Validation'), (TRAIN_PATH, TEST_PATH, VALID_PATH)):
    if not os.path.exists(path):
        print(f"WARNING: {name} path does not exist: {path}")
        continue
    print(f"{name} path exists: {path}")

# Image parameters shared by the loaders and the model
batch_size = 34
Img_size = (228, 228)

# Function to load datasets manually (to avoid Unicode path issues)
def load_datasets_manually():
    """
    Load the Train/Validation/Test folders with OpenCV and wrap them in
    batched tf.data pipelines.

    Class names are the sorted sub-directory names of TRAIN_PATH and a
    sample's integer label is the index of its class in that list. Images
    are decoded to RGB, resized to Img_size and scaled to [0, 1] float32.
    The training pipeline is shuffled every epoch and randomly augmented;
    validation and test pipelines keep directory order.

    NOTE(review): pixels are scaled to [0, 1] here, whereas
    tf.keras.utils.image_dataset_from_directory yields 0-255 values.
    Confirm which range the downstream model expects.

    Returns:
    --------
    tuple
        train_ds, valid_ds, test_ds, class_names

    Raises:
    -------
    FileNotFoundError
        If TRAIN_PATH, TEST_PATH or VALID_PATH does not exist.
    ValueError
        If no training image could be read.
    """
    print("Loading datasets manually to avoid encoding issues...")

    # Check if paths exist
    for path_name, path in [("Train", TRAIN_PATH), ("Test", TEST_PATH), ("Validation", VALID_PATH)]:
        if not os.path.exists(path):
            raise FileNotFoundError(f"{path_name} path does not exist: {path}")

    # Get class names from subdirectories
    class_names = sorted(
        item for item in os.listdir(TRAIN_PATH)
        if os.path.isdir(os.path.join(TRAIN_PATH, item))
    )
    print(f"Found classes: {class_names}")

    # Function to load images from a directory
    def load_images_from_dir(directory):
        """Read every .png/.jpg/.jpeg file under directory/<class_name>/.

        Unreadable files are reported and skipped. Returns a float32 image
        array and an int32 label array of equal length.
        """
        images = []
        labels = []

        for class_idx, class_name in enumerate(class_names):
            class_path = os.path.join(directory, class_name)
            if not os.path.exists(class_path):
                print(f"Class directory not found: {class_path}")
                continue

            print(f"Loading images from {class_path}")
            for img_file in sorted(os.listdir(class_path)):
                if not img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    continue

                img_path = os.path.join(class_path, img_file)
                try:
                    # Read the bytes with NumPy and decode in memory:
                    # cv2.imread cannot open non-ASCII paths on Windows,
                    # which is the very case this loader exists for.
                    raw = np.fromfile(img_path, dtype=np.uint8)
                    img = cv2.imdecode(raw, cv2.IMREAD_COLOR)
                    if img is None:
                        print(f"Failed to load image: {img_path}")
                        continue

                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, Img_size)
                    # float32 halves the memory of the default float64
                    # result and is what the model computes in anyway.
                    img = img.astype(np.float32) / 255.0  # Normalize

                    images.append(img)
                    # For binary classification the label is the class index
                    labels.append(class_idx)
                except Exception as e:
                    print(f"Error processing image {img_path}: {str(e)}")

        return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)

    # Load dataset for each split
    x_train, y_train = load_images_from_dir(TRAIN_PATH)
    x_valid, y_valid = load_images_from_dir(VALID_PATH)
    x_test, y_test = load_images_from_dir(TEST_PATH)

    print(f"Training set: {len(x_train)} images")
    print(f"Validation set: {len(x_valid)} images")
    print(f"Test set: {len(x_test)} images")

    if len(x_train) == 0:
        raise ValueError(f"No readable training images found under: {TRAIN_PATH}")

    # Create TensorFlow datasets. Images were appended class by class, so
    # without shuffling every early batch would contain a single class.
    train_ds = (
        tf.data.Dataset.from_tensor_slices((x_train, y_train))
        .shuffle(buffer_size=len(x_train), seed=123, reshuffle_each_iteration=True)
        .batch(batch_size)
    )
    valid_ds = tf.data.Dataset.from_tensor_slices((x_valid, y_valid)).batch(batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

    # Define data augmentation layer
    data_augmentation = tf.keras.Sequential([
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomRotation(0.2),
        tf.keras.layers.RandomZoom(0.2),
        tf.keras.layers.RandomTranslation(0.2, 0.2),
    ])

    # Apply data augmentation to training set
    train_ds = train_ds.map(
        lambda x, y: (data_augmentation(x, training=True), y),
        num_parallel_calls=tf.data.AUTOTUNE
    )

    # The training pipeline is deliberately NOT cached: a cache placed after
    # the shuffle/augmentation steps would replay the first epoch's order
    # and random transforms on every later epoch. The source arrays are
    # already in memory, so only prefetching is needed.
    train_ds = train_ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    valid_ds = valid_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
    test_ds = test_ds.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

    return train_ds, valid_ds, test_ds, class_names

# First attempt: let Keras build the three splits straight from the folders.
# Train and validation use a fixed shuffle seed; the test split keeps file order.
print("Loading datasets...")
try:
    train_ds, valid_ds, test_ds = (
        tf.keras.utils.image_dataset_from_directory(
            split_path, image_size=Img_size, batch_size=batch_size, **split_options)
        for split_path, split_options in (
            (TRAIN_PATH, {'seed': 123}),
            (VALID_PATH, {'seed': 123}),
            (TEST_PATH, {'shuffle': False}),
        )
    )

    # Class names as inferred by Keras from the training sub-directories
    class_names = train_ds.class_names
    print(f"Class names: {class_names}")

except Exception as e:
    # Any failure of the Keras loader (e.g. a path decoding error) switches
    # to the OpenCV-based loader defined in this file.
    print(f"Error loading datasets with tf.keras.utils.image_dataset_from_directory: {str(e)}")
    print("Falling back to manual dataset loading...")

    train_ds, valid_ds, test_ds, class_names = load_datasets_manually()

# Visualizing sample images (optional): saves a grid of up to 25 images from
# the first training batch to 'sample_training_images.png'. Failures are
# reported and do not stop the script.
print("Visualizing sample images...")
try:
    plt.figure(figsize=(10, 10))
    for images, labels in train_ds.take(1):
        batch = images.numpy()
        # The Keras directory loader yields 0-255 pixels while the manual
        # loader yields values in [0, 1]; casting the latter straight to
        # uint8 would truncate every pixel to 0 (an all-black grid). The
        # small tolerance absorbs interpolation round-off from augmentation.
        if batch.size and batch.max() <= 1.001:
            batch = batch * 255.0
        batch = np.clip(batch, 0, 255).astype("uint8")

        for i in range(min(25, batch.shape[0])):
            plt.subplot(5, 5, i + 1)
            plt.imshow(batch[i])
            plt.title(class_names[int(labels[i])])
            plt.axis('off')
    plt.savefig('sample_training_images.png')
except Exception as e:
    print(f"Error visualizing sample images: {str(e)}")
finally:
    # Always release the figure, even when plotting failed part-way
    plt.close()

# Tally the entries of each class folder in the training split.
# Note: every entry of a class folder is counted, whatever its extension.
print("Counting training images...")
class_counts = {}
for class_name in os.listdir(TRAIN_PATH):
    class_dir = os.path.join(TRAIN_PATH, class_name)
    if not os.path.isdir(class_dir):
        continue
    num_images = len(os.listdir(class_dir))
    class_counts[class_name] = num_images
    print(f"{class_name}: {num_images} images")

# Save a bar chart of the per-class counts to 'class_distribution.png'
plt.figure(figsize=(10, 6))
plt.bar(list(class_counts), list(class_counts.values()), color='green')
plt.xlabel('Class Names')
plt.ylabel('Number of Images')
plt.title('Image Count per Class')
plt.xticks(rotation=45)
plt.savefig('class_distribution.png')
plt.close()

# Define callbacks for training
print("Setting up model training callbacks...")

# Halve the learning rate when validation loss has not improved for 5 epochs
anne = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=2, min_lr=1e-7, min_delta=1e-5, mode='auto')

# Stop on the same signal the LR scheduler watches (validation loss) rather
# than training loss, which keeps falling while the model overfits; keep the
# weights of the best validation epoch instead of the last one.
earlystop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
callbacks = [anne, earlystop]

# Dictionary to store all models
models = {}

# DenseNet121 Model: ImageNet-pretrained backbone, fully fine-tuned, with a
# single sigmoid unit on the flattened feature map for binary classification.
# NOTE(review): no DenseNet-specific input preprocessing is applied here, and
# the two dataset loaders in this file produce different pixel ranges
# (0-255 vs 0-1) - confirm the intended input scaling.
print("\n=== Training DenseNet121 Model ===")

# Derive the input shape from Img_size so the model always matches the loaders
base_model = DenseNet121(input_shape=Img_size + (3,), include_top=False, weights='imagenet')

# Fine-tune every backbone layer
for layer in base_model.layers:
    layer.trainable = True

x = base_model.output
x = Flatten()(x)
pred = Dense(1, activation='sigmoid')(x)
model1 = Model(inputs=base_model.input, outputs=pred)
print("Model summary:")
model1.summary()

model1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', keras.metrics.Precision(), keras.metrics.Recall()]
)

# `callbacks` is already a list; pass it as-is instead of nesting it in
# another list.
history1 = model1.fit(
    train_ds,
    epochs=50,
    validation_data=valid_ds,
    callbacks=callbacks
)

2025-04-15 22:12:35,277 - __main__ - INFO - Verifying dataset paths...
2025-04-15 22:12:35,278 - __main__ - INFO - Found dataset at: Brain_Stroke_CT_SCAN_image


Setting up paths...
Verifying training, testing, and validation paths...
Training path exists: Brain_Stroke_CT_SCAN_image\Train
Testing path exists: Brain_Stroke_CT_SCAN_image\Test
Validation path exists: Brain_Stroke_CT_SCAN_image\Validation
Loading datasets...
Error loading datasets with tf.keras.utils.image_dataset_from_directory: 'utf-8' codec can't decode byte 0xe0 in position 8: invalid continuation byte
Falling back to manual dataset loading...
Loading datasets manually to avoid encoding issues...
Found classes: ['Normal', 'Stroke']
Loading images from Brain_Stroke_CT_SCAN_image\Train\Normal
Loading images from Brain_Stroke_CT_SCAN_image\Train\Stroke
Loading images from Brain_Stroke_CT_SCAN_image\Validation\Normal
Loading images from Brain_Stroke_CT_SCAN_image\Validation\Stroke
Loading images from Brain_Stroke_CT_SCAN_image\Test\Normal
Loading images from Brain_Stroke_CT_SCAN_image\Test\Stroke
Training set: 1843 images
Validation set: 235 images
Test set: 437 images






Visualizing sample images...
Counting training images...
Normal: 1087 images
Stroke: 756 images
Setting up model training callbacks...

=== Training DenseNet121 Model ===






Model summary:
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 228, 228, 3)]        0         []                            
                                                                                                  
 zero_padding2d (ZeroPaddin  (None, 234, 234, 3)          0         ['input_1[0][0]']             
 g2D)                                                                                             
                                                                                                  
 conv1/conv (Conv2D)         (None, 114, 114, 64)         9408      ['zero_padding2d[0][0]']      
                                                                                                  
 conv1/bn (BatchNormalizati  (None, 114, 114, 64)         256       ['conv1/con




Epoch 1/50












10/55 [====>.........................] - ETA: 2:20 - loss: 0.2117 - accuracy: 0.9029 - precision: 0.0000e+00 - recall: 0.0000e+00