In [6]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# 1. Configure GPU
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

# 2. Custom Data Generator for unstructured training data
class CustomDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, image_dir, label_func, target_size=(224,224), batch_size=8, shuffle=True):
        self.image_paths = [os.path.join(image_dir,f) for f in os.listdir(image_dir) 
                          if f.endswith(('.jpg','.jpeg','.png'))]
        self.label_func = label_func
        self.target_size = target_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()
        
    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))
    
    def __getitem__(self, index):
        batch_paths = self.image_paths[index*self.batch_size:(index+1)*self.batch_size]
        batch_images = []
        batch_labels = []
        
        for path in batch_paths:
            img = tf.keras.preprocessing.image.load_img(
                path, target_size=self.target_size)
            img = tf.keras.preprocessing.image.img_to_array(img)
            img = img / 255.0  # Normalize
            
            label = self.label_func(os.path.basename(path))
            
            batch_images.append(img)
            batch_labels.append(label)
            
        return np.array(batch_images), np.array(batch_labels)
    
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.image_paths)

# 3. Label function (modify according to your naming convention)
def get_label(filename):
    filename = filename.lower()
    if 'autistic' in filename:
        return 0
    elif 'non_autistic' in filename or 'nonautistic' in filename:
        return 1
    else:
        raise ValueError(f"Cannot determine label for {filename}")

# 4. Hybrid Model
def create_hybrid_model():
    input_tensor = Input(shape=(224, 224, 3))
    
    # VGG16 branch
    vgg = VGG16(include_top=False, weights='imagenet', input_tensor=input_tensor)
    vgg_out = GlobalAveragePooling2D()(vgg.output)
    
    # ResNet50 branch
    resnet = ResNet50(include_top=False, weights='imagenet', input_tensor=input_tensor)
    resnet_out = GlobalAveragePooling2D()(resnet.output)
    
    # Combine features
    combined = concatenate([vgg_out, resnet_out])
    
    # Classification head
    output = Dense(1, activation='sigmoid')(combined)
    
    model = Model(inputs=input_tensor, outputs=output)
    
    # Freeze base models
    for layer in vgg.layers:
        layer.trainable = False
    for layer in resnet.layers:
        layer.trainable = False
    
    return model

# 5. Training Process
def train_model():
    # Create generators
    train_gen = CustomDataGenerator(
        image_dir='./train',
        label_func=get_label,
        target_size=(224,224),
        batch_size=8
    )
    
    # Standard generator for validation (organized in subfolders)
    valid_datagen = ImageDataGenerator(rescale=1./255)
    valid_gen = valid_datagen.flow_from_directory(
        './valid',
        target_size=(224,224),
        batch_size=8,
        class_mode='binary',
        shuffle=False
    )
    
    # Verify we have data
    if len(train_gen.image_paths) == 0:
        raise ValueError("No images found in training directory")
    if valid_gen.samples == 0:
        raise ValueError("No validation images found")
    
    # Create and compile model
    model = create_hybrid_model()
    model.compile(
        optimizer=Adam(0.0001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    
    # Train
    history = model.fit(
        train_gen,
        validation_data=valid_gen,
        epochs=10,
        callbacks=[
            EarlyStopping(patience=3),
            ModelCheckpoint('best_model.h5', save_best_only=True)
        ]
    )
    
    model.save('final_model.h5')
    print("Training completed successfully!")

if __name__ == "__main__":
    train_model()

Found 100 images belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Training completed successfully!


In [21]:
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

# 1. Load model and setup validation
MODEL_PATH = 'final_model.h5'
VALID_DIR = './valid'

model = load_model(MODEL_PATH)
valid_datagen = ImageDataGenerator(rescale=1./255)
valid_gen = valid_datagen.flow_from_directory(
    VALID_DIR,
    target_size=(224, 224),
    batch_size=8,
    class_mode='binary',
    shuffle=False
)
valid_steps = valid_gen.samples // valid_gen.batch_size

# 2. Evaluation function
def evaluate_model(model, generator, steps, class_names):
    # Get predictions
    print("\nMaking predictions...")
    val_preds_prob = model.predict(generator, steps=steps, verbose=1)
    val_preds = (val_preds_prob > 0.5).astype(int).flatten()
    val_true = generator.classes[:len(val_preds)]
    val_filenames = generator.filenames[:len(val_preds)]
    
    # Print basic metrics
    print(f"\nValidation samples: {len(val_true)}")
    print(f"Correct predictions: {(val_preds == val_true).sum()}")
    print(f"Wrong predictions: {(val_preds != val_true).sum()}")
    
    # Confusion Matrix
    cm = confusion_matrix(val_true, val_preds)
    print("\nConfusion Matrix:")
    print(cm)
    
    plt.figure(figsize=(6,5))
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, 
                yticklabels=class_names, cmap='Blues')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()
    
    # Classification Report
    print("\nClassification Report:")
    print(classification_report(val_true, val_preds, target_names=class_names))
    
    # ROC Curve
    fpr, tpr, _ = roc_curve(val_true, val_preds_prob)
    roc_auc = auc(fpr, tpr)
    print(f"\nAUC Score: {roc_auc:.3f}")
    
    plt.figure(figsize=(6,5))
    plt.plot(fpr, tpr, label=f'AUC = {roc_auc:.3f}')
    plt.plot([0,1], [0,1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()
    
    # Example predictions
    def plot_examples(indices, title, max_imgs=5):
        plt.figure(figsize=(15,3))
        for i, idx in enumerate(indices[:max_imgs]):
            img_path = os.path.join(VALID_DIR, val_filenames[idx])
            img = plt.imread(img_path)
            plt.subplot(1, max_imgs, i+1)
            plt.imshow(img)
            plt.title(f"Pred: {class_names[val_preds[idx]]}\nTrue: {class_names[val_true[idx]]}")
            plt.axis('off')
        plt.suptitle(title)
        plt.show()
    
    correct_idx = np.where(val_preds == val_true)[0]
    wrong_idx = np.where(val_preds != val_true)[0]
    
    if len(correct_idx) > 0:
        print(f"\nShowing {min(5, len(correct_idx))} correct predictions...")
        plot_examples(correct_idx, "Correct Predictions")
    
    if len(wrong_idx) > 0:
        print(f"\nShowing {min(5, len(wrong_idx))} wrong predictions...")
        plot_examples(wrong_idx, "Wrong Predictions")

# 3. Run evaluation
print(f"\nEvaluating model on {valid_gen.samples} validation images...")
class_names = list(valid_gen.class_indices.keys())
evaluate_model(model, valid_gen, valid_steps, class_names)
print("\nEvaluation complete!")

Found 100 images belonging to 2 classes.

Evaluating model on 100 validation images...

Making predictions...

Validation samples: 96
Correct predictions: 50
Wrong predictions: 46

Confusion Matrix:
[[50  0]
 [46  0]]

Classification Report:
              precision    recall  f1-score   support

    Autistic       0.52      1.00      0.68        50
Non_Autistic       0.00      0.00      0.00        46

    accuracy                           0.52        96
   macro avg       0.26      0.50      0.34        96
weighted avg       0.27      0.52      0.36        96


AUC Score: 0.631

Showing 5 correct predictions...

Showing 5 wrong predictions...

Evaluation complete!


  plt.show()
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  plt.show()
  plt.show()


In [22]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def test_predictions(model, test_dir, img_size=(224, 224), batch_size=8):
    """
    Comprehensive test set evaluation with visualization
    Args:
        model: Trained Keras model
        test_dir: Path to test directory
        img_size: Target image size
        batch_size: Batch size for prediction
    """
    # 1. Setup test generator
    test_filenames = [f for f in os.listdir(test_dir) 
                     if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    if not test_filenames:
        raise ValueError(f"No images found in test directory: {test_dir}")
    
    test_df = pd.DataFrame({'filename': test_filenames})
    
    test_datagen = ImageDataGenerator(rescale=1./255)
    test_gen = test_datagen.flow_from_dataframe(
        dataframe=test_df,
        directory=test_dir,
        x_col='filename',
        y_col=None,
        target_size=img_size,
        batch_size=batch_size,
        class_mode=None,
        shuffle=False
    )
    
    # 2. Make predictions with simple progress indicator
    print(f"\nPredicting on {len(test_filenames)} test images...")
    test_preds_prob = []
    total_batches = len(test_gen)
    
    for i in range(total_batches):
        batch = next(test_gen)
        batch_preds = model.predict(batch, verbose=0)
        test_preds_prob.extend(batch_preds.flatten().tolist())
        print(f"Processed batch {i+1}/{total_batches}", end='\r')
    
    test_preds = (np.array(test_preds_prob) > 0.5).astype(int)
    
    # 3. Display predictions
    print("\n\nTest Set Predictions Summary:")
    print("="*50)
    print(f"{'Autistic':<15} {'Non_Autistic':<15} {'Total'}")
    print("-"*50)
    print(f"{np.sum(test_preds == 0):<15} {np.sum(test_preds == 1):<15} {len(test_preds)}")
    print("="*50)
    
    # 4. Save predictions to CSV
    results_df = pd.DataFrame({
        'filename': test_gen.filenames,
        'prediction': ['Autistic' if p == 0 else 'Non_Autistic' for p in test_preds],
        'confidence': test_preds_prob,
        'pred_raw': test_preds
    })
    
    results_csv = 'test_predictions.csv'
    results_df.to_csv(results_csv, index=False)
    print(f"\nPredictions saved to {results_csv}")
    
    # 5. Visualize sample predictions
    plot_test_samples(test_dir, results_df, n_samples=min(10, len(test_filenames)))

def plot_test_samples(test_dir, results_df, n_samples=10):
    """Visualize random test samples with predictions"""
    samples = results_df.sample(n_samples) if len(results_df) > n_samples else results_df
    
    plt.figure(figsize=(20, 10))
    for i, (_, row) in enumerate(samples.iterrows(), 1):
        img_path = os.path.join(test_dir, row['filename'])
        img = plt.imread(img_path)
        
        plt.subplot(2, 5, i)
        plt.imshow(img)
        plt.title(f"{row['prediction']}\nConf: {row['confidence']:.2f}", 
                 color='green' if row['prediction'] == 'Non_Autistic' else 'red')
        plt.axis('off')
    
    plt.suptitle('Sample Test Predictions', fontsize=16)
    plt.tight_layout()
    plt.show()

def predict_single_image(model, img_path, img_size=(224, 224)):
    """
    Make prediction on a single image with visualization
    Args:
        model: Trained Keras model
        img_path: Path to image file
        img_size: Target image size
    """
    # 1. Load and preprocess image
    if not os.path.exists(img_path):
        raise FileNotFoundError(f"Image not found: {img_path}")
    
    # Try face detection first
    def detect_and_crop_face(img_path):
        img = cv2.imread(img_path)
        if img is None:
            return None
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
        if len(faces) == 0:
            return None
        (x, y, w, h) = faces[0]
        return cv2.cvtColor(img[y:y+h, x:x+w], cv2.COLOR_BGR2RGB)
    
    img = detect_and_crop_face(img_path)
    if img is None:
        print("No face detected, using full image")
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, img_size)
    
    # 2. Prepare for model input
    x = np.expand_dims(img, axis=0) / 255.0
    
    # 3. Make prediction
    pred_prob = model.predict(x, verbose=0)[0][0]
    pred_label = 'Autistic' if pred_prob < 0.5 else 'Non_Autistic'
    confidence = 1 - pred_prob if pred_label == 'Autistic' else pred_prob
    
    # 4. Visualize
    plt.figure(figsize=(10, 6))
    plt.imshow(img)
    plt.title(f"Prediction: {pred_label}\nConfidence: {confidence:.3f}",
             fontsize=14, pad=20,
             color='green' if pred_label == 'Non_Autistic' else 'red')
    plt.axis('off')
    
    # Add confidence bar
    ax = plt.gca()
    ax.text(0.5, -0.1, 
            f"\nAutistic: {(1-pred_prob)*100:.1f}% | Non-Autistic: {pred_prob*100:.1f}%", 
            transform=ax.transAxes,
            ha='center', va='center', fontsize=12,
            bbox=dict(facecolor='white', alpha=0.8))
    
    plt.tight_layout()
    plt.show()
    
    print("\nPrediction Details:")
    print("="*40)
    print(f"Image: {os.path.basename(img_path)}")
    print(f"Prediction: {pred_label}")
    print(f"Confidence: {confidence:.3f}")
    print(f"Raw Score: {pred_prob:.3f}")
    print("="*40)

# Example Usage:
# test_predictions(hybrid_model, './test')
# predict_single_image(hybrid_model, 'path/to/image.jpg')