In [None]:
# Import all the required libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import seaborn as sns
from tqdm import tqdm
import pandas as pd
from pathlib import Path
import time
import warnings
warnings.filterwarnings('ignore')

# Seed every random number generator the notebook relies on so that
# repeated runs start from the same state.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)

In [None]:
# Configuration
class Config:
    """Central collection of the settings used throughout the notebook.

    Everything is a class attribute, so values can be read either from the
    class itself or from an instance (``Config()``).
    """

    # Paths
    # Root folder of the dataset. The BONE_FRACTURE_DATA_DIR environment
    # variable overrides the default so the notebook can run on another
    # machine without editing this cell.
    BASE_PATH = os.environ.get(
        'BONE_FRACTURE_DATA_DIR',
        r'C:\Final project 2\backend\datasets\Combined_Set_Cls'
    )
    # Expected layout: <BASE_PATH>/<train|test>/<Fracture|Healthy>
    TRAIN_FRACTURE_PATH = os.path.join(BASE_PATH, 'train', 'Fracture')
    TRAIN_HEALTHY_PATH = os.path.join(BASE_PATH, 'train', 'Healthy')
    TEST_FRACTURE_PATH = os.path.join(BASE_PATH, 'test', 'Fracture')
    TEST_HEALTHY_PATH = os.path.join(BASE_PATH, 'test', 'Healthy')

    # Model configuration
    IMAGE_SIZE = 224        # images are resized to IMAGE_SIZE x IMAGE_SIZE
    BATCH_SIZE = 32
    NUM_EPOCHS = 50         # upper bound; early stopping may end training sooner
    LEARNING_RATE = 0.0001
    WEIGHT_DECAY = 1e-4
    NUM_CLASSES = 2

    # Training settings
    PATIENCE = 10  # Early stopping patience (epochs without improvement)
    MODEL_SAVE_PATH = 'best_bone_fracture_model.pth'

    # Device configuration: CUDA when available, otherwise CPU
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Settings object shared by the rest of the notebook
config = Config()

# Report which device training will run on
print(f"Using device: {config.DEVICE}")
if not torch.cuda.is_available():
    print("WARNING: GPU not available, training will be slow!")
else:
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU: {gpu_name}")
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")

In [None]:
class BoneFractureDataset(Dataset):
    """Two-class image dataset built from one folder per class.

    Images found in ``fracture_path`` get label 1 and images found in
    ``healthy_path`` get label 0. Only files whose name ends in one of
    ``IMAGE_EXTENSIONS`` (case-insensitive) are used.

    Args:
        fracture_path: Folder containing the fracture images.
        healthy_path: Folder containing the healthy images.
        transform: Optional callable applied to each loaded PIL image.

    Attributes:
        images: List of image file paths (fracture first, then healthy).
        labels: List of integer labels aligned with ``images``.
    """

    # File-name suffixes (lower case) that are treated as images
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp')

    def __init__(self, fracture_path, healthy_path, transform=None):
        self.transform = transform
        self.images = []
        self.labels = []

        # Fracture images (label = 1) are listed before healthy ones (label = 0)
        for folder, label in ((fracture_path, 1), (healthy_path, 0)):
            for img_path in self._list_images(folder):
                self.images.append(img_path)
                self.labels.append(label)

        print(f"Loaded {len(self.images)} images:")
        print(f"  - Fracture: {sum(self.labels)}")
        print(f"  - Healthy: {len(self.labels) - sum(self.labels)}")

    @classmethod
    def _list_images(cls, folder):
        """Return the image paths inside ``folder``, sorted by file name.

        A missing folder yields an empty list, but a warning is printed so a
        wrong path does not silently produce a one-class or empty dataset.
        """
        if not os.path.isdir(folder):
            print(f"WARNING: image folder not found, skipping: {folder}")
            return []
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order identical from run to run.
        return [
            os.path.join(folder, name)
            for name in sorted(os.listdir(folder))
            if name.lower().endswith(cls.IMAGE_EXTENSIONS)
        ]

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it before being returned.
        """
        img_path = self.images[idx]
        label = self.labels[idx]

        # The context manager closes the underlying file handle as soon as
        # the pixel data has been copied by convert().
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Steps shared by the training and the test pipeline
resize_step = transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE))
normalize_step = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])

# Random augmentations, applied to training images only
augmentation_steps = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
]

# Training pipeline: resize -> augment -> tensor -> normalize
train_transform = transforms.Compose(
    [resize_step] + augmentation_steps + [transforms.ToTensor(), normalize_step]
)

# Test pipeline: resize -> tensor -> normalize (no augmentation)
test_transform = transforms.Compose(
    [resize_step, transforms.ToTensor(), normalize_step]
)

print("Data transforms created successfully!")

In [None]:
# Build the two datasets from their class folders
print("\nCreating Training Dataset...")
train_dataset = BoneFractureDataset(
    config.TRAIN_FRACTURE_PATH,
    config.TRAIN_HEALTHY_PATH,
    transform=train_transform,
)

print("\nCreating Test Dataset...")
test_dataset = BoneFractureDataset(
    config.TEST_FRACTURE_PATH,
    config.TEST_HEALTHY_PATH,
    transform=test_transform,
)

# Options common to both loaders.
# num_workers stays at 0 to avoid multiprocessing issues on Windows;
# pinned memory is only requested when a GPU is present.
loader_options = dict(
    batch_size=config.BATCH_SIZE,
    num_workers=0,
    pin_memory=torch.cuda.is_available(),
)

# Only the training data is shuffled
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

print("\nDataLoaders created:")
print(f"  - Training batches: {len(train_loader)}")
print(f"  - Testing batches: {len(test_loader)}")

In [None]:
def imshow(img, title):
    """Undo the input normalisation of an image tensor and plot it.

    Args:
        img: Tensor in channel-first layout; it is converted with
            ``img.numpy()`` and transposed to channel-last for plotting.
        title: Text placed above the image.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Channel-first -> channel-last, as expected by plt.imshow
    pixels = np.transpose(img.numpy(), (1, 2, 0))
    # Reverse the Normalize transform, then clamp into the displayable range
    pixels = np.clip(pixels * channel_std + channel_mean, 0, 1)

    plt.imshow(pixels)
    plt.title(title)
    plt.axis('off')

print("Getting sample images...")
# Pull a single batch from the training loader
images, labels = next(iter(train_loader))
print(f"Got batch with {len(images)} images")

# Draw up to eight samples on a 2 x 4 grid
print("Creating visualization...")
fig = plt.figure(figsize=(15, 8))

for idx, (sample, target) in enumerate(zip(images[:8], labels[:8])):
    plt.subplot(2, 4, idx + 1)
    caption = 'Fracture' if target == 1 else 'Healthy'
    imshow(sample, caption)

plt.tight_layout()
plt.savefig('sample_images.png', dpi=100, bbox_inches='tight')  # Reduced DPI for speed
plt.close()  # Close instead of show to avoid display issues
print("✓ Sample images saved as 'sample_images.png'")

In [None]:
class BoneFractureClassifier(nn.Module):
    """ResNet50 backbone with a custom classification head.

    The backbone's final fully connected layer is replaced by
    Dropout(0.5) -> Linear(in_features, 512) -> ReLU -> Dropout(0.3) ->
    Linear(512, num_classes), so the model outputs raw class logits.

    Args:
        num_classes: Number of output logits.
        pretrained: When true, the backbone starts from ImageNet weights;
            otherwise it is randomly initialised.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()

        # Load ResNet50. The ``pretrained`` flag of torchvision is deprecated
        # in favour of ``weights``; IMAGENET1K_V1 is the checkpoint the old
        # flag selected. Older torchvision releases without the weights enum
        # still go through the legacy keyword.
        if hasattr(models, 'ResNet50_Weights'):
            weights = models.ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
            self.resnet = models.resnet50(weights=weights)
        else:
            self.resnet = models.resnet50(pretrained=pretrained)

        # Width of the feature vector feeding the original final layer
        num_features = self.resnet.fc.in_features

        # Replace the final fully connected layer with the new head
        self.resnet.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Run ``x`` through the backbone and head; returns the logits."""
        return self.resnet(x)

# Build the classifier and move it onto the configured device
model = BoneFractureClassifier(
    num_classes=config.NUM_CLASSES, pretrained=True
).to(config.DEVICE)

print("\nModel Architecture:")
print(model)

# Parameter counts: everything vs. only what the optimizer may update
total_param_count = sum(p.numel() for p in model.parameters())
trainable_param_count = sum(
    p.numel() for p in model.parameters() if p.requires_grad
)
print(f"\nTotal parameters: {total_param_count:,}")
print(f"Trainable parameters: {trainable_param_count:,}")

In [None]:
# Loss on the raw logits
criterion = nn.CrossEntropyLoss()

# Adam over all model parameters, with weight decay
adam_options = {
    'lr': config.LEARNING_RATE,
    'weight_decay': config.WEIGHT_DECAY,
}
optimizer = optim.Adam(model.parameters(), **adam_options)

# Halve the learning rate when the monitored value (to be maximised)
# has not improved for 5 consecutive scheduler steps
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=5
)

print("Loss function, optimizer, and scheduler initialized!")
print(f"Initial learning rate: {config.LEARNING_RATE}")

In [None]:
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; switched to training mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``. The running
            average assumes it returns the mean loss of the batch.
        optimizer: Optimizer stepping the model parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: average loss per processed sample
        and accuracy in percent.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(dataloader, desc='Training')
    for images, labels in progress_bar:
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Statistics: weight the batch-mean loss by the batch size so a
        # smaller final batch does not skew the epoch average
        batch_loss = loss.item()
        running_loss += batch_loss * batch_size
        predicted = outputs.detach().argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

        # Update progress bar
        progress_bar.set_postfix({
            'loss': f'{batch_loss:.4f}',
            'acc': f'{100 * correct / total:.2f}%'
        })

    if total == 0:
        raise ValueError("train_epoch: the dataloader yielded no samples")

    # Normalise by the samples actually processed rather than
    # len(dataloader.dataset): the two differ when the loader drops the last
    # batch or draws through a sampler.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total

    return epoch_loss, epoch_acc


def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating its weights.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``. The running
            average assumes it returns the mean loss of the batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, all_preds, all_labels)``: average
        loss per processed sample, accuracy in percent, and the flat lists of
        predicted and true class indices in iteration order.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    all_preds = []
    all_labels = []

    # Gradients are not needed for evaluation
    with torch.no_grad():
        progress_bar = tqdm(dataloader, desc='Validation')
        for images, labels in progress_bar:
            images = images.to(device)
            labels = labels.to(device)
            batch_size = labels.size(0)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Statistics: weight the batch-mean loss by the batch size
            batch_loss = loss.item()
            running_loss += batch_loss * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            # Update progress bar
            progress_bar.set_postfix({
                'loss': f'{batch_loss:.4f}',
                'acc': f'{100 * correct / total:.2f}%'
            })

    if total == 0:
        raise ValueError("validate: the dataloader yielded no samples")

    # Normalise by the samples actually processed rather than
    # len(dataloader.dataset): the two differ when the loader drops the last
    # batch or draws through a sampler.
    epoch_loss = running_loss / total
    epoch_acc = 100 * correct / total

    return epoch_loss, epoch_acc, all_preds, all_labels

print("Training and validation functions defined!")

In [None]:
# Per-epoch metrics, kept for plotting and stored inside each checkpoint
history = {
    metric: []
    for metric in ('train_loss', 'train_acc', 'val_loss', 'val_acc')
}

best_acc = 0.0          # best validation accuracy seen so far
patience_counter = 0    # consecutive epochs without a new best
start_time = time.time()


print(f"Starting Training on {config.DEVICE}")


# NOTE(review): validation below runs on test_loader, so checkpoint selection,
# learning-rate scheduling and early stopping all see the test set. Consider
# holding out a separate validation split.
for epoch in range(config.NUM_EPOCHS):
    print(f"\nEpoch {epoch + 1}/{config.NUM_EPOCHS}")

    # One optimisation pass, then one evaluation pass
    train_loss, train_acc = train_epoch(
        model, train_loader, criterion, optimizer, config.DEVICE
    )
    val_loss, val_acc, _, _ = validate(
        model, test_loader, criterion, config.DEVICE
    )

    # The scheduler monitors validation accuracy
    scheduler.step(val_acc)

    # Record this epoch's numbers
    epoch_metrics = (
        ('train_loss', train_loss),
        ('train_acc', train_acc),
        ('val_loss', val_loss),
        ('val_acc', val_acc),
    )
    for metric_name, metric_value in epoch_metrics:
        history[metric_name].append(metric_value)

    print(f"\nEpoch {epoch + 1} Summary:")
    print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
    print(f"  Val Loss:   {val_loss:.4f} | Val Acc:   {val_acc:.2f}%")

    # Checkpoint on a strict improvement, otherwise count towards early stop
    if not val_acc > best_acc:
        patience_counter += 1
        print(f"  Patience: {patience_counter}/{config.PATIENCE}")
    else:
        best_acc = val_acc
        checkpoint_state = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_acc': best_acc,
            'history': history
        }
        torch.save(checkpoint_state, config.MODEL_SAVE_PATH)
        print(f"  ✓ New best model saved! (Accuracy: {best_acc:.2f}%)")
        patience_counter = 0

    # Stop once the patience budget is used up
    if patience_counter >= config.PATIENCE:
        print(f"\n Early stopping triggered after {epoch + 1} epochs")
        break

total_time = time.time() - start_time

print(f"Training Complete!")
print(f"Total Training Time: {total_time/60:.2f} minutes")
print(f"Best Validation Accuracy: {best_acc:.2f}%")


In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Loss curves
for series_key, series_label, series_marker in (
    ('train_loss', 'Train Loss', 'o'),
    ('val_loss', 'Validation Loss', 's'),
):
    ax1.plot(history[series_key], label=series_label, marker=series_marker)
ax1.set(xlabel='Epoch', ylabel='Loss', title='Training and Validation Loss')
ax1.legend()
ax1.grid(True)

# Accuracy curves, with a dashed reference line at 90 %
for series_key, series_label, series_marker in (
    ('train_acc', 'Train Accuracy', 'o'),
    ('val_acc', 'Validation Accuracy', 's'),
):
    ax2.plot(history[series_key], label=series_label, marker=series_marker)
ax2.axhline(y=90, color='r', linestyle='--', label='90% Target')
ax2.set(xlabel='Epoch', ylabel='Accuracy (%)',
        title='Training and Validation Accuracy')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150, bbox_inches='tight')
plt.show()
print("Training history plot saved as 'training_history.png'")

In [None]:
# Load the best checkpoint written by the training loop.
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only machine.
checkpoint = torch.load(config.MODEL_SAVE_PATH, map_location=config.DEVICE)
model.load_state_dict(checkpoint['model_state_dict'])
# 'epoch' is stored zero-based, hence the + 1 for display
print(f"Loaded best model from epoch {checkpoint['epoch'] + 1}")
print(f"Best validation accuracy: {checkpoint['best_acc']:.2f}%\n")

# Evaluate the restored weights on the test set; the predictions and labels
# are kept for the metric cells that follow
print("Evaluating on Test Set...")
test_loss, test_acc, test_preds, test_labels = validate(
    model, test_loader, criterion, config.DEVICE
)


print(f"TEST SET RESULTS")

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.2f}%")


In [None]:
# Binary scores (positive class = label 1, i.e. Fracture)
precision, recall, f1 = (
    score_fn(test_labels, test_preds, average='binary')
    for score_fn in (precision_score, recall_score, f1_score)
)

print("\nDetailed Classification Metrics:")

print(f"Accuracy:  {test_acc:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-Score:  {f1:.4f}")


# Per-class breakdown; target_names follow label order 0, 1
print("\nClassification Report:")
report_text = classification_report(
    test_labels,
    test_preds,
    target_names=['Healthy', 'Fracture'],
)
print(report_text)

In [None]:
# Compute confusion matrix.
# labels=[0, 1] pins the matrix to 2 x 2 in Healthy/Fracture order even when
# a class is absent from both the labels and the predictions; without it the
# matrix can shrink and the fixed-size code below (and the later
# cm.ravel() unpacking) would fail.
cm = confusion_matrix(test_labels, test_preds, labels=[0, 1])

# Plot confusion matrix (rows = true class, columns = predicted class)
class_names = ['Healthy', 'Fracture']
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
            cbar_kws={'label': 'Count'})
plt.title('Confusion Matrix', fontsize=16, pad=20)
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)

# Add percentage annotations: share of each true class (row) per cell
for i in range(cm.shape[0]):
    row_total = cm[i].sum()
    for j in range(cm.shape[1]):
        # A class without samples has an all-zero row; show 0.0% instead of nan
        percentage = cm[i, j] / row_total * 100 if row_total else 0.0
        plt.text(j + 0.5, i + 0.7, f'({percentage:.1f}%)',
                ha='center', va='center', fontsize=10, color='red')

plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=150, bbox_inches='tight')
plt.show()
print("Confusion matrix saved as 'confusion_matrix.png'")

In [None]:
# Unpack the 2 x 2 confusion matrix (row-major: true 0 first, then true 1)
tn, fp, fn, tp = cm.ravel()

# Rates per true class
specificity = tn / (tn + fp)   # healthy images recognised as healthy
sensitivity = tp / (tp + fn)   # fracture images recognised as fracture

print("\nPer-Class Performance:")

print("\nHealthy (Class 0):")
print(f"  True Negatives:  {tn:4d}")
print(f"  False Positives: {fp:4d}")
print(f"  Specificity:     {specificity*100:.2f}%")

print("\nFracture (Class 1):")
print(f"  True Positives:  {tp:4d}")
print(f"  False Negatives: {fn:4d}")
print(f"  Sensitivity:     {sensitivity*100:.2f}%")


# Balanced accuracy: mean of sensitivity and specificity, in percent
balanced_acc = (sensitivity + specificity) / 2 * 100
print(f"\nBalanced Accuracy: {balanced_acc:.2f}%")

In [None]:
def visualize_predictions(model, dataloader, device, num_images=12):
    """Plot the first ``num_images`` samples with true and predicted labels.

    Each sample is drawn with a two-line title ("True: ..." / "Pred: ...")
    coloured green when the prediction matches the label and red otherwise.
    The figure is saved as 'prediction_samples.png' and then shown.

    Args:
        model: Network producing class logits; switched to evaluation mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        device: Device the images are moved to for the forward pass.
        num_images: Maximum number of samples to draw. The grid has four
            columns and as many rows as needed (3 x 4 for the default of 12).
    """
    model.eval()

    # Size the grid from num_images so values above 12 no longer overflow a
    # fixed 3 x 4 layout; -(-a // b) is ceiling division.
    cols = 4
    rows = max(1, -(-num_images // cols))
    plt.figure(figsize=(20, 5 * rows))

    images_shown = 0
    with torch.no_grad():
        for images, labels in dataloader:
            if images_shown >= num_images:
                break

            outputs = model(images.to(device))
            # Bring predictions back to the CPU so they can be compared with
            # the labels, which are never moved to the device here
            predicted = outputs.argmax(dim=1).cpu()

            for idx in range(images.size(0)):
                if images_shown >= num_images:
                    break

                plt.subplot(rows, cols, images_shown + 1)
                imshow(images[idx].cpu(), '')

                true_class = int(labels[idx])
                pred_class = int(predicted[idx])
                true_label = 'Fracture' if true_class == 1 else 'Healthy'
                pred_label = 'Fracture' if pred_class == 1 else 'Healthy'

                color = 'green' if true_class == pred_class else 'red'
                plt.title(f'True: {true_label}\nPred: {pred_label}',
                         color=color, fontsize=12, fontweight='bold')

                images_shown += 1

    plt.tight_layout()
    plt.savefig('prediction_samples.png', dpi=150, bbox_inches='tight')
    plt.show()
    print("Prediction samples saved as 'prediction_samples.png'")

# Draw the first twelve test samples with their true and predicted labels
visualize_predictions(
    model=model,
    dataloader=test_loader,
    device=config.DEVICE,
    num_images=12,
)

In [None]:
# Collect the headline numbers of the run; the three groups are merged in
# order so the CSV columns keep a fixed layout
quality_fields = {
    'Model': 'ResNet50',
    'Test Accuracy': f"{test_acc:.2f}%",
    'Test Loss': f"{test_loss:.4f}",
    'Precision': f"{precision:.4f}",
    'Recall': f"{recall:.4f}",
    'F1-Score': f"{f1:.4f}",
}
confusion_fields = {
    'True Positives': tp,
    'True Negatives': tn,
    'False Positives': fp,
    'False Negatives': fn,
}
run_fields = {
    'Training Time (minutes)': f"{total_time/60:.2f}",
    'Epochs Trained': len(history['train_loss']),
    'Image Size': config.IMAGE_SIZE,
    'Batch Size': config.BATCH_SIZE,
    'Learning Rate': config.LEARNING_RATE,
}
results_summary = {**quality_fields, **confusion_fields, **run_fields}

# One-row table, written to CSV and echoed transposed for readability
results_df = pd.DataFrame([results_summary])
results_df.to_csv('model_results_summary.csv', index=False)
print("\nResults Summary:")
print(results_df.T)
print("\nResults saved to 'model_results_summary.csv'")

