In [1]:
# Cell 1: Import Libraries
print("[INFO] Importing libraries...")
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

# Reproducibility: seed NumPy and PyTorch (default and CUDA generators) with one fixed value.
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")
print("[INFO] Libraries imported successfully.")

[INFO] Importing libraries...
Using device: cpu
[INFO] Libraries imported successfully.


In [2]:
# Cell 2: Data Preparation Function
print("[INFO] Defining data preparation function...")
def load_kidney_tumor_data(excel_path, base_scan_path):
    """Collect scan image paths and a binary label for every image.

    The spreadsheet's ``Situation`` column is mapped to a binary label
    (``'Tumor'`` -> 1, ``'Normal case'`` / ``'Normal case with cyst'`` -> 0).
    For each spreadsheet row, the folder ``<base_scan_path>/<row index, zero
    padded to 2 digits>`` is scanned one level deep: every ``.jpg`` file
    (case-insensitive) inside each of its sub-folders is recorded with that
    row's label.

    Args:
        excel_path: Path to the patient spreadsheet; must contain a
            ``Situation`` column.
        base_scan_path: Directory holding one folder per patient.

    Returns:
        tuple[list[str], list[int]]: ``(image_paths, labels)`` of equal length.
        Directory listings are sorted so the order is the same on every
        machine, which keeps the seeded train/validation split reproducible.
    """
    print("[INFO] Loading and preprocessing patient data...")
    df = pd.read_excel(excel_path)
    df['binary_label'] = df['Situation'].map({
        'Tumor': 1,
        'Normal case with cyst': 0,
        'Normal case': 0
    })
    image_paths = []
    labels = []
    for patient_id in df.index:
        # NOTE(review): the folder name is derived from the DataFrame row index,
        # not from a patient-ID column -- confirm the two really coincide.
        patient_folder = os.path.join(base_scan_path, f"{patient_id:02d}")
        if not os.path.isdir(patient_folder):
            continue

        patient_label = df.loc[patient_id, 'binary_label']
        if pd.isna(patient_label):
            # An unmapped 'Situation' value yields NaN; such labels would break
            # the stratified split and the loss later on, so skip the patient.
            print(f"[WARN] Skipping patient {patient_id:02d}: "
                  f"unrecognised 'Situation' value {df.loc[patient_id, 'Situation']!r}")
            continue
        patient_label = int(patient_label)

        # os.listdir returns entries in arbitrary order; sort for determinism.
        for subfolder in sorted(os.listdir(patient_folder)):
            subfolder_path = os.path.join(patient_folder, subfolder)
            if not os.path.isdir(subfolder_path):
                continue
            for img_file in sorted(os.listdir(subfolder_path)):
                if img_file.lower().endswith('.jpg'):
                    image_paths.append(os.path.join(subfolder_path, img_file))
                    labels.append(patient_label)
    print("[INFO] Data loaded and processed successfully.")
    return image_paths, labels
print("[INFO] Data preparation function defined.")

[INFO] Defining data preparation function...
[INFO] Data preparation function defined.


In [3]:
# Cell 3: Custom Dataset
print("[INFO] Defining custom dataset class...")
class KidneyTumorDataset(Dataset):
    """Map-style dataset yielding ``(image_tensor, label)`` pairs from image files.

    Two torchvision pipelines are built for every instance: a training pipeline
    with random flip / rotation / colour-jitter augmentation, and a
    deterministic validation pipeline. Both resize to 224x224, convert to a
    tensor and normalise with the per-channel mean/std constants below.
    ``is_train`` selects which pipeline ``__getitem__`` applies.
    """

    def __init__(self, image_paths, labels, is_train=True):
        self.image_paths = image_paths
        self.labels = labels
        self.is_train = is_train

        # Steps shared by both pipelines.
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )
        # Random augmentations, used for training only.
        augmentations = [
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(15),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ]

        self.train_transform = transforms.Compose(
            [transforms.Resize((224, 224)), *augmentations, transforms.ToTensor(), normalize]
        )
        self.val_transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor(), normalize]
        )

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, run the active pipeline, return it with its label."""
        pipeline = self.val_transform
        if self.is_train:
            pipeline = self.train_transform
        rgb_image = Image.open(self.image_paths[idx]).convert('RGB')
        return pipeline(rgb_image), self.labels[idx]
print("[INFO] Custom dataset class defined.")

[INFO] Defining custom dataset class...
[INFO] Custom dataset class defined.


In [4]:
# Cell 4: Data Loader Creation
print("[INFO] Defining data loader creation function...")
def create_data_loaders(image_paths, labels, batch_size=64, test_size=0.2,
                        num_workers=2, pin_memory=None):
    """Split the images into train/validation sets and wrap each in a DataLoader.

    The split is stratified on ``labels`` and seeded (``random_state=42``).

    NOTE(review): the split is made per image, so if several images of one
    patient are passed in, they can end up on both sides of the split, which
    would inflate validation metrics. Consider a group-wise split by patient.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels aligned with ``image_paths``.
        batch_size: Batch size for both loaders.
        test_size: Fraction of samples held out for validation.
        num_workers: Worker processes per loader. Pass 0 to load in the main
            process (useful if worker start-up hangs in a notebook).
        pin_memory: Whether to pin host memory. ``None`` (default) enables it
            only when CUDA is available, avoiding the pointless warning that
            ``pin_memory=True`` produces on CPU-only machines.

    Returns:
        tuple[DataLoader, DataLoader]: ``(train_loader, val_loader)``; only the
        training loader shuffles.
    """
    print("[INFO] Creating train and validation data loaders...")
    if pin_memory is None:
        pin_memory = torch.cuda.is_available()

    X_train, X_val, y_train, y_val = train_test_split(
        image_paths, labels,
        test_size=test_size,
        stratify=labels,
        random_state=42
    )
    train_dataset = KidneyTumorDataset(X_train, y_train, is_train=True)
    val_dataset = KidneyTumorDataset(X_val, y_val, is_train=False)

    # Settings common to both loaders.
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
    print("[INFO] Data loaders created successfully.")
    return train_loader, val_loader
print("[INFO] Data loader creation function defined.")

[INFO] Defining data loader creation function...
[INFO] Data loader creation function defined.


In [5]:
# Cell 5: ResNet50 Model with Custom Classification Head
print("[INFO] Defining ResNet50 model with custom classification head...")
class ResNet50KidneyTumorClassifier(nn.Module):
    """ImageNet-pretrained ResNet50 with a frozen backbone and a new trainable head.

    The head is ``Linear -> ReLU -> Dropout(0.5) -> Linear`` and returns **raw
    logits** of shape ``(batch, num_classes)``. There is deliberately no final
    ``Sigmoid``: the training code uses ``nn.BCEWithLogitsLoss`` and applies
    ``.sigmoid()`` itself when thresholding, so a sigmoid here would be applied
    twice and squash every score into roughly [0.5, 0.73], making every
    prediction positive.

    Args:
        num_classes: Number of output logits (1 for binary classification).
    """

    def __init__(self, num_classes=1):
        super().__init__()
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        # Freeze every pretrained parameter; the head assigned below is created
        # afterwards, so its parameters remain trainable.
        for param in self.resnet.parameters():
            param.requires_grad = False
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return raw (pre-sigmoid) logits for a batch of images."""
        return self.resnet(x)
print("[INFO] ResNet50 model defined.")

[INFO] Defining ResNet50 model with custom classification head...
[INFO] ResNet50 model defined.


In [6]:
# Cell 6: Training Function
print("[INFO] Defining training function...")
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader``: train if ``optimizer`` is given, else evaluate.

    Returns:
        tuple: ``(sum of per-batch losses, correct predictions, samples seen)``.
    """
    is_training = optimizer is not None
    model.train(is_training)
    total_loss, correct, seen = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.float().to(device)
            if is_training:
                optimizer.zero_grad()
            # Flatten to (batch,). A bare .squeeze() would also drop the batch
            # dimension when a batch holds a single sample, and the loss would
            # then fail on the shape mismatch with `labels`.
            outputs = model(images).reshape(-1)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()
            total_loss += loss.item()
            predicted = (outputs.sigmoid() > 0.5).float()
            correct += (predicted == labels).float().sum().item()
            seen += labels.size(0)
    return total_loss, correct, seen


def train_kidney_tumor_model(model, train_loader, val_loader, num_epochs=1,
                             checkpoint_path='best_kidney_tumor_model.pth'):
    """Train a binary classifier and checkpoint the best-validation-accuracy weights.

    Uses Adam (lr=1e-3, weight_decay=1e-4) on the parameters that require
    gradients, and a ReduceLROnPlateau scheduler driven by validation accuracy.

    NOTE: the loss is ``nn.BCEWithLogitsLoss``, which applies the sigmoid
    internally, so ``model`` must return raw logits (no final Sigmoid layer).

    Args:
        model: Network producing one logit per sample.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        checkpoint_path: Where the best ``state_dict`` is saved.

    Returns:
        The same ``model`` object, holding the weights from the final epoch
        (the best weights are on disk at ``checkpoint_path``).
    """
    print("[INFO] Starting training...")
    criterion = nn.BCEWithLogitsLoss()
    # Only hand the optimizer what can actually be updated (frozen backbone
    # parameters never receive gradients).
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(
        trainable_params,
        lr=1e-3,
        weight_decay=1e-4
    )
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='max',
        patience=3,
        factor=0.1
    )
    model.to(device)
    # -inf so the first epoch always writes a checkpoint, even at 0 accuracy.
    best_val_accuracy = float('-inf')
    for epoch in range(num_epochs):
        train_loss, train_correct, train_total = _run_epoch(
            model, train_loader, criterion, optimizer
        )
        val_loss, val_correct, val_total = _run_epoch(model, val_loader, criterion)

        # max(..., 1) guards against division by zero on an empty loader.
        train_accuracy = train_correct / max(train_total, 1)
        val_accuracy = val_correct / max(val_total, 1)
        scheduler.step(val_accuracy)
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss/max(len(train_loader), 1):.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Val Loss: {val_loss/max(len(val_loader), 1):.4f}, Val Accuracy: {val_accuracy:.4f}")
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), checkpoint_path)
    print("[INFO] Training completed.")
    return model
print("[INFO] Training function defined.")

[INFO] Defining training function...
[INFO] Training function defined.


In [7]:
# Cell 7: Model Evaluation
def evaluate_kidney_tumor_model(model, val_loader):
    """Print a classification report and plot the confusion matrix for ``model``.

    Predictions are obtained by applying a sigmoid to the model output and
    thresholding at 0.5, so ``model`` is expected to return raw logits.

    Args:
        model: Trained binary classifier producing one logit per sample.
        val_loader: Iterable of ``(images, labels)`` batches with 0/1 labels.

    Returns:
        None. The report is printed and the confusion matrix is shown.
    """
    print("[INFO] Starting model evaluation...")
    # Do not rely on the caller having moved the model to `device` already.
    model.to(device)
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)

            # reshape(-1) keeps a 1-D result even for a batch of one; a bare
            # .squeeze() would give a 0-dim tensor that cannot be extended from.
            logits = model(images).reshape(-1)
            predictions = (logits.sigmoid() > 0.5).long()

            all_preds.extend(predictions.cpu().tolist())
            all_labels.extend(torch.as_tensor(labels).reshape(-1).long().tolist())

    class_ids = [0, 1]
    class_names = ['Normal (0)', 'Tumor (1)']

    # Classification Report
    print("[INFO] Generating classification report...")
    print("\nClassification Report:")
    print(classification_report(
        all_labels, all_preds,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,
    ))

    # Confusion Matrix Visualization
    print("[INFO] Generating confusion matrix...")
    # Fixing `labels` guarantees a 2x2 matrix even if one class never appears.
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    plt.show()

    print("[INFO] Model evaluation completed.")


In [None]:
# Cell 8: Main Execution
def main(excel_path='Dataset/00Kidney_Patients.xlsx', base_scan_path='unzipped_scans'):
    """Run the full pipeline: load data, build loaders, train, then evaluate.

    Args:
        excel_path: Patient spreadsheet passed to ``load_kidney_tumor_data``.
        base_scan_path: Root directory of the per-patient scan folders.

    Both arguments default to the locations previously hard-coded here, so
    calling ``main()`` with no arguments behaves as before.
    """
    print("[INFO] Main execution started...")

    # Paths
    print("[INFO] Paths set for Excel and scan directories.")

    # Load data
    print("[INFO] Loading data...")
    image_paths, labels = load_kidney_tumor_data(excel_path, base_scan_path)
    print(f"[INFO] Data loaded. Total images: {len(image_paths)}, Labels: {len(labels)}")

    # Create data loaders
    print("[INFO] Creating data loaders...")
    train_loader, val_loader = create_data_loaders(image_paths, labels)
    print("[INFO] Data loaders created.")

    # Initialize model
    print("[INFO] Initializing model...")
    model = ResNet50KidneyTumorClassifier()
    print("[INFO] Model initialized.")

    # Train model
    print("[INFO] Starting training...")
    trained_model = train_kidney_tumor_model(model, train_loader, val_loader)
    print("[INFO] Training completed.")

    # Evaluate model (on the same validation loader used during training)
    print("[INFO] Evaluating trained model...")
    evaluate_kidney_tumor_model(trained_model, val_loader)
    print("[INFO] Main execution completed.")
    
# Entry point: run the whole pipeline via main(), bracketed by start/finish log lines.
# The guard is true when this code is executed directly (as the recorded cell output shows).
if __name__ == '__main__':
    print("[INFO] Script execution started...")
    main()
    print("[INFO] Script execution finished.")


[INFO] Script execution started...
[INFO] Main execution started...
[INFO] Paths set for Excel and scan directories.
[INFO] Loading data...
[INFO] Loading and preprocessing patient data...
[INFO] Data loaded and processed successfully.
[INFO] Data loaded. Total images: 7701, Labels: 7701
[INFO] Creating data loaders...
[INFO] Creating train and validation data loaders...
[INFO] Data loaders created successfully.
[INFO] Data loaders created.
[INFO] Initializing model...
[INFO] Model initialized.
[INFO] Starting training...
[INFO] Starting training...
