In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input tree top-down and echo the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for path in (os.path.join(dirname, filename) for filename in filenames):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import timm
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import seaborn as sns
import albumentations as A
from albumentations.pytorch import ToTensorV2
import logging
import cv2

# Suppress albumentations update warning
# NOTE(review): albumentations appears to consult this variable while it is
# being imported, and the import has already run above -- confirm, and move
# this assignment ahead of the import if the warning still shows up.
os.environ["NO_ALBUMENTATIONS_UPDATE"] = "1"

# Route log records (message text only, INFO and above) to training.log
logging.basicConfig(format='%(message)s', level=logging.INFO, filename='training.log')

# Report the PyTorch / CUDA environment
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Count: {torch.cuda.device_count()}")

# Pick the compute device: start from CPU and upgrade to the first GPU when
# CUDA is usable; any failure while claiming the GPU reverts to CPU.
device = torch.device("cpu")
try:
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        torch.cuda.set_device(0)
        torch.cuda.empty_cache()
except Exception as e:
    print(f"GPU initialization failed: {e}. Falling back to CPU.")
    device = torch.device("cpu")
print(f"Using device: {device}")

# Custom Dataset for DDSM
class DDSMDataset(Dataset):
    """In-memory DDSM dataset read from TFRecord files or a pair of ``.npy`` files.

    Accepted images are kept in ``self.data`` and their binary labels, at the
    same positions, in ``self.labels``. Samples that fail validation are
    dropped, and the number dropped is printed once loading finishes.

    Args:
        data_source: Passed to ``tf.data.TFRecordDataset`` when
            ``is_tfrecord`` is true; otherwise a ``(data_file, label_file)``
            pair of paths readable by ``np.load``.
        transform: Optional callable invoked as ``transform(image=img)``; the
            ``'image'`` entry of its result is what ``__getitem__`` returns.
        is_tfrecord: Selects which of the two loaders handles ``data_source``.
    """

    def __init__(self, data_source, transform=None, is_tfrecord=True):
        self.transform = transform
        self.is_tfrecord = is_tfrecord
        self.data = []
        self.labels = []
        read_source = self._load_tfrecords if is_tfrecord else self._load_numpy
        read_source(data_source)

    def _load_tfrecords(self, tfrecord_files):
        """Parse serialized ``tf.train.Example`` records into images and labels.

        A record is kept when its ``label_normal`` value is 0 or 1 and its
        ``image`` bytes decode to exactly 299 * 299 ``uint8`` values, which
        are stored with shape ``(299, 299, 1)``. Any exception raised while
        handling a record is counted as an invalid image.
        """
        records = tf.data.TFRecordDataset(tfrecord_files)
        bad_labels = 0
        bad_images = 0
        for raw_record in records:
            try:
                example = tf.train.Example()
                example.ParseFromString(raw_record.numpy())
                features = example.features.feature
                label = features['label_normal'].int64_list.value[0]
                if label not in (0, 1):
                    bad_labels += 1
                    continue
                pixels = np.frombuffer(features['image'].bytes_list.value[0], dtype=np.uint8)
                if pixels.size != 299 * 299:
                    bad_images += 1
                    continue
                image = pixels.reshape(299, 299, 1)
                # isfinite is False for NaN as well as +/-inf, so one test covers both.
                if not np.isfinite(image).all():
                    bad_images += 1
                    continue
                self.data.append(image)
                self.labels.append(int(label))
            except Exception:
                bad_images += 1
                continue
        print(f"Skipped {bad_labels} samples with invalid labels, {bad_images} with invalid images in tfrecords")

    def _load_numpy(self, data_source):
        """Load images and labels from a ``(data_file, label_file)`` pair.

        Images of shape ``(299, 299)`` gain a trailing channel axis; other
        shapes are stored as they are. Label 0 stays 0 and every other label
        becomes 1. Images containing NaN or infinite values, and samples that
        raise while being processed, are skipped and counted.
        """
        data_file, label_file = data_source
        images = np.load(data_file)
        labels = np.load(label_file)
        rejected = 0
        for img, lbl in zip(images, labels):
            try:
                if img.shape == (299, 299):
                    img = img.reshape(299, 299, 1)
                if not np.isfinite(img).all():
                    rejected += 1
                    continue
                binary_lbl = int(int(lbl) != 0)
                self.data.append(img)
                self.labels.append(binary_lbl)
            except Exception:
                rejected += 1
                continue
        print(f"Skipped {rejected} samples with invalid images in numpy files")

    def __len__(self):
        """Return the number of samples that passed validation."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``.

        The stored grayscale image is converted to RGB with OpenCV and, when a
        transform is set, replaced by the transform's ``'image'`` output.
        """
        gray = self.data[idx]
        label = self.labels[idx]
        rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
        if self.transform:
            rgb = self.transform(image=rgb)['image']
        return rgb, label

# Define augmentations with additional transformations
def _normalize_and_tensorize():
    """Return fresh normalization + tensor-conversion steps shared by both pipelines."""
    return [
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ]


# Training pipeline: resize, random flip / rotation / brightness-contrast
# jitter, then the shared normalization tail.
train_transform = A.Compose(
    [
        A.Resize(224, 224),
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=30, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
        *_normalize_and_tensorize(),
    ]
)
# Validation / test pipeline: resize and normalization tail only.
val_transform = A.Compose([A.Resize(224, 224), *_normalize_and_tensorize()])

# Load dataset
DATA_DIR = "/kaggle/input/ddsm-mammography"

# Training split: five TFRecord shards, each inside its own sub-directory.
train_files = [
    os.path.join(DATA_DIR, f"training10_{shard}/training10_{shard}.tfrecords")
    for shard in range(5)
]
# Validation and test splits: (images .npy, labels .npy) path pairs.
val_data = (
    os.path.join(DATA_DIR, "cv10_data/cv10_data.npy"),
    os.path.join(DATA_DIR, "cv10_labels.npy"),
)
test_data = (
    os.path.join(DATA_DIR, "test10_data/test10_data.npy"),
    os.path.join(DATA_DIR, "test10_labels.npy"),
)

# Each constructor reads its whole split eagerly, in train / val / test order.
train_dataset = DDSMDataset(train_files, is_tfrecord=True, transform=train_transform)
val_dataset = DDSMDataset(val_data, is_tfrecord=False, transform=val_transform)
test_dataset = DDSMDataset(test_data, is_tfrecord=False, transform=val_transform)

# Only the training loader shuffles; all three share the remaining settings.
_LOADER_OPTIONS = {"batch_size": 32, "num_workers": 1}
train_loader = DataLoader(train_dataset, shuffle=True, **_LOADER_OPTIONS)
val_loader = DataLoader(val_dataset, shuffle=False, **_LOADER_OPTIONS)
test_loader = DataLoader(test_dataset, shuffle=False, **_LOADER_OPTIONS)

print(f"Dataset sizes: Train={len(train_dataset)}, Val={len(val_dataset)}, Test={len(test_dataset)}")

# Compute class weights
labels = np.asarray(train_dataset.labels, dtype=np.int64)
if labels.size == 0:
    # Fail with a clear message instead of an opaque reduction error below.
    raise ValueError("Training dataset is empty, cannot compute class weights")
print(f"Raw Labels: Min={np.min(labels)}, Max={np.max(labels)}, Unique={np.unique(labels)}")
if not np.all(np.isin(labels, [0, 1])):
    raise ValueError(f"Non-binary labels detected: {np.unique(labels)}")
class_counts = np.bincount(labels, minlength=2)
print(f"Class counts: Negative={class_counts[0]}, Positive={class_counts[1]}")
if class_counts[0] == 0 or class_counts[1] == 0:
    raise ValueError("One class has zero samples, cannot compute class weights")
total_samples = len(labels)
# "Balanced" per-class weights: total / (n_classes * count).
class_weights = [total_samples / (2.0 * class_counts[i]) for i in range(2)]
print(f"Class weights: {class_weights}")
if not all(np.isfinite(class_weights)):
    raise ValueError(f"Invalid class weights: {class_weights}")
# BCEWithLogitsLoss scales only the positive term and leaves the negative term
# at weight 1, so the balancing factor is the positive weight *relative to* the
# negative one, i.e. negatives / positives. Passing the positive "balanced"
# weight alone would under-weight the positive class.
pos_weight_value = float(class_counts[0]) / float(class_counts[1])
print(f"Positive class weight (pos_weight): {pos_weight_value:.4f}")
# The name ``class_weights`` is rebound to the scalar tensor used as pos_weight.
class_weights = torch.tensor(pos_weight_value, dtype=torch.float32, device=device)
# EfficientNet-B0 Model
class EfficientNetB0Classifier(nn.Module):
    """EfficientNet-B0 feature extractor with a dropout + linear head.

    The backbone is always created headless (``num_classes=0``, which per
    timm's documentation yields pooled features) so that its output width
    equals ``self.effnet.num_features``, the input size of the custom head.
    Pretrained weights are tried first under the ``efficientnet_b0`` name,
    then ``tf_efficientnet_b0``; if both fail the backbone is randomly
    initialised.

    Only backbone parameters whose name contains ``"blocks.6"`` and the
    parameters of the custom head are left trainable.

    Args:
        num_classes: Number of output logits produced by the head.
    """

    def __init__(self, num_classes=1):
        super().__init__()
        self.effnet = self._create_backbone()
        # Freeze the backbone except for the last block ("blocks.6").
        for name, param in self.effnet.named_parameters():
            param.requires_grad = "blocks.6" in name
        # Custom classifier with dropout
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(self.effnet.num_features, num_classes),
        )
        for param in self.classifier.parameters():
            param.requires_grad = True

    @staticmethod
    def _create_backbone():
        """Return a headless EfficientNet-B0, preferring pretrained weights."""
        try:
            backbone = timm.create_model('efficientnet_b0', pretrained=True, num_classes=0)
            print("Loaded efficientnet_b0 pretrained weights")
            return backbone
        except Exception as e:
            print(f"Failed to load efficientnet_b0 pretrained weights: {e}. Trying tf_efficientnet_b0...")
        try:
            backbone = timm.create_model('tf_efficientnet_b0', pretrained=True, num_classes=0)
            print("Loaded tf_efficientnet_b0 pretrained weights")
            return backbone
        except Exception as e2:
            print(f"Failed to load tf_efficientnet_b0 pretrained weights: {e2}. Using random init.")
        # The fallback must stay headless too: building it with a
        # classification head would make the backbone emit logits, which the
        # Linear layer sized for ``num_features`` inputs cannot consume.
        return timm.create_model('efficientnet_b0', pretrained=False, num_classes=0)

    def forward(self, x):
        """Return the head's logits for the image batch ``x``."""
        features = self.effnet(x)
        return self.classifier(features)

# Initialize model and optimizer
def _build_training_state(target_device, pos_weight):
    """Create the model, weighted BCE loss and Adam optimizer for ``target_device``."""
    net = EfficientNetB0Classifier(num_classes=1).to(target_device)
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    # Use different learning rates for backbone and classifier
    trainable_backbone = [p for _, p in net.effnet.named_parameters() if p.requires_grad]
    opt = optim.Adam([
        {'params': trainable_backbone, 'lr': 1e-5},
        {'params': net.classifier.parameters(), 'lr': 1e-4},
    ])
    return net, loss_fn, opt


try:
    model, criterion, optimizer = _build_training_state(device, class_weights)
except Exception as e:
    # Retry once on CPU, moving the loss weight along with the model.
    print(f"GPU model initialization failed: {e}. Falling back to CPU.")
    device = torch.device("cpu")
    class_weights = class_weights.to(device)
    model, criterion, optimizer = _build_training_state(device, class_weights)

# Training parameters
epochs = 100
best_val_acc = 0
# Per-epoch metric histories, appended to after every epoch.
train_acc = []
val_acc = []
train_loss = []
val_loss = []
train_auc = []
val_auc = []

# Evaluation function
def evaluate_dl(loader, model, criterion):
    """Evaluate ``model`` on every batch of ``loader`` without gradients.

    Batches whose inputs or outputs contain NaN/inf are reported with a
    ``print`` and excluded from all statistics.

    Args:
        loader: Sized iterable yielding ``(images, labels)`` tensor batches.
        model: Module producing one logit per sample.
        criterion: Loss called as ``criterion(outputs, labels)`` with labels
            reshaped to ``(batch, 1)`` floats.

    Returns:
        Tuple ``(accuracy, mean_loss, auc, y_true, y_scores)`` where
        ``accuracy`` uses a 0.5 threshold on the sigmoid scores,
        ``mean_loss`` is the average loss over the batches actually scored,
        ``auc`` is the ROC AUC (0 when fewer than two classes are present),
        and ``y_true`` / ``y_scores`` are flat lists of labels and sigmoid
        scores. All three metrics are 0 when nothing was scored.
    """
    model.eval()
    # Send batches to wherever the model lives rather than trusting a global
    # that other code may rebind (e.g. after a CPU fallback).
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    correct, total = 0, 0
    loss_total, batches_scored = 0.0, 0
    y_true, y_scores = [], []
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(eval_device)
            labels = labels.to(eval_device).float().view(-1, 1)
            if not torch.isfinite(images).all():
                print("Invalid input images (NaN or inf)")
                continue
            outputs = model(images)
            if not torch.isfinite(outputs).all():
                print("Invalid model outputs (NaN or inf)")
                continue
            loss = criterion(outputs, labels)
            probs = torch.sigmoid(outputs)
            predicted = (probs >= 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            loss_total += loss.item()
            batches_scored += 1
            y_true.extend(labels.cpu().numpy().flatten())
            y_scores.extend(probs.cpu().numpy().flatten())

    acc = correct / total if total > 0 else 0
    # Average over scored batches only; dividing by len(loader) would
    # under-report the loss whenever a batch was skipped above.
    mean_loss = loss_total / batches_scored if batches_scored > 0 else 0
    # ROC AUC is undefined unless both classes are present.
    auc = roc_auc_score(y_true, y_scores) if len(np.unique(y_true)) > 1 else 0
    return acc, mean_loss, auc, y_true, y_scores

# Training loop
# Each epoch: one optimisation pass over train_loader, then evaluation on the
# train and validation loaders; the weights with the best validation accuracy
# so far are written to 'efficientnet_b0_best.pth'.
for epoch in range(epochs):
    model.train()
    running_loss = 0  # sum of per-batch losses this epoch (not read elsewhere in this block)
    for i, (images, labels) in enumerate(train_loader):
        try:
            images, labels = images.to(device), labels.to(device).float().view(-1, 1)
            if torch.any(torch.isnan(images)) or torch.any(torch.isinf(images)):
                print(f"Batch {i}: Invalid input images (NaN or inf)")
                continue
            optimizer.zero_grad()
            outputs = model(images)
            if torch.any(torch.isnan(outputs)) or torch.any(torch.isinf(outputs)):
                print(f"Batch {i}: Invalid model outputs (NaN or inf)")
                continue
            loss = criterion(outputs, labels)
            if torch.isnan(loss) or torch.isinf(loss):
                print(f"Batch {i}: Invalid loss (NaN or inf)")
                continue
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 100 == 0:
                print(f"Epoch {epoch+1}, Batch {i}, Loss: {loss.item():.4f}")
        except RuntimeError as e:
            if device.type != "cuda":
                # Already on CPU, so this is not a GPU failure. Re-raise rather
                # than silently skipping every batch and rebuilding the
                # optimizer each time.
                raise
            print(f"GPU error in batch {i}: {e}. Falling back to CPU.")
            device = torch.device("cpu")
            model = model.to(device)
            class_weights = class_weights.to(device)
            criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights)
            # A fresh optimizer is built for the moved model; the failed batch
            # itself is skipped.
            optimizer = optim.Adam([
                {'params': [p for n, p in model.effnet.named_parameters() if p.requires_grad], 'lr': 1e-5},
                {'params': model.classifier.parameters(), 'lr': 1e-4}
            ])
            continue

    # End-of-epoch metrics on the training and validation loaders.
    t_acc, t_loss, t_auc, _, _ = evaluate_dl(train_loader, model, criterion)
    v_acc, v_loss, v_auc, _, _ = evaluate_dl(val_loader, model, criterion)
    train_acc.append(t_acc)
    val_acc.append(v_acc)
    train_loss.append(t_loss)
    val_loss.append(v_loss)
    train_auc.append(t_auc)
    val_auc.append(v_auc)
    print(f"Epoch {epoch+1}: Train Acc: {t_acc:.4f}, Val Acc: {v_acc:.4f}, Train AUC: {t_auc:.4f}, Val AUC: {v_auc:.4f}, Train Loss: {t_loss:.4f}, Val Loss: {v_loss:.4f}")

    # Checkpoint only on a strict improvement in validation accuracy.
    if v_acc > best_val_acc:
        best_val_acc = v_acc
        torch.save(model.state_dict(), 'efficientnet_b0_best.pth')

# Evaluate on Test set
# Restore the checkpoint written during training before scoring the test split.
best_state = torch.load('efficientnet_b0_best.pth', map_location=device, weights_only=True)
model.load_state_dict(best_state)
test_acc, test_loss, test_auc, y_true_test, y_scores_test = evaluate_dl(test_loader, model, criterion)

print("\nFinal Results:")
print(f"Test Acc: {test_acc:.4f}, AUC: {test_auc:.4f}, Loss: {test_loss:.4f}")

# Hard predictions at the 0.5 threshold, shared by the report and the matrix.
y_pred_test = (np.array(y_scores_test) >= 0.5).astype(int)
class_names = ['NEGATIVE', 'POSITIVE']

# Classification report
print("\nTest Classification Report:")
print(classification_report(y_true_test, y_pred_test, target_names=class_names))

# Graphs
# Confusion matrix heat map, saved to disk and then displayed.
cm = confusion_matrix(y_true_test, y_pred_test)
plt.figure(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix (Test Set)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.savefig('confusion_matrix.png')
plt.show()

# Training history: accuracy on the left panel, loss on the right.
history_panels = (
    ("Accuracy", ((train_acc, 'Train Acc'), (val_acc, 'Val Acc'))),
    ("Loss", ((train_loss, 'Train Loss'), (val_loss, 'Val Loss'))),
)
plt.figure(figsize=(10, 5))
for panel_index, (panel_title, curves) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_index)
    for series, legend_label in curves:
        plt.plot(series, label=legend_label)
    plt.title(panel_title)
    plt.xlabel("Epoch")
    plt.ylabel(panel_title)
    plt.legend()
plt.tight_layout()
plt.savefig('training_history.png')
plt.show()