<a href="https://colab.research.google.com/github/SinghAnsh07/DataScience/blob/main/breast%20cancer%20detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset archive stored under MyDrive can be read by the next cell.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Extract the BreaKHis archive from the mounted Drive into /content/
# (-o: overwrite existing files without prompting, -q: quiet), then list the
# extracted "breast" directory as a quick sanity check.
!unzip -oq "/content/drive/MyDrive/BreaKHis_v1.zip" -d /content/
!ls /content/BreaKHis_v1/histology_slides/breast


benign	count_files.sh	malignant  README.txt


In [15]:
!pip install timm -q

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import numpy as np
import random
import timm
from datetime import datetime

# Seed every RNG the notebook relies on (PyTorch, NumPy, stdlib random).
SEED = 42
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(SEED)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

# Dataset location and training hyperparameters.
data_root = "/content/BreaKHis_v1/histology_slides/breast"
epochs, batch_size, learning_rate = 50, 64, 3e-4

# Magnification levels to process, and the dict that collects one result per level.
magnifications = ["40X", "100X", "200X", "400X"]
all_results = dict()


Device: cuda


In [16]:
class BCDataset(Dataset):
    """Map-style dataset pairing image file paths with labels.

    Each item is read from disk on access, converted to RGB, optionally passed
    through ``transform``, and returned together with its label as a
    ``float32`` tensor.

    Args:
        files: Sequence of image file paths.
        labels: Sequence of labels, index-aligned with ``files``.
        transform: Optional callable applied to the RGB PIL image.

    Raises:
        ValueError: If ``files`` and ``labels`` differ in length.
    """

    def __init__(self, files, labels, transform=None):
        # Fail fast: a mismatch would otherwise surface as an IndexError (or a
        # silently ignored tail of labels) in the middle of an epoch.
        if len(files) != len(labels):
            raise ValueError(
                f"files and labels must have the same length "
                f"(got {len(files)} and {len(labels)})"
            )
        self.files = files
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per file path)."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label_tensor)``."""
        # Image.open is lazy and holds the file open; the context manager
        # closes it as soon as convert() has produced an in-memory image.
        with Image.open(self.files[idx]) as raw:
            img = raw.convert("RGB")
        # Compare against None so a callable that happens to be falsy is
        # still applied instead of being mistaken for "no transform".
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.tensor(self.labels[idx], dtype=torch.float32)


# Preprocessing constants shared by both pipelines. The mean/std values match
# the commonly used ImageNet channel statistics.
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random flips / rotation / brightness-contrast
# jitter as augmentation, then tensor conversion and channel normalisation.
train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=20),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Validation pipeline: same resize and normalisation, no random augmentation.
val_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])


In [17]:
def load_data(magnification, seed=42):
    """Build train/validation loaders for one magnification level.

    Walks the ``benign`` and ``malignant`` trees under ``data_root`` and
    collects every ``.png`` file whose parent directory is named exactly
    ``magnification``. Benign images get label 0, malignant images label 1.
    The samples are shuffled and split 80/20 into train and validation sets.

    Args:
        magnification: Directory name to match, e.g. ``"40X"``.
        seed: Seed of the private RNG used to shuffle the samples before the
            split, so the same files always yield the same split regardless
            of filesystem listing order or prior use of the global RNG.

    Returns:
        ``(train_loader, val_loader, total, n_benign, n_malignant)``. If fewer
        than two matching images are found a warning is printed and
        ``(None, None, 0, 0, 0)`` is returned.
    """
    samples = []
    for class_name, label in (("benign", 0), ("malignant", 1)):
        class_dir = os.path.join(data_root, class_name)
        for root, _, names in os.walk(class_dir):
            if os.path.basename(root) != magnification:
                continue
            samples.extend(
                (os.path.join(root, name), label)
                for name in names
                if name.lower().endswith(".png")
            )

    if len(samples) == 0:
        print(f"⚠️ No images for {magnification}")
        return None, None, 0, 0, 0

    # With a single image the 80/20 split leaves the training set empty, and
    # a shuffling DataLoader cannot be built over an empty dataset.
    if len(samples) == 1:
        print(f"⚠️ Only 1 image for {magnification}; need at least 2 to split")
        return None, None, 0, 0, 0

    # os.walk yields entries in filesystem order; sort first so the seeded
    # shuffle below produces the same split on every machine and every re-run.
    samples.sort()
    random.Random(seed).shuffle(samples)
    files, labels = zip(*samples)

    # NOTE(review): the split is per image. If several images come from the
    # same patient/slide directory, related images can end up in both train
    # and validation — confirm whether a group-wise split is needed.
    split = int(0.8 * len(files))
    train_files, val_files = files[:split], files[split:]
    train_labels, val_labels = labels[:split], labels[split:]

    train_loader = DataLoader(
        BCDataset(train_files, train_labels, train_transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
        pin_memory=True
    )

    val_loader = DataLoader(
        BCDataset(val_files, val_labels, val_transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=2,
        pin_memory=True
    )

    return train_loader, val_loader, len(files), labels.count(0), labels.count(1)


In [18]:
# Wall-clock timer for the whole sweep.
start = datetime.now()

# Train once per magnification, keeping only the first element of the returned
# pair (stored as that magnification's accuracy).
# NOTE(review): train_magnification is not defined in any cell visible here —
# confirm its defining cell runs before this one on a fresh kernel.
for mag in magnifications:
    all_results[mag], _ = train_magnification(mag)

# Report each magnification; a non-positive accuracy is shown as SKIPPED.
print("\nSUMMARY")
for mag, acc in all_results.items():
    outcome = f"{acc*100:.2f}%" if acc > 0 else "SKIPPED"
    print(f"{mag}: {outcome}")

print("Time:", datetime.now() - start)



Training 40X
⚠️ No images for 40X

Training 100X
⚠️ No images for 100X

Training 200X
⚠️ No images for 200X

Training 400X


400X Epoch 1: 100%|██████████| 1/1 [00:00<00:00,  1.57it/s]
400X Epoch 2: 100%|██████████| 1/1 [00:00<00:00,  1.37it/s]
400X Epoch 3: 100%|██████████| 1/1 [00:01<00:00,  1.13s/it]
400X Epoch 4: 100%|██████████| 1/1 [00:01<00:00,  1.23s/it]
400X Epoch 5: 100%|██████████| 1/1 [00:01<00:00,  1.04s/it]
400X Epoch 6: 100%|██████████| 1/1 [00:00<00:00,  1.24it/s]
400X Epoch 7: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
400X Epoch 8: 100%|██████████| 1/1 [00:00<00:00,  1.64it/s]
400X Epoch 9: 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
400X Epoch 10: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]
400X Epoch 11: 100%|██████████| 1/1 [00:00<00:00,  1.50it/s]
400X Epoch 12: 100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
400X Epoch 13: 100%|██████████| 1/1 [00:00<00:00,  1.81it/s]
400X Epoch 14: 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
400X Epoch 15: 100%|██████████| 1/1 [00:00<00:00,  1.80it/s]
400X Epoch 16: 100%|██████████| 1/1 [00:00<00:00,  1.74it/s]
400X Epoch 17: 100%|██████████| 1


SUMMARY
40X: SKIPPED
100X: SKIPPED
200X: SKIPPED
400X: 100.00%
Time: 0:00:48.070262
