In [11]:
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
df = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/new_master_dataset.csv")
df

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,mag,path,filename,class,slide_id,tumor_type
0,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-001.png,benign,22549CD,A
1,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-002.png,benign,22549CD,A
2,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-003.png,benign,22549CD,A
3,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-004.png,benign,22549CD,A
4,100,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_B_A-14-22549CD-100-005.png,benign,22549CD,A
...,...,...,...,...,...,...
7904,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-031.png,malignant,15704,PC
7905,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-032.png,malignant,15704,PC
7906,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-033.png,malignant,15704,PC
7907,400,/content/drive/MyDrive/Breast Cancer Project/b...,SOB_M_PC-14-15704-400-034.png,malignant,15704,PC


In [12]:
groupd_df = df.groupby("tumor_type")

# You can then perform various operations on the grouped data, such as getting the count of each tumor type
tumor_type_counts = groupd_df.size()
tumor_type_counts

tumor_type
A      444
DC    3451
F     1014
LC     626
MC     792
PC     560
PT     453
TA     569
dtype: int64

In [13]:
# df_train_100 = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/train_df_100.csv")
# df_test_100 = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/test_df_100.csv")
# df_val_100 = pd.read_csv("/content/drive/MyDrive/Breast Cancer Project/Mag100/val_df_100.csv")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torchvision.transforms as transforms
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import os

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Define constants
data_dirs = ["TA", "PT", "PC", "MC", "LC", "F", "DC", "A"]
data_root = "/content/drive/MyDrive/Breast Cancer Project/IW/100"  # Replace with the root directory of your data
train_split = 0.8

# Create a list to store the paths and labels of all images
all_data = []

# Populate the list with paths and labels
for label, folder in enumerate(data_dirs):
    folder_path = os.path.join(data_root, folder)
    image_files = os.listdir(folder_path)
    for image_file in image_files:
        image_path = os.path.join(folder_path, image_file)
        all_data.append((image_path, label))

# Split data into training and testing sets
train_data, test_data = train_test_split(all_data, train_size=train_split, shuffle=True, random_state=42)

# Define custom dataset class
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        img = Image.open(img_path).convert('RGB')  # Open image and convert to RGB mode
        if self.transform:
            img = self.transform(img)
        label_tensor = torch.tensor(label, dtype=torch.long)  # Convert label to tensor
        return img, label_tensor

# Image preprocessing with augmentation for training
train_transform = transforms.Compose([
    transforms.Resize((700, 460)),
    transforms.RandomRotation(90),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor()
])

# Image preprocessing without augmentation for testing and validation
test_val_transform = transforms.Compose([
    transforms.Resize((700, 460)),
    transforms.ToTensor()
])

# Create custom datasets
train_dataset = CustomDataset(train_data, transform=train_transform)
test_dataset = CustomDataset(test_data, transform=test_val_transform)

# DataLoaders for batching and shuffling
batch_size = 20  # Define the batch size
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load model directly
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = AutoModelForImageClassification.from_pretrained("microsoft/resnet-50").to(device)

# Define optimizer and scheduler
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', patience=3, verbose=True)

# Define loss function
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 70
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0
    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False)
    for images, labels in progress_bar:
        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        optimizer.zero_grad()
        # Ensure the input tensor is passed correctly
        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * labels.size(0)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        progress_bar.set_postfix({'Loss': train_loss / total, 'Accuracy': 100 * correct / total})

    train_loss = train_loss / len(train_loader.dataset)
    train_accuracy = 100 * correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)  # Move data to GPU
            outputs = model(images).logits
            loss = criterion(outputs, labels)
            val_loss += loss.item() * labels.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = val_loss / len(test_loader.dataset)
    val_accuracy = 100 * correct / total

    print(f'Epoch {epoch+1}/{num_epochs}, '
          f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, '
          f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

    # Adjust learning rate
    scheduler.step(val_loss)

# Test the model
model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        outputs = model(images).logits
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()

test_accuracy = 100 * test_correct / test_total
print(f'Test Accuracy: {test_accuracy:.2f}%')

Using device: cuda




Epoch 1/70, Train Loss: 4.5989, Train Accuracy: 27.63%, Val Loss: 3.7325, Val Accuracy: 34.34%




Epoch 2/70, Train Loss: 2.0713, Train Accuracy: 46.37%, Val Loss: 2.7491, Val Accuracy: 38.85%




Epoch 3/70, Train Loss: 1.4644, Train Accuracy: 53.51%, Val Loss: 2.1207, Val Accuracy: 45.11%




Epoch 4/70, Train Loss: 1.1945, Train Accuracy: 60.09%, Val Loss: 1.6687, Val Accuracy: 49.12%




Epoch 5/70, Train Loss: 1.0428, Train Accuracy: 65.54%, Val Loss: 1.2253, Val Accuracy: 60.65%




Epoch 6/70, Train Loss: 0.8807, Train Accuracy: 68.73%, Val Loss: 1.0614, Val Accuracy: 63.66%




Epoch 7/70, Train Loss: 0.7684, Train Accuracy: 74.25%, Val Loss: 1.0518, Val Accuracy: 67.17%




Epoch 8/70, Train Loss: 0.6764, Train Accuracy: 76.75%, Val Loss: 0.9821, Val Accuracy: 66.42%




Epoch 9/70, Train Loss: 0.6212, Train Accuracy: 77.76%, Val Loss: 0.7398, Val Accuracy: 76.19%




Epoch 10/70, Train Loss: 0.5693, Train Accuracy: 80.14%, Val Loss: 0.7652, Val Accuracy: 74.19%




Epoch 11/70, Train Loss: 0.5009, Train Accuracy: 82.83%, Val Loss: 0.8216, Val Accuracy: 71.93%




Epoch 12/70, Train Loss: 0.4449, Train Accuracy: 84.96%, Val Loss: 0.6446, Val Accuracy: 74.94%




Epoch 13/70, Train Loss: 0.4313, Train Accuracy: 84.40%, Val Loss: 0.4802, Val Accuracy: 81.95%




Epoch 14/70, Train Loss: 0.3851, Train Accuracy: 85.59%, Val Loss: 0.5337, Val Accuracy: 81.45%




Epoch 15/70, Train Loss: 0.3368, Train Accuracy: 88.03%, Val Loss: 0.5426, Val Accuracy: 82.96%




Epoch 16/70, Train Loss: 0.3430, Train Accuracy: 88.22%, Val Loss: 0.5241, Val Accuracy: 80.20%




Epoch 17/70, Train Loss: 0.3339, Train Accuracy: 87.47%, Val Loss: 0.4334, Val Accuracy: 83.71%




Epoch 18/70, Train Loss: 0.3038, Train Accuracy: 89.10%, Val Loss: 0.4295, Val Accuracy: 85.46%




Epoch 19/70, Train Loss: 0.2527, Train Accuracy: 91.04%, Val Loss: 0.4779, Val Accuracy: 82.71%




Epoch 20/70, Train Loss: 0.2451, Train Accuracy: 91.10%, Val Loss: 0.4504, Val Accuracy: 85.21%




Epoch 21/70, Train Loss: 0.2510, Train Accuracy: 91.29%, Val Loss: 0.3827, Val Accuracy: 86.22%




Epoch 22/70, Train Loss: 0.2289, Train Accuracy: 91.67%, Val Loss: 0.3103, Val Accuracy: 88.47%




Epoch 23/70, Train Loss: 0.2341, Train Accuracy: 91.85%, Val Loss: 0.3389, Val Accuracy: 88.97%




Epoch 24/70, Train Loss: 0.2069, Train Accuracy: 92.48%, Val Loss: 0.3316, Val Accuracy: 87.47%




Epoch 25/70, Train Loss: 0.1965, Train Accuracy: 92.48%, Val Loss: 0.4478, Val Accuracy: 84.21%




Epoch 26/70, Train Loss: 0.1965, Train Accuracy: 93.11%, Val Loss: 0.3309, Val Accuracy: 89.47%
Epoch 00026: reducing learning rate of group 0 to 1.0000e-05.




Epoch 27/70, Train Loss: 0.1765, Train Accuracy: 93.67%, Val Loss: 0.2805, Val Accuracy: 89.72%




Epoch 28/70, Train Loss: 0.1613, Train Accuracy: 93.55%, Val Loss: 0.2954, Val Accuracy: 89.22%




Epoch 29/70, Train Loss: 0.1784, Train Accuracy: 93.36%, Val Loss: 0.2815, Val Accuracy: 89.97%




Epoch 30/70, Train Loss: 0.1618, Train Accuracy: 93.73%, Val Loss: 0.2727, Val Accuracy: 90.23%




Epoch 31/70, Train Loss: 0.1573, Train Accuracy: 94.24%, Val Loss: 0.2832, Val Accuracy: 90.73%




Epoch 32/70, Train Loss: 0.1550, Train Accuracy: 95.18%, Val Loss: 0.2720, Val Accuracy: 90.48%




Epoch 33/70, Train Loss: 0.1492, Train Accuracy: 94.30%, Val Loss: 0.2739, Val Accuracy: 89.97%




Epoch 34/70, Train Loss: 0.1579, Train Accuracy: 94.30%, Val Loss: 0.2831, Val Accuracy: 90.98%




Epoch 35/70, Train Loss: 0.1503, Train Accuracy: 94.67%, Val Loss: 0.2648, Val Accuracy: 90.73%




Epoch 36/70, Train Loss: 0.1425, Train Accuracy: 95.11%, Val Loss: 0.2670, Val Accuracy: 90.48%




Epoch 37/70, Train Loss: 0.1638, Train Accuracy: 93.73%, Val Loss: 0.2712, Val Accuracy: 89.72%




Epoch 38/70, Train Loss: 0.1420, Train Accuracy: 94.80%, Val Loss: 0.2609, Val Accuracy: 89.72%




Epoch 39/70, Train Loss: 0.1500, Train Accuracy: 95.05%, Val Loss: 0.2762, Val Accuracy: 90.23%




Epoch 40/70, Train Loss: 0.1464, Train Accuracy: 94.74%, Val Loss: 0.2797, Val Accuracy: 89.97%




Epoch 41/70, Train Loss: 0.1426, Train Accuracy: 95.36%, Val Loss: 0.2654, Val Accuracy: 90.23%




Epoch 42/70, Train Loss: 0.1506, Train Accuracy: 94.67%, Val Loss: 0.2752, Val Accuracy: 90.98%
Epoch 00042: reducing learning rate of group 0 to 1.0000e-06.




Epoch 43/70, Train Loss: 0.1526, Train Accuracy: 93.98%, Val Loss: 0.2782, Val Accuracy: 90.73%




Epoch 44/70, Train Loss: 0.1271, Train Accuracy: 95.93%, Val Loss: 0.2450, Val Accuracy: 90.98%




Epoch 45/70, Train Loss: 0.1580, Train Accuracy: 94.05%, Val Loss: 0.2690, Val Accuracy: 89.97%




Epoch 46/70, Train Loss: 0.1462, Train Accuracy: 94.49%, Val Loss: 0.2770, Val Accuracy: 91.23%




Epoch 47/70, Train Loss: 0.1433, Train Accuracy: 95.49%, Val Loss: 0.2772, Val Accuracy: 90.98%




Epoch 48/70, Train Loss: 0.1486, Train Accuracy: 94.92%, Val Loss: 0.2847, Val Accuracy: 89.72%
Epoch 00048: reducing learning rate of group 0 to 1.0000e-07.




Epoch 49/70, Train Loss: 0.1421, Train Accuracy: 94.80%, Val Loss: 0.2641, Val Accuracy: 90.48%




Epoch 50/70, Train Loss: 0.1595, Train Accuracy: 94.36%, Val Loss: 0.2703, Val Accuracy: 90.73%




Epoch 51/70, Train Loss: 0.1425, Train Accuracy: 95.05%, Val Loss: 0.2659, Val Accuracy: 90.48%




Epoch 52/70, Train Loss: 0.1359, Train Accuracy: 95.05%, Val Loss: 0.2772, Val Accuracy: 90.23%
Epoch 00052: reducing learning rate of group 0 to 1.0000e-08.




Epoch 53/70, Train Loss: 0.1509, Train Accuracy: 94.74%, Val Loss: 0.2939, Val Accuracy: 90.23%




Epoch 54/70, Train Loss: 0.1387, Train Accuracy: 95.05%, Val Loss: 0.2670, Val Accuracy: 90.98%


Epoch 55/70:  12%|█▎        | 10/80 [00:19<02:16,  1.95s/it, Loss=0.131, Accuracy=95]