In [2]:
import torch
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader
import torch.nn as nn
from augmentations import normalization
import torch.optim as optim
from tqdm import tqdm
import random
import numpy as np
from data_loader import HDF5Dataset, HDF5Dataset_Labels, MergedDataset
import wandb 
import yaml




ModuleNotFoundError: No module named 'augmentations'

In [3]:
yaml_file_path = './hyper.yaml'
with open(yaml_file_path, 'r') as file:
    hyperparameters = yaml.safe_load(file)

learning_rate = hyperparameters['learning_rate']
batch_size = hyperparameters['batch_size']
num_epochs = hyperparameters['num_epochs']

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')

FileNotFoundError: [Errno 2] No such file or directory: './hyper.yaml'

In [1]:
pip install pytorch-lightning

Defaulting to user installation because normal site-packages is not writeable
Collecting pytorch-lightning
  Downloading pytorch_lightning-2.2.0.post0-py3-none-any.whl (800 kB)
[K     |████████████████████████████████| 800 kB 3.0 MB/s eta 0:00:01
Collecting lightning-utilities>=0.8.0
  Downloading lightning_utilities-0.10.1-py3-none-any.whl (24 kB)
Collecting torchmetrics>=0.7.0
  Downloading torchmetrics-1.3.1-py3-none-any.whl (840 kB)
[K     |████████████████████████████████| 840 kB 13.6 MB/s eta 0:00:01
Collecting aiohttp!=4.0.0a0,!=4.0.0a1
  Downloading aiohttp-3.9.3-cp39-cp39-macosx_11_0_arm64.whl (388 kB)
[K     |████████████████████████████████| 388 kB 56.0 MB/s eta 0:00:01
[?25hCollecting async-timeout<5.0,>=4.0
  Downloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)
Collecting attrs>=17.3.0
  Downloading attrs-23.2.0-py3-none-any.whl (60 kB)
[K     |████████████████████████████████| 60 kB 14.7 MB/s eta 0:00:01
[?25hCollecting frozenlist>=1.1.1
  Downloading frozenlis

In [3]:
wandb.login(key='5a1e342ba066e42ad7a4054d1cfcd50ee372775a')
wandb.init(project="Histo_C16_No_Augmentations", entity="drexlin-david", name="your_custom_run_name")
config = wandb.config
config.learning_rate = learning_rate
config.batch_size = batch_size
config.epochs = num_epochs

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdrexlin-david[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/daviddrexlin/.netrc


In [4]:

# Use the custom dataset
x = HDF5Dataset('./data/pcam/camelyonpatch_level_2_split_train_x.h5-002', 'x', normalization())
y =  HDF5Dataset_Labels('./data/pcam/camelyonpatch_level_2_split_train_y.h5', 'y') 
data = MergedDataset(x,y)

# Create a DataLoader
train_loader = DataLoader(data, batch_size = batch_size, shuffle=True)

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')

In [6]:
# Initialize the ResNet18 model
model = models.resnet18(pretrained=False)
# Modify the last layer for binary classification


# Define the loss function and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)



In [7]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit='batch'):
        inputs, labels = inputs.to(device), labels.to(device)
        labels = torch.squeeze(labels)
        
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels.float())
        loss.backward()
        optimizer.step()

        # Statistics
        running_loss += loss.item()
        predicted = torch.sigmoid(outputs).squeeze() > 0.5
    
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = correct / total
    wandb.log({"loss": epoch_loss, "accuracy": epoch_acc})

    print(f"Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
wandb.finish()
# You can add code for testing the model on test_loader similar to the training loop

Epoch 1/10:   0%|          | 0/64 [00:00<?, ?batch/s]

Epoch 1/10:   8%|▊         | 5/64 [02:12<26:23, 26.84s/batch]

In [7]:
import h5py
from torch.utils.data import Dataset

class HDF5Dataset_Labels(Dataset):
    def __init__(self, file_path, dataset_name):
        self.file_path = file_path
        self.dataset_name = dataset_name

        with h5py.File(self.file_path, 'r') as file:
            self.dataset_len = len(file[self.dataset_name])

    def __len__(self):
        return self.dataset_len
    
    def __getitem__(self, idx):
        with h5py.File(self.file_path, 'r') as file:
            data = file[self.dataset_name][idx]

            return data

class HDF5Dataset(Dataset):
    def __init__(self, file_path, dataset_name, transform):
        self.file_path = file_path
        self.dataset_name = dataset_name
        self.transform = transform

        with h5py.File(self.file_path, 'r') as file:
            self.dataset_len = len(file[self.dataset_name])

    def __len__(self):
        return self.dataset_len

    def __getitem__(self, idx):
        with h5py.File(self.file_path, 'r') as file:
            data = file[self.dataset_name][idx]

        # Apply the transformations
        if self.transform:
            data = self.transform(data)

        # Convert data to a PyTorch tensor if it's a NumPy array
        #if isinstance(data, np.ndarray):
        #    data = torch.from_numpy(data)
        
        #print(type(data))
        # Now you can safely clone and detach
        data = data.clone().detach()

        return data

class MergedDataset(Dataset):
    def __init__(self, data_dataset, labels_dataset):
        self.data_dataset = data_dataset
        self.labels_dataset = labels_dataset
        assert len(self.data_dataset) == len(self.labels_dataset), "Datasets must be of equal length"

    def __len__(self):
        return len(self.data_dataset)

    def __getitem__(self, idx):
        data = self.data_dataset[idx]
        label = self.labels_dataset[idx]
        return data, label

In [10]:
import torchvision.transforms as transforms
from PIL import Image

# Use the custom dataset
def numpy_to_pil(np_array):
    # Assuming the numpy array is an image with shape (H, W, C)
    return Image.fromarray(np_array.astype('uint8'), 'RGB')

transforms = transforms.Compose([
    transforms.Lambda(numpy_to_pil),
    #transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.7008, 0.5384, 0.6916], std=[0.2350, 0.2774, 0.2129])
    # std for 224, 224 ->>> transforms.Normalize(mean=[0.7008, 0.5384, 0.6916], std=[0.2177, 0.2621, 0.1947]) 
])

x = HDF5Dataset('/Users/daviddrexlin/Code/Master/data/pcam/camelyonpatch_level_2_split_test_x.h5', 'x', transform=transforms)
y =  HDF5Dataset_Labels('./Users/daviddrexlin/Code/Master/data/pcam/camelyonpatch_level_2_split_test_y.h5', 'y', transform=transforms) 
data = MergedDataset(x,y)
test_loader = DataLoader(data, batch_size = 2048, shuffle=True)

TypeError: __init__() got an unexpected keyword argument 'transform'

In [None]:
model.eval()  # Set the model to evaluation mode

total_loss = 0.0
correct_predictions = 0
total_predictions = 0

with torch.no_grad():  # No need to track gradients during testing

    
    for inputs, labels in tqdm(test_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        labels = torch.squeeze(labels)

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs.squeeze(), labels.float())  # Compute loss
        total_loss += loss.item()
        
        predicted = torch.sigmoid(outputs).squeeze() > 0.5
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

avg_loss = total_loss / len(test_loader)
accuracy = correct_predictions / total_predictions * 100

print(f"Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

100%|██████████| 16/16 [00:42<00:00,  2.66s/it]

Loss: 1.7842, Accuracy: 68.9240





In [None]:
wandb.log({"test_loss": avg_loss, "test_accuracy": accuracy})

NameError: name 'wandb' is not defined