In [14]:
import os
import torch
import torch.optim as optim
from torch.utils.data import DataLoader,Dataset
from torchvision import datasets, transforms, models
# import timm
from PIL import Image
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
import torch.nn as nn
%matplotlib inline

In [16]:
import wandb #ne logam la platforma
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/lolluckestar/.netrc


True

Verificam device-ul

In [17]:
def get_device():
    if torch.cuda.is_available():
        return torch.device('cuda')
    if torch.backends.mps.is_available():
        return torch.device('mps')
    return torch.device('cpu')

device = get_device()

print(f'Using device: {device}')

Using device: cuda


Start Configure the Network

In [31]:
configurations = {
    1: { 
        'dataset': 'Brain Tumor Segmentation',
        'model' : 'UNet',
        'batch_size' : 64,
        'num_epochs' : 50,
        'learning_rate' : 0.001,
        'optimizer' : 'Adam',
        'scheduler' : 'StepLR',
        'hidden_size' : 128,
        'n_channels': 3,
        'n_classes': 1,
        'bilinear': True 
    }
}

In [33]:
selected_config = None
model = None
while selected_config is None:
    try:
        config_number = int(input("Enter a number 1 - 6 for each config type: "))
        if config_number in configurations:
            selected_config = configurations[config_number]
            print(f"Selected Configuration: {selected_config}")
        else:
            print("Invalid input. Please enter a number between 1 and 6.")
    except ValueError:
        print("Please enter a valid number.")

wandb.init(project="pytorch_training_pipeline", config=selected_config)
config = wandb.config

Selected Configuration: {'dataset': 'Brain Tumor Segmentation', 'model': 'UNet', 'batch_size': 64, 'num_epochs': 50, 'learning_rate': 0.001, 'optimizer': 'Adam', 'scheduler': 'StepLR', 'hidden_size': 128, 'n_channels': 3, 'n_classes': 1, 'bilinear': True}


Data Augumentation

In [25]:
from torchvision import transforms

image_transform = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

mask_transform = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.ToTensor(),
])

Construct the data loader

In [26]:
from sklearn.model_selection import train_test_split

class CachedDataSet(Dataset):
    def __init__(self, image_files, mask_files, image_transform=None, mask_transform=None):
        self.image_files = image_files
        self.mask_files = mask_files
        self.image_transform = image_transform
        self.mask_transform = mask_transform

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, index):
        img_path = self.image_files[index]
        mask_path = self.mask_files[index]
        
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")
        
        if self.image_transform:
            image = self.image_transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)

        return image, mask

def get_train_test_split(image_dir, mask_dir, test_size=0.3, random_seed=42):
    image_files = sorted([os.path.join(image_dir, f) for f in os.listdir(image_dir)])
    mask_files = sorted([os.path.join(mask_dir, f) for f in os.listdir(mask_dir)])
    
    train_images, test_images, train_masks, test_masks = train_test_split(
        image_files, mask_files, test_size=test_size, random_state=random_seed
    )
    
    return train_images, test_images, train_masks, test_masks

image_dir = './Brain Tumor Segmentation/images'
mask_dir = './Brain Tumor Segmentation/masks'

train_images, test_images, train_masks, test_masks = get_train_test_split(image_dir, mask_dir)

train_dataset = CachedDataSet(
    train_images, train_masks, 
    image_transform=image_transform, 
    mask_transform=mask_transform
)
test_dataset = CachedDataSet(
    test_images, test_masks, 
    image_transform=image_transform, 
    mask_transform=mask_transform
)

train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=config.batch_size, shuffle=False)

for images, masks in train_loader:
    print(f"Train Images batch shape: {images.shape}")
    print(f"Train Masks batch shape: {masks.shape}")
    break

for images, masks in test_loader:
    print(f"Test Images batch shape: {images.shape}")
    print(f"Test Masks batch shape: {masks.shape}")
    break

Train Images batch shape: torch.Size([64, 3, 244, 244])
Train Masks batch shape: torch.Size([64, 1, 244, 244])
Test Images batch shape: torch.Size([64, 3, 244, 244])
Test Masks batch shape: torch.Size([64, 1, 244, 244])


Modelul U Net.

In [27]:
class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=False):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = (DoubleConv(n_channels, 64))
        self.down1 = (Down(64, 128))
        self.down2 = (Down(128, 256))
        self.down3 = (Down(256, 512))
        factor = 2 if bilinear else 1
        self.down4 = (Down(512, 1024 // factor))
        self.up1 = (Up(1024, 512 // factor, bilinear))
        self.up2 = (Up(512, 256 // factor, bilinear))
        self.up3 = (Up(256, 128 // factor, bilinear))
        self.up4 = (Up(128, 64, bilinear))
        self.outc = (OutConv(64, n_classes))

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

    def use_checkpointing(self):
        self.inc = torch.utils.checkpoint(self.inc)
        self.down1 = torch.utils.checkpoint(self.down1)
        self.down2 = torch.utils.checkpoint(self.down2)
        self.down3 = torch.utils.checkpoint(self.down3)
        self.down4 = torch.utils.checkpoint(self.down4)
        self.up1 = torch.utils.checkpoint(self.up1)
        self.up2 = torch.utils.checkpoint(self.up2)
        self.up3 = torch.utils.checkpoint(self.up3)
        self.up4 = torch.utils.checkpoint(self.up4)
        self.outc = torch.utils.checkpoint(self.outc)

In [34]:
if config.model == 'UNet':
        model = UNet(
        n_channels=config.n_channels,
        n_classes=config.n_classes,
        bilinear=config.get('bilinear', False)
    ).to(device)

In [35]:
#optimizator configuration

if config.optimizer == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
elif config.optimizer == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
elif config.optimizer == 'SGD_momentum':
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, momentum=0.9)
elif config.optimizer == 'SGD_nesterov':
    optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, momentum=0.9, nesterov=True)
elif config.optimizer == 'AdamW':
    optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate)
elif config.optimizer == 'RMSprop':
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
else:
    optimizer = optim.SGD(model.parameters(), lr=0.1)

In [36]:
# config scheduler

if config.scheduler == 'StepLR':
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
elif config.scheduler == 'ReduceLROnPlateau':
    scheduler = ReduceLROnPlateau(optimizer, 'min')
else:
    scheduler = None


Train

In [37]:
#training loop : 
patience = 5
best_accuracy = 0.0
epochs_without_improvement = 0

for epoch in range(config.num_epochs):
    model.train()
    median_loss = 0.0
    for images,labels in train_loader:
        images,labels = images.to(device),labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = torch.nn.functional.cross_entropy(outputs,labels)
        loss.backward()
        optimizer.step()
        median_loss += loss.item()
    
    if isinstance(scheduler, ReduceLROnPlateau):
        scheduler.step(median_loss / len(train_loader))
    elif scheduler is not None:
        scheduler.step()
    wandb.log({"epoch": epoch, "loss": median_loss / len(train_loader)})

    # Verification - Valid
    model.eval()
    total_corect = 0
    total = 0
    with torch.no_grad():
        for images,labels in test_loader:
            images,labels = images.to(device),labels.to(device)
            outputs = model(images)
            _,predicted = torch.max(outputs.data,1)
            total += labels.size(0)
            total_corect += (predicted == labels).sum().item()
    accuracy = total_corect/total
    
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1
    
    if epochs_without_improvement >= patience:
        print(f"Oprire timpurie activată la epoca {epoch+1}")
        break
    
    print({"epoch": epoch, "loss": median_loss / len(train_loader), "accuracy" : accuracy })
    wandb.log({"accuracy" : accuracy})

print("Run Completed!")            

OutOfMemoryError: CUDA out of memory. Tried to allocate 932.00 MiB. GPU 0 has a total capacity of 1.95 GiB of which 872.62 MiB is free. Including non-PyTorch memory, this process has 1.10 GiB memory in use. Of the allocated memory 1.03 GiB is allocated by PyTorch, and 16.44 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)