In [1]:
# Preprocessing
# Read the picture files (stored in the data folder).
# Decode the JPEG content to RGB grids of pixels with channels.
# Convert these into floating-point tensors for input to neural nets.
# Rescale the pixel values (between 0 and 255) to the [0, 1] interval (neural networks train more efficiently on inputs in this range).
import pandas as pd
import torch
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50
import torch.nn.functional as F
import random
from PIL import ImageOps

# Index the training and validation image folders. No transform is attached
# at this point, so samples are returned exactly as the default loader reads them.
train_dataset = datasets.ImageFolder(
    r'C:\Users\elias\Documents\Neural networks\Advanced\Project\cross_out_dataset\train\images'
)
valid_dataset = datasets.ImageFolder(
    r'C:\Users\elias\Documents\Neural networks\Advanced\Project\cross_out_dataset\val\images'
)



In [2]:
import random

import wandb

# Open a Weights & Biases run so that this notebook's training can be tracked.
# NOTE(review): the learning rate recorded here (0.0002) differs from the
# lr=0.001 passed to optim.SGD further down - confirm which one is intended.
run = wandb.init(
    # Project that receives the run.
    project="ResNet",
    # Entity (generally the team name) that owns the project.
    entity="eliassailekarlsson-lule-university-of-technology",
    # Hyperparameters and run metadata stored alongside the run.
    config=dict(
        learning_rate=0.0002,
        architecture="ResNet",
        dataset="cross_out_dataset",
        epochs=10,
    ),
)

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Currently logged in as: eliassailekarlsson (eliassailekarlsson-lule-university-of-technology) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [3]:
# improved
def pad_or_truncate_tensor(tensor, target_shape, position="leftUp"):
    """
    Fit the last two dimensions of `tensor` to `target_shape`.

    Along each of the two trailing axes the input is cropped when it is larger
    than the target and zero-padded when it is smaller. `position` selects
    which corner (or the center) of the input is kept and where that content
    is placed inside the output.

    Args:
        tensor (torch.Tensor): Input tensor; its last two dims are resized.
        target_shape (tuple): Desired size of the last two dims.
        position (str): One of 'leftUp', 'rightUp', 'leftDown', 'rightDown', 'center'.

    Returns:
        torch.Tensor: A new tensor with the input's leading dims, dtype and
        device, and `target_shape` as its last two dims.

    Raises:
        ValueError: If `position` is not one of the supported names.
    """
    src_hw = tensor.shape[-2:]
    result = torch.zeros(
        *tensor.shape[:-2], *target_shape, dtype=tensor.dtype, device=tensor.device
    )

    # Extent of the region that is actually copied along each axis.
    keep_h = min(src_hw[0], target_shape[0])
    keep_w = min(src_hw[1], target_shape[1])

    if position not in ("leftUp", "rightUp", "leftDown", "rightDown", "center"):
        raise ValueError(f"Unknown position: {position}")

    def _axis_offsets(src_len, dst_len, keep, mode):
        # Return (start in the input, start in the output) for one axis.
        if mode == "start":
            return 0, 0
        src_slack = src_len - keep
        dst_slack = dst_len - keep
        if mode == "middle":
            src_slack //= 2
            dst_slack //= 2
        return max(0, src_slack), max(0, dst_slack)

    # Translate the position name into one alignment mode per axis.
    if position == "center":
        v_mode = h_mode = "middle"
    else:
        v_mode = "end" if position in ("leftDown", "rightDown") else "start"
        h_mode = "end" if position in ("rightUp", "rightDown") else "start"

    src_y, dst_y = _axis_offsets(src_hw[0], target_shape[0], keep_h, v_mode)
    src_x, dst_x = _axis_offsets(src_hw[1], target_shape[1], keep_w, h_mode)

    # Copy the selected window of the input into its slot in the zero canvas.
    window = tensor[..., src_y:src_y + keep_h, src_x:src_x + keep_w]
    result[..., dst_y:dst_y + keep_h, dst_x:dst_x + keep_w] = window

    return result

In [4]:
class CustomPadCropTransform:
    """Callable transform that pads/crops a tensor's last two dims to a fixed size."""

    def __init__(self, target_size, position="center"):
        # Both values are forwarded unchanged to pad_or_truncate_tensor on every call.
        self.position = position
        self.target_size = target_size

    def __call__(self, tensor):
        """Return `tensor` fitted to `self.target_size`, aligned per `self.position`."""
        fitted = pad_or_truncate_tensor(tensor, self.target_size, position=self.position)
        return fitted

In [5]:
class BinaryThreshold:
    """Binarise a tensor: 1.0 where a value exceeds the threshold, 0.0 elsewhere."""

    def __init__(self, threshold=0.5):
        """
        threshold: cut-off, meant to lie between 0 and 1. Values strictly
        greater than it become 1; everything else becomes 0.
        """
        self.threshold = threshold

    def __call__(self, tensor):
        # Boolean mask first, then cast to float32 so later transforms get floats.
        above = tensor.gt(self.threshold)
        return above.float()

In [6]:
class SobelEdgeDetection:
    """
    Tensor transform that replaces an image with its Sobel edge magnitude.

    A 3-channel [3, H, W] input is first reduced to one grayscale channel.
    The result is min-max normalised to the [0, 1] range. For a [3, H, W] or
    [1, H, W] input the output has shape [1, H, W].
    """

    def __init__(self):
        # Sobel kernels, shaped [out_channels=1, in_channels=1, 3, 3] for conv2d.
        self.kernel_x = torch.tensor([[-1., 0., 1.],
                                      [-2., 0., 2.],
                                      [-1., 0., 1.]]).view(1, 1, 3, 3)
        self.kernel_y = torch.tensor([[-1., -2., -1.],
                                      [ 0.,  0.,  0.],
                                      [ 1.,  2.,  1.]]).view(1, 1, 3, 3)

    def __call__(self, tensor):
        """
        Args:
            tensor (torch.Tensor): Image tensor, [3, H, W] (RGB) or [1, H, W].

        Returns:
            torch.Tensor: Edge magnitude scaled to [0, 1], same dtype/device as
            the (floating-point) input.
        """
        if tensor.dim() == 3 and tensor.shape[0] == 3:
            # Weighted RGB -> grayscale conversion
            tensor = 0.2989 * tensor[0] + 0.5870 * tensor[1] + 0.1140 * tensor[2]
            tensor = tensor.unsqueeze(0)  # Add channel dim back

        tensor = tensor.unsqueeze(0)  # Add batch dim

        # conv2d needs the weights on the input's device and in its dtype; the
        # stored kernels are float32 CPU tensors, so align them per call.
        # Non-floating inputs are left alone and fail in conv2d as before.
        kernel_x, kernel_y = self.kernel_x, self.kernel_y
        if tensor.is_floating_point():
            kernel_x = kernel_x.to(device=tensor.device, dtype=tensor.dtype)
            kernel_y = kernel_y.to(device=tensor.device, dtype=tensor.dtype)

        # padding=1 keeps the spatial size unchanged for the 3x3 kernels.
        edge_x = F.conv2d(tensor, kernel_x, padding=1)
        edge_y = F.conv2d(tensor, kernel_y, padding=1)

        # Gradient magnitude
        edge = torch.sqrt(edge_x ** 2 + edge_y ** 2)

        # Drop only the batch dim; the channel dim is kept.
        edge = edge.squeeze(0)

        # Min-max normalise; the epsilon avoids division by zero on flat images.
        edge = (edge - edge.min()) / (edge.max() - edge.min() + 1e-6)

        return edge

In [7]:
class ResizeAndPadToFixed:
    """
    Scale an image to fit inside a fixed (width, height) box while keeping its
    aspect ratio, then pad the remaining space so the result is exactly that
    size. The scaled image is placed at a random offset inside the box.
    """

    def __init__(self, size, fill_color=0):
        """
        Args:
            size (tuple): Target size as (width, height).
            fill_color: Fill value handed to ImageOps.expand for the padding.
        """
        self.target_w, self.target_h = size
        self.fill_color = fill_color

    def __call__(self, image):
        """
        Args:
            image: Image object exposing `.size` as (width, height) and
                `.resize((w, h))`, e.g. a PIL image.

        Returns:
            The padded image of size (target_w, target_h).
        """
        w, h = image.size

        # Resize with aspect ratio preserved
        scale = min(self.target_w / w, self.target_h / h)
        # int() truncation can produce 0 for very elongated images (e.g. a
        # 1000x1 input scaled into 160x80); clamp to [1, target] so resize
        # always receives a valid size and the padding below is never negative.
        new_w = min(self.target_w, max(1, int(w * scale)))
        new_h = min(self.target_h, max(1, int(h * scale)))
        image = image.resize((new_w, new_h))

        # Pad to target size, splitting the free space at a random offset
        pad_w = self.target_w - new_w
        pad_h = self.target_h - new_h
        left = random.randint(0, pad_w) if pad_w > 0 else 0
        top = random.randint(0, pad_h) if pad_h > 0 else 0
        right = pad_w - left
        bottom = pad_h - top

        return ImageOps.expand(image, border=(left, top, right, bottom), fill=self.fill_color)

In [8]:
class GrayToRGB:
    """Repeat a single-channel tensor along the channel dim to get 3 channels."""

    def __call__(self, tensor):
        # -1 leaves the size of that dimension unchanged.
        rgb_sizes = (3, -1, -1)
        return tensor.expand(rgb_sizes)

In [9]:
# Show the class names found for each split, one list per line, to confirm
# that training and validation agree.
print(train_dataset.classes, valid_dataset.classes, sep="\n")

['CLEAN', 'CROSS', 'DIAGONAL', 'DOUBLE_LINE', 'SCRATCH', 'SINGLE_LINE', 'WAVE', 'ZIG_ZAG']
['CLEAN', 'CROSS', 'DIAGONAL', 'DOUBLE_LINE', 'SCRATCH', 'SINGLE_LINE', 'WAVE', 'ZIG_ZAG']


In [10]:
# Number of samples per mini-batch; passed as `batch_size` to every DataLoader below.
BATCH_SIZE = 64

In [11]:
def _build_edge_transform():
    """
    Build the preprocessing pipeline shared by training and validation:
    fit the image into a 160x80 (W x H) canvas, convert it to a tensor, take
    the Sobel edge magnitude, repeat it to 3 channels and normalise with
    mean 0.5 / std 0.5.
    """
    steps = [
        ResizeAndPadToFixed((160, 80)),  # fixed output size (W=160, H=80)
        transforms.ToTensor(),
        SobelEdgeDetection(),
        GrayToRGB(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
    return transforms.Compose(steps)


# Training transforms. Augmentations that are currently switched off (in the
# original pipeline they sat right after the resize step):
#     transforms.RandomHorizontalFlip(p=0.5)
#     transforms.RandomVerticalFlip(p=0.5)
#     transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5))
#     transforms.RandomRotation(degrees=(30, 70))
train_transform = _build_edge_transform()

# Validation transforms (same steps as training).
# NOTE(review): ResizeAndPadToFixed pads at a random offset, so validation
# inputs are not deterministic either - confirm this is intended.
valid_transform = _build_edge_transform()

In [12]:
# Training and validation datasets, now with their preprocessing attached.
train_dataset = datasets.ImageFolder(
    root=r'C:\Users\elias\Documents\Neural networks\Advanced\Project\cross_out_dataset\train\images',
    transform=train_transform
)
valid_dataset = datasets.ImageFolder(
    root=r'C:\Users\elias\Documents\Neural networks\Advanced\Project\cross_out_dataset\val\images',
    transform=valid_transform
)

# Load in the main process. On Windows, DataLoader workers are spawned and
# must unpickle the dataset, including the transform classes defined in this
# notebook; the workers cannot import those, so num_workers > 0 fails or hangs.
# Raise this only after moving the transform classes into an importable .py module.
NUM_WORKERS = 0

# Pinned host memory only helps when batches are copied to a CUDA device;
# requesting it on a CPU-only machine just produces a warning.
PIN_MEMORY = torch.cuda.is_available()

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY
)
# Validation batches keep the dataset order.
valid_loader = DataLoader(
    valid_dataset, batch_size=BATCH_SIZE, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY
)






In [None]:
# Sanity check: fetch a single batch from the training loader and print its labels.
images, labels = next(iter(train_loader))
print(labels)

In [None]:
# Start from a ResNet-50 with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm against the installed version.
model = resnet50(pretrained=True)

# Swap the final fully connected layer for one with an output per dataset class.
num_features = model.fc.in_features
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(in_features=num_features, out_features=num_classes)

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU, and
# place the model there.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Cross-entropy loss and SGD with momentum over all model parameters.
# NOTE(review): lr here is 0.001 while the wandb config records 0.0002.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

In [None]:
def train(num_epochs=10, lr=0.001, num_workers=0):
    """
    Fine-tune a pretrained ResNet-50 on the cross-out dataset and report
    training loss, validation loss and validation accuracy after every epoch.

    The template placeholders (`...` for model/optimizer/criterion and the
    `path\\to\\...` dataset roots) are filled with the same definitions the
    earlier notebook cells use, so the function runs on its own.
    NOTE(review): confirm these are the intended model, paths and hyperparameters.

    Args:
        num_epochs (int): Number of passes over the training set.
        lr (float): Learning rate for SGD (momentum is fixed at 0.9).
        num_workers (int): DataLoader worker processes. Defaults to 0 because
            spawned workers on Windows cannot unpickle transform classes that
            are defined inside a notebook.

    Returns:
        torch.nn.Module: The fine-tuned model, left on the training device.
    """
    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define datasets and loaders
    train_dataset = datasets.ImageFolder(
        root=r'C:\Users\elias\Documents\Neural networks\Advanced\Project\cross_out_dataset\train\images',
        transform=train_transform
    )
    valid_dataset = datasets.ImageFolder(
        root=r'C:\Users\elias\Documents\Neural networks\Advanced\Project\cross_out_dataset\val\images',
        transform=valid_transform
    )

    loader_kwargs = dict(
        batch_size=BATCH_SIZE,
        num_workers=num_workers,
        # Pinned memory is only useful for host-to-GPU copies.
        pin_memory=device.type == "cuda",
        # DataLoader rejects persistent_workers=True when num_workers == 0.
        persistent_workers=num_workers > 0,
    )
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    valid_loader = DataLoader(valid_dataset, shuffle=False, **loader_kwargs)

    # Model: pretrained backbone with a new head sized to the dataset classes.
    model = resnet50(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, len(train_dataset.classes))
    model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by batch size so the
            # epoch average is per sample even when the last batch is smaller.
            train_loss += loss.item() * inputs.size(0)

        model.eval()
        val_loss = 0.0
        val_correct = 0  # plain int, so no tensor-only methods are needed later
        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                preds = outputs.argmax(dim=1)
                val_correct += (preds == labels).sum().item()

        train_loss /= len(train_dataset)
        val_loss /= len(valid_dataset)
        val_acc = val_correct / len(valid_dataset)
        print(f"Epoch [{epoch + 1}/{num_epochs}] "
              f"Train Loss: {train_loss:.4f} "
              f"Val Loss: {val_loss:.4f} "
              f"Val Acc: {val_acc:.4f}")

    return model

if __name__ == '__main__':
    # Keep a handle on the result; otherwise the fine-tuned weights are discarded.
    trained_model = train()

In [None]:
# Plain RGB preview pipeline (no edge detection), used to inspect samples.
transform = transforms.Compose([
    # The original first step, ResizeByLongerSide, is not defined in any cell
    # above, so this cell raised NameError on a fresh kernel. ResizeAndPadToFixed
    # accepts the same (W, H) argument.
    # NOTE(review): confirm this is the intended replacement.
    ResizeAndPadToFixed((160, 80)),
    transforms.ToTensor(),
    # Optional steps, currently disabled:
    #transforms.Grayscale(),
    #CustomPadCropTransform((128, 128), position="leftDown"),
    #SobelEdgeDetection(),
    #BinaryThreshold(threshold=0.2),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

In [None]:

# Training images again, this time passed through the preview `transform`.
dataset = datasets.ImageFolder(
    r'C:\Users\elias\Documents\Neural networks\Advanced\Project\cross_out_dataset\train\images',
    transform=transform,
)

In [None]:
from matplotlib import pyplot as plt

# Index of the sample to inspect.
sample_index = 290905
img, label = dataset[sample_index]
print("Image shape:", img.shape)  # laid out as [C, H, W]
print("Label:", label)

# Optional: show image.
# The tensor was normalised with mean 0.5 / std 0.5, i.e. it lies in [-1, 1].
# imshow expects float RGB in [0, 1] and clips anything outside, so undo the
# normalisation first (clamp guards against rounding just outside the range).
preview = (img * 0.5 + 0.5).clamp(0.0, 1.0)
plt.imshow(preview.permute(1, 2, 0))  # Convert from [C, H, W] to [H, W, C]
plt.title(f"Label: {label}")
plt.show()

In [None]:
# Load the same dataset with different transforms
#dataset1 = CIFAR10(root='./data', train=True, download=True, transform=transform1)
#dataset2 = CIFAR10(root='./data', train=True, download=True, transform=transform2)

# Combine datasets
#combined_dataset = ConcatDataset([dataset1, dataset2])