In [1]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

import numpy as np
import pandas as pd

from PIL import Image

from sklearn.model_selection import train_test_split

In [2]:
class ImageDataset(Dataset):
    """Images paired with a one-hot encoding of their ``epsilon`` label.

    Every row of ``data`` supplies a ``filename`` and a float ``epsilon``.
    The label is mapped to the class index ``round(epsilon * num_classes)``
    and returned as a one-hot float64 tensor of length ``num_classes``.

    Args:
        path: Prefix prepended verbatim to every filename (plain string
            concatenation, so a directory must end with a path separator).
        data: Frame providing the ``filename`` and ``epsilon`` columns.
        transform: Optional callable applied to the image array after the
            pixel values have been repeated along a new trailing axis.
        num_classes: Length of the one-hot target vector. Defaults to 1000,
            i.e. one class per 0.001 step of ``epsilon``.
    """

    def __init__(
        self,
        path: str,
        data: pd.DataFrame,
        transform=None,
        num_classes: int = 1000,
    ) -> None:
        self.image_paths = np.array(
            [path + filename for filename in data["filename"].to_numpy()]
        )
        self.labels = data["epsilon"].to_numpy()
        self.transform = transform
        self.num_classes = num_classes

    def __getitem__(self, inx: int) -> tuple:
        """Return ``(image, target)`` for the sample at position ``inx``.

        Returns:
            The image (after ``transform`` when one was supplied, otherwise
            the raw array with a trailing axis of size 3) and the one-hot
            target as a float64 tensor.

        Raises:
            ValueError: If the label maps to a class index outside
                ``[0, num_classes)``.
        """
        label_float_epsilon = float(self.labels[inx])
        # Round instead of truncating: a label stored a hair below a grid
        # point (e.g. 0.5999999999999999) must still map to class 600, not 599.
        class_index = int(round(label_float_epsilon * self.num_classes))
        # Reject out-of-range labels explicitly. A negative index would
        # otherwise silently wrap around to the end of the target vector, and
        # an IndexError would be mistaken for end-of-sequence by the legacy
        # ``__getitem__`` iteration protocol.
        if not 0 <= class_index < self.num_classes:
            raise ValueError(
                f"epsilon={label_float_epsilon!r} maps to class {class_index}, "
                f"outside [0, {self.num_classes})"
            )
        target = np.zeros(shape=(self.num_classes,))
        target[class_index] = 1

        # Materialise the pixels inside the context manager so the underlying
        # file handle is released deterministically.
        with Image.open(self.image_paths[inx]) as pil_image:
            image = np.array(pil_image)
        # repeat grayscale value three times for all RGB channels
        image = np.repeat(image[..., np.newaxis], 3, -1)
        if self.transform:
            image = self.transform(image)
        return image, torch.from_numpy(target)

    def __len__(self) -> int:
        """Return the number of samples (one per row of ``data``)."""
        return len(self.image_paths)

In [3]:
# Preprocessing pipeline handed to the datasets: convert the incoming array to
# a PIL image, resize it to 224x224, convert it to a tensor and normalise with
# mean 0.5 / std 0.5 (the single-element lists broadcast over all channels).
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

In [4]:
# Location prefix shared by the parameter table and (via ImageDataset) the images.
main_path = "../../data/generated/perlin/pizza/"
# Parameter table; downstream cells read its "filename" and "epsilon" columns.
df = pd.read_csv(f"{main_path}parameters.csv")

In [5]:
# 70/30 train/validation split with a fixed seed, stratified on epsilon so both
# subsets keep the same label distribution.
train, valid = train_test_split(
    df,
    test_size=0.3,
    stratify=df["epsilon"],
    shuffle=True,
    random_state=12,
)

In [6]:
# Both splits read from the same location and share the same preprocessing.
train_dataset, valid_dataset = (
    ImageDataset(path=main_path, data=split, transform=transform)
    for split in (train, valid)
)

In [7]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0)
# Validation is iterated in a fixed order: shuffling brings no benefit when
# only evaluating and makes per-batch results non-reproducible between runs.
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False, num_workers=0)

In [5]:
# `is_available` must be *called*: the bare function object is always truthy,
# which would select "cuda" even on a machine without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
lr = 1e-4  # base learning rate
epochs = 10  # number of training epochs
print(device)

cuda


In [9]:
# Display the base learning rate configured above.
lr

0.0001

In [6]:
import torch.nn as nn
from torchvision import models
import torch.optim as optim

In [7]:
def loss_fn(outputs, targets):
    """Mean binary cross-entropy between raw logits and targets.

    Args:
        outputs: Unnormalised model logits.
        targets: Tensor of the same shape as ``outputs`` holding the target
            value for each logit.

    Returns:
        Scalar tensor with the loss averaged over all elements.
    """
    return torch.nn.functional.binary_cross_entropy_with_logits(outputs, targets)

In [8]:
# ResNet-18 backbone loaded with pretrained weights.
model = models.resnet18(pretrained=True)

# Swap the stock fully connected layer for a two-layer head that emits 1000
# logits. The layer order is kept exactly as-is so the Sequential indices
# (and therefore state_dict keys) stay stable.
model.fc = classifier = nn.Sequential(
    nn.Linear(512, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.ReLU(),
    nn.Linear(512, 1000),
)



In [10]:

# Separate learning rates: the replacement head is updated ten times faster
# than the backbone.
lr_classifier = 1e-3  # classifier head (model.fc)
lr_resnet = 1e-4  # ResNet backbone stages

# AdamW with one parameter group for the head, followed by one group per
# backbone stage in network order.
optimizer = optim.AdamW(
    [{"params": model.fc.parameters(), "lr": lr_classifier}]
    + [
        {"params": getattr(model, stage).parameters(), "lr": lr_resnet}
        for stage in ("conv1", "bn1", "layer1", "layer2", "layer3", "layer4")
    ],
    eps=1e-8,
)

# Cosine annealing with warm restarts: the first cycle spans T_0 = 5 scheduler
# steps and every following cycle is T_mult = 2 times longer.
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_mult=2, T_0=5
)