In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyFCN(nn.Module):
    def __init__(self, num_classes=3):
        super(TinyFCN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=1, stride=1),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=1, stride=2, padding=1),
        )
        self.classifier = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=num_classes, kernel_size=1),
        )


# Print the number of parameters in the model
model = TinyFCN()
print('Number of parameters: {}'.format(sum([p.numel() for p in model.parameters()])))

Number of parameters: 139811


In [17]:
import torch
import torch.nn as nn


class FCNRegression(nn.Module):
    def __init__(self):
        super(FCNRegression, self).__init__()

        # Capas convolucionales
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)

        # Capas totalmente convolucionales para regresión
        self.fc = nn.Conv2d(256, 1, kernel_size=1)  # Última capa con un solo canal de salida para regresión

    def forward(self, x):
        # Pasar por las capas convolucionales
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))

        # Pasar por la capa totalmente convolucional para regresión
        x = self.fc(x)

        return x


# Crear una instancia del modelo
model = FCNRegression()

print('Number of parameters: {}'.format(sum([p.numel() for p in model.parameters()])))

Number of parameters: 371073


In [18]:
import torch


class MYNet(nn.Module):
    def __init__(self):
        super(MYNet, self).__init__()
        self.model = torch.hub.load("pytorch/vision:v0.10.0", "fcn_resnet50", pretrained=True)
        self.model.classifier[4] = nn.Conv2d(512, 3, kernel_size=(1, 1), stride=(1, 1))

    def forward(self, x):
        return self.model(x)["out"]


model = MYNet()
print("Number of parameters: {}".format(sum([p.numel() for p in model.parameters()])))

Using cache found in C:\Users\Sergi/.cache\torch\hub\pytorch_vision_v0.10.0


Number of parameters: 35312984


In [19]:
from tqdm import tqdm


def train(model, device, train_loader, optimizer, epoch, log_interval=100, verbose=True):
    model.train()
    loss_v = 0

    for batch_idx, (data, target) in (t := tqdm(enumerate(train_loader), total=len(train_loader), disable=not verbose)):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)

        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0 and verbose:
            t.set_description(
                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch,
                    batch_idx * len(data),
                    len(train_loader.dataset),
                    100.0 * batch_idx / len(train_loader),
                    loss.item(),
                )
            )
        loss_v += loss.item()
    loss_v /= len(train_loader.dataset)
    print("\nTrain set: Average loss: {:.4f}\n".format(loss_v))
    return loss_v


def test(model, device, test_loader, epoch, verbose=True):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(test_loader, total=len(test_loader), disable=not verbose, desc=f"Testing: {epoch}"):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction="sum").item()
            pred = output.argmax(dim=1, keepdim=True)  # obtener el índice de la probabilidad máxima
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
        )
    )

    return test_loss

In [20]:
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import cv2

path_train = os.path.join(os.getcwd(), "data", "aixi_shape_256_texture", "train")

files = os.listdir(path_train)

img_files = [os.path.join(path_train, p) for p in files if p.endswith(".png")]
label_files = [os.path.join(path_train, "gt", p) for p in files if p.endswith(".png")]


class AIXI_Shape(Dataset):
    def __init__(self, images, labels, transform=None):
        super().__init__()
        self.paths = images
        self.labels = labels
        self.len = len(self.paths)
        self.transform = transform

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        path = self.paths[index]
        label_path = self.labels[index]
        image = cv2.imread(path)
        label = cv2.imread(label_path)
        if self.transform is not None:
            image = self.transform(image)
            label = self.transform(label)
        return image, label


# image normalization
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[1.9491e-05, 4.0427e-05, 3.6870e-05], std=[0.0003, 0.0004, 0.0004]),
    ]
)


# creació dels conjunts d'entrenament i test
train_ds = AIXI_Shape(img_files, label_files, transform)
# El test l'heu de crear vosaltres
train_dl = DataLoader(train_ds, batch_size=64)
train_dl.dataset[0][0].shape

torch.Size([3, 256, 256])

In [21]:
# Test
path_train = os.path.join(os.getcwd(), "data", "aixi_shape_256_texture", "val")

test_files = os.listdir(path_train)

test_img_files = [os.path.join(path_train, p) for p in files if p.endswith(".png")]
test_label_files = [os.path.join(path_train, "gt", p) for p in files if p.endswith(".png")]

test_ds = AIXI_Shape(test_img_files, test_label_files, transform)
test_dl = DataLoader(test_ds, batch_size=64)
test_dl.dataset[0][0].shape

torch.Size([3, 256, 256])

In [23]:
from torch import optim
import numpy as np

torch.manual_seed(33)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device {device}")

epochs = 15
lr = 10e-4

model = MYNet().to(device)
# Freeze the layers except the classifier
for param in model.parameters():
    param.requires_grad = False

for param in model.model.classifier.parameters():
    param.requires_grad = True

# print(model)

opt_adam = optim.Adam(model.parameters(), lr=lr)
opt_sgd = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

# Guardam el valor de pèrdua mig de cada iteració (època)
train_l = np.zeros((epochs))
test_l = np.zeros((epochs))

# Bucle d'entrenament
for epoch in range(0, epochs):
    optimizer = opt_adam if epoch < 10 else opt_sgd
    train_l[epoch] = train(model, device, train_dl, optimizer, epoch, log_interval=10)
    test_l[epoch] = test(model, device, test_dl, epoch)

Using device cuda


Using cache found in C:\Users\Sergi/.cache\torch\hub\pytorch_vision_v0.10.0



Train set: Average loss: -0.0775



Testing: 0:   0%|          | 0/157 [00:01<?, ?it/s]


RuntimeError: shape '[64, 1, 256, 256]' is invalid for input of size 12582912