In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyFCN(nn.Module):
    def __init__(self, num_classes=3):
        super(TinyFCN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=1, stride=1),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=1, stride=2, padding=1),
        )
        self.classifier = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=num_classes, kernel_size=1),
        )


# Print the number of parameters in the model
model = TinyFCN()
print('Number of parameters: {}'.format(sum([p.numel() for p in model.parameters()])))

Number of parameters: 139811


In [17]:
import torch
import torch.nn as nn


class FCNRegression(nn.Module):
    def __init__(self):
        super(FCNRegression, self).__init__()

        # Capas convolucionales
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)

        # Capas totalmente convolucionales para regresión
        self.fc = nn.Conv2d(256, 1, kernel_size=1)  # Última capa con un solo canal de salida para regresión

    def forward(self, x):
        # Pasar por las capas convolucionales
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))

        # Pasar por la capa totalmente convolucional para regresión
        x = self.fc(x)

        return x


# Crear una instancia del modelo
model = FCNRegression()

print('Number of parameters: {}'.format(sum([p.numel() for p in model.parameters()])))

Number of parameters: 371073


In [18]:
import torch


class MYNet(nn.Module):
    def __init__(self):
        super(MYNet, self).__init__()
        self.model = torch.hub.load("pytorch/vision:v0.10.0", "fcn_resnet50", pretrained=True)
        self.model.classifier[4] = nn.Conv2d(512, 3, kernel_size=(1, 1), stride=(1, 1))

    def forward(self, x):
        return self.model(x)["out"]


model = MYNet()
print("Number of parameters: {}".format(sum([p.numel() for p in model.parameters()])))

Using cache found in C:\Users\Sergi/.cache\torch\hub\pytorch_vision_v0.10.0


Number of parameters: 35312984


In [19]:
from tqdm import tqdm


def train(model, device, train_loader, optimizer, epoch, log_interval=100, verbose=True):
    model.train()
    loss_v = 0

    for batch_idx, (data, target) in (t := tqdm(enumerate(train_loader), total=len(train_loader), disable=not verbose)):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)

        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0 and verbose:
            t.set_description(
                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch,
                    batch_idx * len(data),
                    len(train_loader.dataset),
                    100.0 * batch_idx / len(train_loader),
                    loss.item(),
                )
            )
        loss_v += loss.item()
    loss_v /= len(train_loader.dataset)
    print("\nTrain set: Average loss: {:.4f}\n".format(loss_v))
    return loss_v


def test(model, device, test_loader, epoch, verbose=True):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in tqdm(test_loader, total=len(test_loader), disable=not verbose, desc=f"Testing: {epoch}"):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.cross_entropy(output, target, reduction="sum").item()
            pred = output.argmax(dim=1, keepdim=True)  # obtener el índice de la probabilidad máxima
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss, correct, len(test_loader.dataset), 100.0 * correct / len(test_loader.dataset)
        )
    )

    return test_loss

In [20]:
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import cv2

path_train = os.path.join(os.getcwd(), "data", "aixi_shape_256_texture", "train")

files = os.listdir(path_train)

img_files = [os.path.join(path_train, p) for p in files if p.endswith(".png")]
label_files = [os.path.join(path_train, "gt", p) for p in files if p.endswith(".png")]


class AIXI_Shape(Dataset):
    def __init__(self, images, labels, transform=None):
        super().__init__()
        self.paths = images
        self.labels = labels
        self.len = len(self.paths)
        self.transform = transform

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        path = self.paths[index]
        label_path = self.labels[index]
        image = cv2.imread(path)
        label = cv2.imread(label_path)
        if self.transform is not None:
            image = self.transform(image)
            label = self.transform(label)
        return image, label


# image normalization
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[1.9491e-05, 4.0427e-05, 3.6870e-05], std=[0.0003, 0.0004, 0.0004]),
    ]
)


# creació dels conjunts d'entrenament i test
train_ds = AIXI_Shape(img_files, label_files, transform)
# El test l'heu de crear vosaltres
train_dl = DataLoader(train_ds, batch_size=64)
train_dl.dataset[0][0].shape

torch.Size([3, 256, 256])

In [21]:
# Test
path_train = os.path.join(os.getcwd(), "data", "aixi_shape_256_texture", "val")

test_files = os.listdir(path_train)

test_img_files = [os.path.join(path_train, p) for p in files if p.endswith(".png")]
test_label_files = [os.path.join(path_train, "gt", p) for p in files if p.endswith(".png")]

test_ds = AIXI_Shape(test_img_files, test_label_files, transform)
test_dl = DataLoader(test_ds, batch_size=64)
test_dl.dataset[0][0].shape

torch.Size([3, 256, 256])

In [23]:
from torch import optim
import numpy as np

torch.manual_seed(33)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device {device}")

epochs = 15
lr = 10e-4

model = MYNet().to(device)
# Freeze the layers except the classifier
for param in model.parameters():
    param.requires_grad = False

for param in model.model.classifier.parameters():
    param.requires_grad = True

# print(model)

opt_adam = optim.Adam(model.parameters(), lr=lr)
opt_sgd = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

# Guardam el valor de pèrdua mig de cada iteració (època)
train_l = np.zeros((epochs))
test_l = np.zeros((epochs))

# Bucle d'entrenament
for epoch in range(0, epochs):
    optimizer = opt_adam if epoch < 10 else opt_sgd
    train_l[epoch] = train(model, device, train_dl, optimizer, epoch, log_interval=10)
    test_l[epoch] = test(model, device, test_dl, epoch)

Using device cuda


Using cache found in C:\Users\Sergi/.cache\torch\hub\pytorch_vision_v0.10.0



Train set: Average loss: -0.0775



Testing: 0:   0%|          | 0/157 [00:01<?, ?it/s]


RuntimeError: shape '[64, 1, 256, 256]' is invalid for input of size 12582912

In [8]:
import torch
import os

# Model
model = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)

# Images
imgs = [
    "https://ultralytics.com/images/zidane.jpg",
    "https://ultralytics.com/images/bus.jpg",
]

# Inference
results = model(imgs)

# Results
results.print()
results.save()  # or .show()

results.xyxy[0]  # img1 predictions (tensor)
results.pandas().xyxy[0]

Using cache found in C:\Users\Sergi/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-12-8 Python-3.11.5 torch-2.1.0 CUDA:0 (NVIDIA GeForce RTX 3060, 12287MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 
image 1/2: 720x1280 2 persons, 2 ties
image 2/2: 1080x810 4 persons, 1 bus
Speed: 1504.4ms pre-process, 75.5ms inference, 4.0ms NMS per image at shape (2, 3, 640, 640)
Saved 2 images to [1mruns\detect\exp6[0m


Unnamed: 0,xmin,ymin,xmax,ymax,confidence,class,name
0,742.569519,48.040802,1141.216553,716.655273,0.881913,0,person
1,442.064697,437.536072,496.798157,709.869446,0.687422,27,tie
2,125.24646,193.684296,712.012634,713.056274,0.638556,0,person
3,982.883362,308.352966,1027.358154,420.091248,0.261698,27,tie


In [3]:
from collections import OrderedDict


class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, init_features=32):
        super(UNet, self).__init__()

        features = init_features

        ## CODER
        self.encoder1 = UNet._block(in_channels, features, name="enc1")
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2 = UNet._block(features, features * 2, name="enc2")
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder3 = UNet._block(features * 2, features * 4, name="enc3")
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder4 = UNet._block(features * 4, features * 8, name="enc4")
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = UNet._block(features * 8, features * 16, name="bottleneck")

        ## DECODER
        # TODO: Construeix el teu decoder
        self.upconv4 = nn.ConvTranspose2d(features * 16, features * 8, kernel_size=2, stride=2)
        self.decoder4 = UNet._block((features * 8) * 2, features * 8, name="dec4")
        self.upconv3 = nn.ConvTranspose2d(features * 8, features * 4, kernel_size=2, stride=2)
        self.decoder3 = UNet._block((features * 4) * 2, features * 4, name="dec3")
        self.upconv2 = nn.ConvTranspose2d(features * 4, features * 2, kernel_size=2, stride=2)
        self.decoder2 = UNet._block((features * 2) * 2, features * 2, name="dec2")
        self.upconv1 = nn.ConvTranspose2d(features * 2, features, kernel_size=2, stride=2)
        self.decoder1 = UNet._block(features * 2, features, name="dec1")

        self.conv = nn.Conv2d(in_channels=features, out_channels=out_channels, kernel_size=1)

    def forward(self, x):
        enc1 = self.encoder1(x)
        enc2 = self.encoder2(self.pool1(enc1))
        enc3 = self.encoder3(self.pool2(enc2))
        enc4 = self.encoder4(self.pool3(enc3))

        bottleneck = self.bottleneck(self.pool4(enc4))

        dec4 = self.upconv4(bottleneck)
        dec4 = torch.cat((dec4, enc4), dim=1)
        dec4 = self.decoder4(dec4)
        dec3 = self.upconv3(dec4)
        dec3 = torch.cat((dec3, enc3), dim=1)
        dec3 = self.decoder3(dec3)
        dec2 = self.upconv2(dec3)
        dec2 = torch.cat((dec2, enc2), dim=1)
        dec2 = self.decoder2(dec2)
        dec1 = self.upconv1(dec2)
        dec1 = torch.cat((dec1, enc1), dim=1)
        dec1 = self.decoder1(dec1)

        # Aplicarem una sigmoide a la sortida de la xarxa -> TODO: Recordar el que ha dit en biel a classe
        return torch.sigmoid(self.conv(dec1))

    # Ara ja podem començar a fer coses amb cara i ulls
    @staticmethod
    def _block(in_channels, features, name):
        return nn.Sequential(
            OrderedDict(
                [
                    (
                        name + "conv1",
                        nn.Conv2d(
                            in_channels=in_channels,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm1", nn.BatchNorm2d(num_features=features)),
                    (name + "relu1", nn.ReLU(inplace=True)),
                    (
                        name + "conv2",
                        nn.Conv2d(
                            in_channels=features,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm2", nn.BatchNorm2d(num_features=features)),
                    (name + "relu2", nn.ReLU(inplace=True)),
                ]
            )
        )

unet = UNet()
print('Number of parameters: {}'.format(sum([p.numel() for p in unet.parameters()])))

Number of parameters: 7763041
