In [15]:
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F

import os
from pathlib import Path
import numpy as np
from glob import glob
import glob
import random

from torchvision import transforms
from collections import defaultdict

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [16]:
class GeoImageDataset(Dataset):
    def __init__(self, img_dir: Path, mask_dir:Path, transform=None):
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.img_files = os.listdir(self.img_dir)
        self.mask_files = os.listdir(self.mask_dir)
        self.transform = transform
        

    def __len__(self):
        return len(self.img_files)
        
    def __getitem__(self, idx):
        # Load image
        img_path = self.img_dir / self.img_files[idx]
        # mask and img_file have so far the same name
        mask_path = self.mask_dir / self.img_files[idx]
        img = torch.load(img_path)
        # converts bool mask into integer (0/1)
        mask = torch.load(mask_path).long()
        # Apply transform (if any)
        if self.transform:
            img = self.transform(img)
        
        return img, mask #, img_path, mask_path

In [17]:
root = Path(r'C:\Users\Fabian\Documents\Github_Masterthesis\Solarpark-detection\data_local\data_splitted')
train_img_dir = root / "train/images"
train_mask_dir = root / "train/masks"

val_img_dir = root / "val/images"
val_mask_dir = root / "val/masks"

test_img_dir = root / "test/images"
test_mask_dir = root / "test/masks"

train_dataset = GeoImageDataset(train_img_dir, train_mask_dir)
val_dataset = GeoImageDataset(val_img_dir, val_mask_dir)
test_dataset = GeoImageDataset(test_img_dir, test_mask_dir)

In [18]:
batch_size = 32
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True
)
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [19]:
from torch.optim.lr_scheduler import OneCycleLR
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [20]:
from typing import Any, Callable, Tuple, Union
def train(
    dataloader: Any,
    model: nn.Module,
    loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor],
    # Callable[[torch.Tensor, torch.Tensor], Union[Any, torch.Tensor]]
    optimizer: Any,
) -> torch.Tensor:
    size = len(dataloader.dataset)

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        # ! check why we need to squeeze and convert to float32
        loss = loss_fn(pred.squeeze(1), y.to(torch.float32))

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [21]:
from torchmetrics.classification import BinaryJaccardIndex
def test(
        dataloader: Any,
        model: nn.Module,
        loss_fn: Any,
    ) -> Union[torch.Tensor, torch.Tensor]:
        size = len(dataloader.dataset)
        num_batches = len(dataloader)
        model.eval()
        test_loss, correct = 0, 0

        metric = BinaryJaccardIndex().to(device)

        with torch.no_grad():
            for X, y in dataloader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                # test_loss += loss_fn(pred, y).item()
                # ToDo: calculate average loss
                test_loss += loss_fn(pred.squeeze(1), y.to(torch.float32)).item()
                loss = loss_fn(pred.squeeze(1), y.to(torch.float32)).item()

                # accuracy
                # correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # ToDo: fix typing
        test_loss /= num_batches  # type: ignore
        correct /= size  # type: ignore
        jaccard_idx = 100 * metric(pred.squeeze(1), y)
        print(
            f"Test Error: \n"
            f"Jaccard-Index: {(jaccard_idx):>0.3f}%, Avg loss: {test_loss:>5f} \n"
        )
        # ToDo: fix typing
        # ToDo: return average loss
        return loss, jaccard_idx.item()  # type: ignore

In [2]:
from torchvision.io.image import read_image
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
from torchvision.transforms.functional import to_pil_image

# img = read_image("gallery/assets/dog1.jpg")

# Step 1: Initialize model with the best available weights
weights = FCN_ResNet50_Weights.DEFAULT
model = fcn_resnet50(weights=weights)
model.eval()

# # Step 2: Initialize the inference transforms
# preprocess = weights.transforms()

# # Step 3: Apply inference preprocessing transforms
# batch = preprocess(img).unsqueeze(0)

# # Step 4: Use the model and visualize the prediction
# prediction = model(batch)["out"]
# normalized_masks = prediction.softmax(dim=1)
# class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])}
# mask = normalized_masks[0, class_to_idx["dog"]]
# to_pil_image(mask).show()

Downloading: "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth" to C:\Users\Fabian/.cache\torch\hub\checkpoints\fcn_resnet50_coco-1167a1af.pth
100%|██████████| 135M/135M [00:06<00:00, 22.2MB/s] 


FCN(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequenti

In [4]:
import torch

model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save('model_scripted.pt') # Save



In [5]:
import torch
import torchvision.models as models

model = models.__dict__['efficientnet_b0'](pretrained=True)
model.fc = None

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to C:\Users\Fabian/.cache\torch\hub\checkpoints\efficientnet_b0_rwightman-3dd342df.pth
100%|██████████| 20.5M/20.5M [00:01<00:00, 15.6MB/s]


In [31]:
import torch.nn as nn
class MySegmentationModel(nn.Module):
    def __init__(self, num_classes):
        super(MySegmentationModel, self).__init__()
        self.backbone = models.__dict__['efficientnet_b0'](pretrained=True)
        self.backbone.conv_stem = nn.Conv2d(4, 32, kernel_size=3, stride=2, bias=False)# Remove first layer
        self.classifier = nn.Conv2d(1280, num_classes, kernel_size=3)

    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

In [32]:
# Define your model and optimizer
model = MySegmentationModel(num_classes=1)

In [33]:
epochs = 1
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------


RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[32, 4, 256, 256] to have 3 channels, but got 4 channels instead

In [30]:
model = MySegmentationModel(num_classes=10)

# Generieren Sie einige zufällige Eingabedaten
batch_size = 32
in_channels = 4
height = 256
width = 256

inputs = torch.randn(batch_size, in_channels, height, width)

# Führen Sie die Eingabedaten durch das Modell
outputs = model(inputs)

RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[32, 4, 256, 256] to have 3 channels, but got 4 channels instead

In [13]:
model_scripted = torch.jit.script(model) # Export to TorchScript
model_scripted.save('model_scripted.pt') # Save

In [14]:
from torchvision.models.segmentation import unet

ImportError: cannot import name 'unet' from 'torchvision.models.segmentation' (c:\Users\Fabian\Documents\Github_Masterthesis\Solarpark-detection\.venv\lib\site-packages\torchvision\models\segmentation\__init__.py)