In [1]:
#%%
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
# %% import image
# Open the source image with PIL. The path is relative, so the notebook must be
# run from the directory that contains 'B053.jpg'.
img = Image.open('B053.jpg')

In [3]:
# Inspect the source resolution; PIL reports Image.size as (width, height) in pixels.
img.size

(5896, 5768)

In [16]:
# Preprocessing / augmentation pipeline; the steps run in the order listed.
# RandomRotation and RandomVerticalFlip are stochastic, so re-running this cell
# produces a different `x` each time unless the torch RNG is seeded beforehand.
preprocess_steps = transforms.Compose(transforms=[
    # Force a fixed 300x300 working size (aspect ratio is not preserved).
    transforms.Resize(size=(300, 300)),
    # Rotate by a random angle drawn from [-50, +50] degrees.
    transforms.RandomRotation(degrees=50),
    # Keep only the central 200x200 region.
    transforms.CenterCrop(size=200),
    # Flip top-to-bottom with probability 0.5 (the torchvision default).
    transforms.RandomVerticalFlip(p=0.5),
    # PIL image -> float tensor (C, H, W) scaled to [0, 1].
    transforms.ToTensor(),
    # (value - 0.5) / 0.5 maps [0, 1] onto [-1, 1]; the single-element mean/std
    # is broadcast over however many channels `img` has.
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Run the pipeline once on the loaded image.
x = preprocess_steps(img)

In [18]:
# Sanity check: mean and standard deviation over all elements of the
# preprocessed tensor (values vary between runs because of the random transforms).
x.mean(), x.std()

(tensor(0.2928), tensor(0.1251))

In [19]:
# layer calculations
# e.g. nn.Conv3d() expects input of shape [batch_size, channels, depth, height, width], e.g. [16, 1, 8, 300, 300]
# so LSD alone (one channel) would be [16, 1, 8, 300, 300], while LSD + crop (two channels) would be [16, 2, 8, 300, 300]

In [47]:
#%% packages
# Use the concrete collections.OrderedDict class; typing.OrderedDict is only a
# deprecated alias intended for type annotations.
from collections import OrderedDict
import torch
import torch.nn as nn

#%% sample input data of certain shape
# Dummy batch used to verify layer shapes: 1 sample, 3 channels, 32x32 pixels,
# values drawn uniformly from [0, 1).
# NOTE(review): the name `input` shadows the Python builtin; it is kept because
# a later cell refers to it.
input = torch.rand((1, 3, 32, 32))

# %%
# Small CNN ending in a single sigmoid unit (one score in (0, 1) per sample).
# Shape comments assume a 32x32 input; BS = batch size. Both convolutions are
# 3x3 with stride 1 and no padding, so each one trims 2 pixels per spatial dim.
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(3, 8, 3)),      # out: (BS, 8, 30, 30)
    ('relu1', nn.ReLU()),
    ('pool', nn.MaxPool2d(2, 2)),       # out: (BS, 8, 15, 15)
    ('conv2', nn.Conv2d(8, 16, 3)),     # out: (BS, 16, 13, 13)
    ('relu2', nn.ReLU()),
    ('pool2', nn.MaxPool2d(2, 2)),      # out: (BS, 16, 6, 6) -- 13 // 2 = 6
    ('flatten', nn.Flatten()),          # out: (BS, 16*6*6) = (BS, 576)
    # NOTE(review): 127 hidden units is unusual -- confirm 128 was not intended.
    ('fc1', nn.Linear(16 * 6 * 6, 127)),
    ('relu3', nn.ReLU()),
    ('fc2', nn.Linear(127, 64)),
    ('relu4', nn.ReLU()),
    ('fc3', nn.Linear(64, 1)),          # out: (BS, 1)
    ('sigmoid', nn.Sigmoid())           # squashes the logit into (0, 1)
]))

# %% test the model setup

In [48]:
# Forward the dummy batch through the network and check the output shape:
# one sigmoid score per sample, i.e. (batch_size, 1).
model(input).shape

torch.Size([1, 1])

In [49]:
import segmentation_models_pytorch as smp

In [None]:
# Bare reference to the U-Net class: this only evaluates to the class object,
# no model is constructed here.
smp.Unet