In [36]:
# %%
from typing import LiteralString
from tqdm import tqdm
import cv2
import os
import json
from PIL import Image
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models.segmentation import deeplabv3_resnet50
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
import numpy as np
import torch.nn.functional as F
from scipy.special import softmax

In [18]:
folder_path = '../9517proj_sources/train'
annotation_path = '../9517proj_sources/train_annotations'

with open(annotation_path, 'r') as f:
    annotations = json.load(f)
    
for anno in annotations:
    image_id = anno['image_id']
    bbox = anno['bbox']
    category_id = anno['category_id']
    
    # build image path, absolute path+ na me
    image_file_path = os.path.join(folder_path, f"image_id_{str(image_id).zfill(3)}.jpg")
    
    image = cv2.imread(image_file_path)
    
    if image is None:
        print(f"File not found: {image_file_path}")
        continue
    
    # draw bounding box
    x, y, width, height = bbox
    cv2.rectangle(image, (int(x), int(y)), (int(x+width), int(y+height)), (0, 255, 0), 2)
    
    # show image

# 1

In [15]:
# %%
class CustomDataset(Dataset):
    def __init__(self, img_folder, img_ext, mask_folder, mask_ext, transform=None, mask_transform=None):
        self.img_folder = img_folder
        self.img_ext = img_ext
        self.mask_folder = mask_folder
        self.mask_ext = mask_ext
        self.transform = transform
        self.mask_transform = mask_transform
        self.filenames = [os.path.splitext(filename)[0] for filename in os.listdir(img_folder)]

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_folder, self.filenames[idx] + self.img_ext)
        mask_path = os.path.join(self.mask_folder, self.filenames[idx] + self.mask_ext)
        image = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')  # assuming masks are in 'L' mode

        if self.transform:
            image = self.transform(image)

        if self.mask_transform:
            mask = self.mask_transform(mask)
            mask = torch.squeeze(mask, 0)  # remove the first dimension (1, H, W) -> (H, W)

        return image, mask


transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor()
])

mask_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor()
])

path_folder = '../9517proj_sources/train'

path_folder_2 = '../9517proj_sources/off'

train_dataset = CustomDataset(path_folder, '.jpg', path_folder_2, '.png', transform=transform, mask_transform=mask_transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# create model
model = deeplabv3_resnet50(pretrained=False, progress=True, num_classes=2, aux_loss=None)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss Function and Optimizer
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001)


for epoch in tqdm(range(10)):  # suppose we train for 10 epochs
    try:
        for images, masks in train_loader:
            images = images.to(device)
            masks = masks.to(device)

            # forward pass
            outputs = model(images)['out']
            loss = criterion(outputs, masks.long())  # note we need to convert masks to 'long' type

            # backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            print('Epoch [{}/10], Loss: {:.4f}'.format(epoch + 1, loss.item()))
# for epoch in range(10):  # suppose we train for 10 epochs
#     try:
#         for images, masks in train_loader:
#             print(images.shape, masks.shape)
#             images = images.to('cuda') if torch.cuda.is_available() else images
#             masks = masks.to('cuda') if torch.cuda.is_available() else masks
#
#             # forward pass
#             outputs = model(images)['out']
#             loss = criterion(outputs, masks.long())  # note we need to convert masks to 'long' type
#
#             # backward and optimize
#             optimizer.zero_grad()
#             loss.backward()
#             optimizer.step()
#
#             print('Epoch [{}/10], Loss: {:.4f}'.format(epoch + 1, loss.item()))
    except Exception as e:
        print(e)
        continue


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch [1/10], Loss: 0.7693
Epoch [1/10], Loss: 0.7586
Epoch [1/10], Loss: 0.7417
Epoch [1/10], Loss: 0.7265
Epoch [1/10], Loss: 0.7155


 10%|█         | 1/10 [00:03<00:30,  3.42s/it]

Epoch [1/10], Loss: 0.7152
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_184.png'
Epoch [2/10], Loss: 0.7031
Epoch [2/10], Loss: 0.6843
Epoch [2/10], Loss: 0.6777
Epoch [2/10], Loss: 0.6676
Epoch [2/10], Loss: 0.6558
Epoch [2/10], Loss: 0.6406
Epoch [2/10], Loss: 0.6398
Epoch [2/10], Loss: 0.6300
Epoch [2/10], Loss: 0.6320
Epoch [2/10], Loss: 0.6132
Epoch [2/10], Loss: 0.5859
Epoch [2/10], Loss: 0.5893
Epoch [2/10], Loss: 0.5789
Epoch [2/10], Loss: 0.5713


 20%|██        | 2/10 [00:11<00:51,  6.38s/it]

Epoch [2/10], Loss: 0.5687
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_465.png'
Epoch [3/10], Loss: 0.5505
Epoch [3/10], Loss: 0.5467
Epoch [3/10], Loss: 0.5331
Epoch [3/10], Loss: 0.5400
Epoch [3/10], Loss: 0.5136
Epoch [3/10], Loss: 0.5179
Epoch [3/10], Loss: 0.5085
Epoch [3/10], Loss: 0.4958
Epoch [3/10], Loss: 0.4937
Epoch [3/10], Loss: 0.4616
Epoch [3/10], Loss: 0.4713
Epoch [3/10], Loss: 0.4682
Epoch [3/10], Loss: 0.4565
Epoch [3/10], Loss: 0.4470
Epoch [3/10], Loss: 0.4422
Epoch [3/10], Loss: 0.4348
Epoch [3/10], Loss: 0.4260
Epoch [3/10], Loss: 0.4172
Epoch [3/10], Loss: 0.4202
Epoch [3/10], Loss: 0.4091
Epoch [3/10], Loss: 0.3977
Epoch [3/10], Loss: 0.3980
Epoch [3/10], Loss: 0.3919
Epoch [3/10], Loss: 0.3801
Epoch [3/10], Loss: 0.3755


 30%|███       | 3/10 [00:26<01:10, 10.05s/it]

Epoch [3/10], Loss: 0.3671
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_414.png'
Epoch [4/10], Loss: 0.3663
Epoch [4/10], Loss: 0.3575
Epoch [4/10], Loss: 0.3514
Epoch [4/10], Loss: 0.3541
Epoch [4/10], Loss: 0.3433
Epoch [4/10], Loss: 0.3428
Epoch [4/10], Loss: 0.3257
Epoch [4/10], Loss: 0.3264
Epoch [4/10], Loss: 0.3200
Epoch [4/10], Loss: 0.3122
Epoch [4/10], Loss: 0.3119
Epoch [4/10], Loss: 0.2996
Epoch [4/10], Loss: 0.2961
Epoch [4/10], Loss: 0.2990
Epoch [4/10], Loss: 0.2906
Epoch [4/10], Loss: 0.2964
Epoch [4/10], Loss: 0.2885
Epoch [4/10], Loss: 0.2715
Epoch [4/10], Loss: 0.2639
Epoch [4/10], Loss: 0.2669
Epoch [4/10], Loss: 0.2635
Epoch [4/10], Loss: 0.2612
Epoch [4/10], Loss: 0.2485
Epoch [4/10], Loss: 0.2499
Epoch [4/10], Loss: 0.2468
Epoch [4/10], Loss: 0.2386
Epoch [4/10], Loss: 0.2400
Epoch [4/10], Loss: 0.2338
Epoch [4/10], Loss: 0.2247
Epoch [4/10], Loss: 0.2314
Epoch [4/10], Loss: 0.2246
Epoch [4/10], Loss: 0.2224


 40%|████      | 4/10 [00:44<01:18, 13.12s/it]

Epoch [4/10], Loss: 0.2165
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_478.png'


 50%|█████     | 5/10 [00:44<00:43,  8.60s/it]

Epoch [5/10], Loss: 0.2243
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_412.png'
Epoch [6/10], Loss: 0.2090
Epoch [6/10], Loss: 0.2143
Epoch [6/10], Loss: 0.1980
Epoch [6/10], Loss: 0.1934
Epoch [6/10], Loss: 0.2059
Epoch [6/10], Loss: 0.1998
Epoch [6/10], Loss: 0.1939
Epoch [6/10], Loss: 0.1891
Epoch [6/10], Loss: 0.1856
Epoch [6/10], Loss: 0.1782
Epoch [6/10], Loss: 0.1730
Epoch [6/10], Loss: 0.1903
Epoch [6/10], Loss: 0.1847
Epoch [6/10], Loss: 0.1783
Epoch [6/10], Loss: 0.1641
Epoch [6/10], Loss: 0.1691
Epoch [6/10], Loss: 0.1716


 60%|██████    | 6/10 [00:54<00:35,  8.96s/it]

Epoch [6/10], Loss: 0.1606
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_320.png'
Epoch [7/10], Loss: 0.1516
Epoch [7/10], Loss: 0.1594
Epoch [7/10], Loss: 0.1530
Epoch [7/10], Loss: 0.1534
Epoch [7/10], Loss: 0.1543
Epoch [7/10], Loss: 0.1523
Epoch [7/10], Loss: 0.1512
Epoch [7/10], Loss: 0.1467
Epoch [7/10], Loss: 0.1399
Epoch [7/10], Loss: 0.1459
Epoch [7/10], Loss: 0.1440
Epoch [7/10], Loss: 0.1424
Epoch [7/10], Loss: 0.1346
Epoch [7/10], Loss: 0.1324
Epoch [7/10], Loss: 0.1377
Epoch [7/10], Loss: 0.1305
Epoch [7/10], Loss: 0.1297
Epoch [7/10], Loss: 0.1285


 70%|███████   | 7/10 [01:04<00:28,  9.37s/it]

Epoch [7/10], Loss: 0.1189
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_412.png'


 80%|████████  | 8/10 [01:05<00:13,  6.63s/it]

Epoch [8/10], Loss: 0.1253
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_478.png'
Epoch [9/10], Loss: 0.1327
Epoch [9/10], Loss: 0.1190
Epoch [9/10], Loss: 0.1248
Epoch [9/10], Loss: 0.1211
Epoch [9/10], Loss: 0.1129
Epoch [9/10], Loss: 0.1153
Epoch [9/10], Loss: 0.1103
Epoch [9/10], Loss: 0.1121
Epoch [9/10], Loss: 0.1123
Epoch [9/10], Loss: 0.1072
Epoch [9/10], Loss: 0.1092


 90%|█████████ | 9/10 [01:11<00:06,  6.60s/it]

Epoch [9/10], Loss: 0.1028
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_414.png'
Epoch [10/10], Loss: 0.1053
Epoch [10/10], Loss: 0.1044
Epoch [10/10], Loss: 0.1039
Epoch [10/10], Loss: 0.1075
Epoch [10/10], Loss: 0.1017
Epoch [10/10], Loss: 0.0955


100%|██████████| 10/10 [01:15<00:00,  7.56s/it]

Epoch [10/10], Loss: 0.0996
[Errno 2] No such file or directory: '../9517proj_sources/off\\image_id_478.png'





# 1

In [21]:
class CustomDataset(Dataset):
    def __init__(self, img_folder, img_ext, mask_folder, mask_ext, transform=None, mask_transform=None):
        self.img_folder = img_folder
        self.img_ext = img_ext
        self.mask_folder = mask_folder
        self.mask_ext = mask_ext
        self.transform = transform
        self.mask_transform = mask_transform

        self.filenames = [os.path.splitext(filename)[0] for filename in os.listdir(img_folder)
                          if os.path.exists(os.path.join(mask_folder, os.path.splitext(filename)[0] + mask_ext))]

    # ... rest of the class remains the same ...
    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_folder, self.filenames[idx] + self.img_ext)
        mask_path = os.path.join(self.mask_folder, self.filenames[idx] + self.mask_ext)
        image = Image.open(img_path).convert('RGB')
        mask = Image.open(mask_path).convert('L')  # assuming masks are in 'L' mode

        if self.transform:
            image = self.transform(image)

        if self.mask_transform:
            mask = self.mask_transform(mask)
            mask = torch.squeeze(mask, 0)  # remove the first dimension (1, H, W) -> (H, W)

        return image, mask


transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Modified image size
    transforms.ToTensor()
])

mask_transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Modified mask size
    transforms.ToTensor()
])

# ... rest of the setup remains the same ...
path_folder = '../9517proj_sources/train'

path_folder_2 = '../9517proj_sources/off'

train_dataset = CustomDataset(path_folder, '.jpg', path_folder_2, '.png', transform=transform, mask_transform=mask_transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# create model
model = deeplabv3_resnet50(pretrained=False, progress=True, num_classes=2, aux_loss=None)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss Function and Optimizer
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001)

for epoch in range(10):  # suppose we train for 10 epochs
    for images, masks in train_loader:
        images = images.to(device)
        masks = masks.to(device)

        # forward pass
        outputs = model(images)['out']

        # resize masks and outputs for the loss function
        outputs = F.interpolate(outputs, size=(masks.shape[1], masks.shape[2]), mode='bilinear', align_corners=False)

        loss = criterion(outputs, masks.long())  # note we need to convert masks to 'long' type

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print('Epoch [{}/10], Loss: {:.4f}'.format(epoch + 1, loss.item()))

    # Save the model after each epoch
    torch.save(model.state_dict(), 'model_epoch_{}.pth'.format(epoch+1))

# Load the model
# Here we use the model from the last epoch, but you can replace '10' with any epoch number to load that model.
model.load_state_dict(torch.load('model_epoch_10.pth'))


Epoch [1/10], Loss: 0.7966
Epoch [1/10], Loss: 0.7951
Epoch [1/10], Loss: 0.7847
Epoch [1/10], Loss: 0.7553
Epoch [1/10], Loss: 0.7555
Epoch [1/10], Loss: 0.7403
Epoch [1/10], Loss: 0.7270
Epoch [1/10], Loss: 0.7043
Epoch [1/10], Loss: 0.7060
Epoch [1/10], Loss: 0.6885
Epoch [1/10], Loss: 0.6749
Epoch [1/10], Loss: 0.6746
Epoch [1/10], Loss: 0.6545
Epoch [1/10], Loss: 0.6525
Epoch [1/10], Loss: 0.6597
Epoch [1/10], Loss: 0.6295
Epoch [1/10], Loss: 0.6236
Epoch [1/10], Loss: 0.6077
Epoch [1/10], Loss: 0.5889
Epoch [1/10], Loss: 0.5857
Epoch [1/10], Loss: 0.5744
Epoch [1/10], Loss: 0.5563
Epoch [1/10], Loss: 0.5568
Epoch [1/10], Loss: 0.5366
Epoch [1/10], Loss: 0.5310
Epoch [1/10], Loss: 0.5352
Epoch [1/10], Loss: 0.5189
Epoch [1/10], Loss: 0.5119
Epoch [1/10], Loss: 0.4979
Epoch [1/10], Loss: 0.4907
Epoch [1/10], Loss: 0.4658
Epoch [1/10], Loss: 0.4650
Epoch [1/10], Loss: 0.4663
Epoch [1/10], Loss: 0.4471
Epoch [1/10], Loss: 0.4397
Epoch [1/10], Loss: 0.4279
Epoch [1/10], Loss: 0.4386
E

<All keys matched successfully>

In [22]:
torch.save(model.state_dict(), 'model_weights.pth')

# 1

In [25]:
# %%
model.load_state_dict(torch.load('model_epoch_9.pth'))
model.eval()
# 如果你有可用的GPU，可以将模型放到GPU上
if torch.cuda.is_available():
    model = model.to('cuda')

test_folder: LiteralString = '../9517proj_sources/valid/valid'
test_ext = '.jpg'
os.makedirs('output_folder', exist_ok=True)
output_folder = '../9517proj_sources/output_folder'

test_filenames = [os.path.splitext(filename)[0] for filename in os.listdir(test_folder)]

with torch.no_grad():  # we don't need gradients for testing
    for filename in test_filenames:
        img_path = os.path.join(test_folder, filename + test_ext)
        image = Image.open(img_path).convert('RGB')
        orig_size = (image.width, image.height)
        image = transform(image)  # apply the same transform as during training
        image = image.unsqueeze(0)  # add a batch dimension

        image = image.to('cuda') if torch.cuda.is_available() else image

        output = model(image)['out']
        output = torch.argmax(output, dim=1)  # get the most likely prediction

        # resize the output to match the original image size
        output = cv2.resize(output[0].cpu().numpy(), orig_size, interpolation=cv2.INTER_NEAREST)
        # print(np.unique(output))
        # print(output)
        # get the original image
        orig_img = cv2.imread(img_path)

        # apply the mask to the original image
        segmented_img = np.zeros_like(orig_img)
        for i in range(3):  # for each color channel
            segmented_img[:, :, i] = np.where(output == 1, orig_img[:, :, i], 0)
        # print(np.unique(segmented_img))
        # save the segmented image
        # segmented_img = (segmented_img * 255).astype(np.uint8)
        res = cv2.imwrite(filename + '_segmented.jpg', segmented_img)
        print(res)


True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


# 2

In [48]:
# Load the model
model.load_state_dict(torch.load('model_epoch_10.pth'))
model.eval()

if torch.cuda.is_available():
    model = model.to('cuda')

# Change the path as needed
test_folder = '../9517proj_sources/valid/valid'
test_ext = '.jpg'
os.makedirs('output_folder', exist_ok=True)
output_folder = '../9517proj_sources/output_folder'

test_filenames = [os.path.splitext(filename)[0] for filename in os.listdir(test_folder)]

softmax = torch.nn.Softmax(dim=1)  # Softmax for converting output to probabilities

with torch.no_grad():
    for filename in test_filenames:
        img_path = os.path.join(test_folder, filename + test_ext)
        image = Image.open(img_path).convert('RGB')
        orig_size = (image.width, image.height)
        image = transform(image)
        image = image.unsqueeze(0)

        if torch.cuda.is_available():
            image = image.to('cuda')

        output = model(image)['out']
        print("Shape after model:", output.shape)

        probs = F.softmax(output, dim=1)
        print("Shape after softmax:", probs.shape)

        output = torch.argmax(output, dim=1)
        print("Shape after argmax:", output.shape)

        output = output.squeeze().cpu().numpy()
        print("Shape after squeeze and to numpy:", output.shape)

        output = cv2.resize(output, orig_size, interpolation=cv2.INTER_NEAREST)
        print("Shape after resize:", output.shape)
        output = np.squeeze(output)

        for i in range(3):  # for each color channel
            orig_img[:, :, i] = orig_img[:, :, i] * output
            # Save the image
        res = cv2.imwrite(filename + '_segmented.jpg', orig_img)
        print(res)

Shape after model: torch.Size([1, 2, 32, 32])
Shape after softmax: torch.Size([1, 2, 32, 32])
Shape after argmax: torch.Size([1, 32, 32])
Shape after squeeze and to numpy: (32, 32)
Shape after resize: (640, 640)
True
Shape after model: torch.Size([1, 2, 32, 32])
Shape after softmax: torch.Size([1, 2, 32, 32])
Shape after argmax: torch.Size([1, 32, 32])
Shape after squeeze and to numpy: (32, 32)
Shape after resize: (640, 640)
True
Shape after model: torch.Size([1, 2, 32, 32])
Shape after softmax: torch.Size([1, 2, 32, 32])
Shape after argmax: torch.Size([1, 32, 32])
Shape after squeeze and to numpy: (32, 32)
Shape after resize: (640, 640)
True
Shape after model: torch.Size([1, 2, 32, 32])
Shape after softmax: torch.Size([1, 2, 32, 32])
Shape after argmax: torch.Size([1, 32, 32])
Shape after squeeze and to numpy: (32, 32)
Shape after resize: (640, 640)
True
Shape after model: torch.Size([1, 2, 32, 32])
Shape after softmax: torch.Size([1, 2, 32, 32])
Shape after argmax: torch.Size([1, 32,