In [1]:
from torchvision import transforms
from torchvision.utils import Image
import os
import torch.nn as nn
from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights
import tensorflow as tf
import torch
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.optim as optim
import matplotlib.pyplot as plt




In [2]:
def Read_Data(is_train = True):
  temp = []
  updated_path = os.path.join("VOC2012_train_val","VOC2012_train_val","ImageSets","Segmentation","train.txt" if is_train else "val.txt")
  with open(updated_path,"r") as file_:
    Instances = file_.read().split()
    for img in Instances:
      path_img = os.path.join("VOC2012_train_val","VOC2012_train_val","JPEGImages",img+".jpg")
      path_label = os.path.join("VOC2012_train_val","VOC2012_train_val","SegmentationClass",img+".png")
      temp.append([path_img,path_label])
  return temp

In [3]:
Train = Read_Data(is_train=True)

In [4]:
ms = []
transform = transforms.Compose([transforms.Resize((256,256)),transforms.ToTensor()])
for i in Train:
    mask = Image.open(i[1])
    mask_tensor = transform(mask)
    ms.append(mask_tensor)    

In [5]:
imgs = []
transform = transforms.Compose([transforms.Resize((256,256)),transforms.ToTensor()])
for i in Train:
    mask = Image.open(i[0])
    mask_tensor = transform(mask)
    imgs.append(mask_tensor)

In [6]:
data_set = []
for i in range(len(ms)):
    data_set.append([imgs[i],ms[i]])
print(len(data_set))

1464


In [7]:
dataset = DataLoader(data_set, batch_size=32, shuffle=True)

In [8]:
class UNetWithResNet(nn.Module):
    def __init__(self):
        super(UNetWithResNet, self).__init__()
        self.enc1 = nn.Sequential(
            nn.Conv2d(3,64, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64,64, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.enc2 = nn.Sequential(
            nn.Conv2d(64,128, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128,128, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.enc3 = nn.Sequential(
            nn.Conv2d(128,256, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256,256, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.enc4 = nn.Sequential(
            nn.Conv2d(256,512, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512,512, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.max = nn.MaxPool2d(kernel_size=2, stride=2)
        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = nn.Sequential(
            nn.Conv2d(512,256, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256,256, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.dec2 = nn.Sequential(
            nn.Conv2d(256,128, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128,128, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.dec3 = nn.Sequential(
            nn.Conv2d(128,64, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64,64, kernel_size=1),
            nn.ReLU(inplace=True)
        )
        self.out = nn.Conv2d(64, 1, kernel_size=1)
        
    def forward(self, x):
        x1 = self.enc1(x)
        x2 = self.enc2(self.max(x1))
        x3 = self.enc3(self.max(x2))
        x4 = self.enc4(self.max(x3))

        x = self.upconv3(x4)
        x = torch.cat([x, x3], dim=1)
        x = self.dec1(x)

        x = self.upconv2(x)
        x = torch.cat([x, x2], dim=1)
        x = self.dec2(x)

        # Define the last decoder block with skip connections
        x = self.upconv1(x)
        x = torch.cat([x, x1], dim=1)
        x = self.dec3(x)
        return self.out(x)

In [9]:
model = UNetWithResNet()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, targets in dataset:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    
    epoch_loss = running_loss / len(dataset)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")

Epoch [1/10], Loss: 12.2537
Epoch [2/10], Loss: 7.7927
Epoch [3/10], Loss: 7.7980
Epoch [4/10], Loss: 7.8081
Epoch [5/10], Loss: 7.7514
Epoch [6/10], Loss: 7.7703
Epoch [7/10], Loss: 7.6762
Epoch [8/10], Loss: 7.5047
Epoch [9/10], Loss: 7.4118
Epoch [10/10], Loss: 7.5377


In [11]:
torch.save(model.state_dict(), 'f1_model.pt')

In [14]:
img = Image.open('VOC2012_test\\VOC2012_test\\JPEGImages\\2008_000012.jpg')
transform = transforms.Compose([transforms.Resize((256,256)),transforms.ToTensor()])
transformed_img = transform(img)
print(transformed_img.shape)
prediction = model(transformed_img)
plt.imshow(img)
plt.imshow(prediction, cmap='jet', alpha=0.5)
plt.show()

torch.Size([3, 256, 256])


RuntimeError: Given groups=1, weight of size [256, 512, 1, 1], expected input[1, 256, 128, 64] to have 512 channels, but got 256 channels instead