<a href="https://colab.research.google.com/github/Meenakshi2434/U-2-Net-Computer-Vision-Project/blob/main/Test_Assignment_U2_net_Task_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

# Directories holding the input images and their segmentation masks
image_dir = '/content/drive/MyDrive/Image'
mask_dir = '/content/drive/MyDrive/Mask'


def _resize_and_flip_steps():
    """Return freshly constructed resize + random-flip steps.

    Called once per pipeline so that the image and mask pipelines each own
    their own transform instances.
    """
    return [
        transforms.Resize((320, 320)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
    ]


# Image pipeline: geometric steps, tensor conversion, then per-channel
# normalization with the mean/std constants below.
# NOTE(review): the image and mask pipelines hold separate random-flip
# instances, so their flip decisions are drawn independently unless the
# caller aligns the random state between the two calls.
image_transform = transforms.Compose(
    _resize_and_flip_steps()
    + [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Mask pipeline: same geometric steps and tensor conversion, no normalization.
mask_transform = transforms.Compose(
    _resize_and_flip_steps() + [transforms.ToTensor()]
)
# Dataset that pairs each image with its mask and applies a separate transform to each
class CarDataset(Dataset):
    """Paired image/mask dataset for segmentation training.

    Args:
        image_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; ``mask_paths[i]`` is used as
            the mask for ``image_paths[i]``.
        image_transform: Callable applied to the RGB PIL image.
        mask_transform: Callable applied to the single-channel PIL mask.

    Each item is an ``(image, mask)`` tuple. The mask returned by
    ``mask_transform`` gets one extra leading axis (``unsqueeze(0)``), which
    the training loop indexes away again.
    """

    def __init__(self, image_paths, mask_paths, image_transform, mask_transform):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.image_transform = image_transform
        self.mask_transform = mask_transform

    def __len__(self):
        """Return the number of image/mask pairs (driven by ``image_paths``)."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, transform and return the ``idx``-th ``(image, mask)`` pair."""
        # Open inside a context manager so the file handle is released right
        # away; convert() returns a fully loaded copy. Forcing the mode keeps
        # the channel count fixed (3 for images, 1 for masks) regardless of how
        # the files were saved (RGBA, palette, grayscale, ...).
        with Image.open(self.image_paths[idx]) as image_file:
            image = image_file.convert('RGB')
        with Image.open(self.mask_paths[idx]) as mask_file:
            mask = mask_file.convert('L')

        # Random transforms (e.g. flips) must hit the image and its mask
        # identically, otherwise the pair is no longer spatially aligned.
        # Replaying the same global RNG state for the mask makes its transform
        # see the same random draws as the image transform did.
        # NOTE(review): this assumes both pipelines consume torch's global RNG
        # in the same order - confirm if the pipelines diverge.
        rng_state = torch.get_rng_state()
        image = self.image_transform(image)
        torch.set_rng_state(rng_state)
        mask = self.mask_transform(mask)  # Use mask_transform for masks

        # Kept for compatibility: the training loop indexes this axis away.
        mask = mask.unsqueeze(0)
        return image, mask

# Load the dataset
# os.listdir returns entries in arbitrary order, so both listings are sorted to
# keep image i paired with mask i.
# NOTE(review): this assumes an image and its mask have file names that sort
# to the same position - confirm against the dataset layout.
image_files = sorted(os.path.join(image_dir, f) for f in os.listdir(image_dir))
mask_files = sorted(os.path.join(mask_dir, f) for f in os.listdir(mask_dir))
if len(image_files) != len(mask_files):
    raise ValueError(
        f'Found {len(image_files)} images but {len(mask_files)} masks; '
        'every image needs exactly one mask'
    )

# Import the necessary library
from sklearn.model_selection import train_test_split

# Split the dataset into training and validation sets
train_image_files, val_image_files, train_mask_files, val_mask_files = train_test_split(
    image_files, mask_files, test_size=0.25, random_state=42
)


def _without_random_flips(pipeline):
    """Return a copy of a Compose pipeline with its random flip steps removed."""
    flip_types = (transforms.RandomHorizontalFlip, transforms.RandomVerticalFlip)
    return transforms.Compose(
        [step for step in pipeline.transforms if not isinstance(step, flip_types)]
    )


# Validation must be deterministic, so it uses the same pipelines minus the
# random augmentation steps.
val_image_transform = _without_random_flips(image_transform)
val_mask_transform = _without_random_flips(mask_transform)

# Create the datasets and data loaders
train_dataset = CarDataset(train_image_files, train_mask_files, image_transform, mask_transform)
val_dataset = CarDataset(val_image_files, val_mask_files, val_image_transform, val_mask_transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

# Set the device (GPU or CPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

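# Define the segmentation model: a small convolutional encoder-decoder (named U2Net; it has no skip connections or nested U-blocks)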
class U2Net(nn.Module):
    """Small convolutional encoder-decoder producing one output channel.

    The encoder stacks three (3x3 conv -> ReLU -> 2x2 max-pool) stages taking
    3 input channels to 256; the decoder mirrors it with three stride-2
    transposed convolutions down to a single channel, followed by a 3x3
    convolution. No activation is applied to the final output.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) for each encoder stage.
        encoder_channels = ((3, 64), (64, 128), (128, 256))
        encoder_layers = []
        for in_channels, out_channels in encoder_channels:
            encoder_layers.extend([
                # padding=1 keeps the spatial size unchanged through the conv
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ])
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 1, kernel_size=2, stride=2),
            # Final 3x3 convolution over the single-channel map
            nn.Conv2d(1, 1, kernel_size=3, padding=1),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Run ``x`` through the encoder and then the decoder."""
        return self.decoder(self.encoder(x))

# Define the evaluation metrics (Dice coefficient and IoU)

def dice_coeff(output, target, smooth=1):
    """Return the smoothed Dice coefficient between two tensors.

    Computes ``(2 * sum(output * target) + smooth) /
    (sum(output) + sum(target) + smooth)`` over all elements.

    Args:
        output: Predicted mask tensor (any shape; bool, integer or float).
        target: Ground-truth mask tensor with the same number of elements.
        smooth: Additive smoothing term; it keeps the ratio defined (and equal
            to 1) when both tensors are all zeros.

    Returns:
        A zero-dimensional tensor holding the Dice coefficient.
    """
    # reshape() also handles non-contiguous inputs (slices, transposes),
    # on which view(-1) raises.
    output = output.reshape(-1)
    target = target.reshape(-1)
    # Thresholded predictions arrive as bool; do the arithmetic in float.
    if not output.is_floating_point():
        output = output.float()
    if not target.is_floating_point():
        target = target.float()
    intersection = (output * target).sum()
    union = output.sum() + target.sum()
    return (2 * intersection + smooth) / (union + smooth)

def iou_coeff(output, target, eps=1e-7):
    """Return the intersection-over-union between two tensors.

    Args:
        output: Predicted mask tensor (any shape; bool, integer or float).
        target: Ground-truth mask tensor with the same number of elements.
        eps: Small constant added to numerator and denominator so that an
            empty prediction against an empty target scores 1.0 instead of
            producing NaN from 0/0.

    Returns:
        A zero-dimensional tensor holding the IoU.
    """
    # reshape() also handles non-contiguous inputs (slices, transposes),
    # on which view(-1) raises.
    output = output.reshape(-1)
    target = target.reshape(-1)
    # Thresholded predictions arrive as bool; do the arithmetic in float.
    if not output.is_floating_point():
        output = output.float()
    if not target.is_floating_point():
        target = target.float()
    intersection = (output * target).sum()
    union = output.sum() + target.sum() - intersection
    return (intersection + eps) / (union + eps)

# Set the device (GPU or CPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Initialize the model, optimizer, and loss function
model = U2Net()
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
# BCEWithLogitsLoss applies the sigmoid internally, so the model's raw output
# is passed to it directly during training.
criterion = nn.BCEWithLogitsLoss()

# Train the model
for epoch in range(10):
    model.train()
    for i, (image, mask) in enumerate(train_loader):
        image = image.to(device)
        mask = mask.to(device)
        output = model(image)
        # Index 0 on axis 1 drops the extra axis CarDataset adds via unsqueeze(0)
        loss = criterion(output, mask[:, 0, :, :])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f'Epoch {epoch+1}, Iter {i+1}, Loss: {loss.item()}')

    # Validate after every epoch
    model.eval()
    with torch.no_grad():
        # Accumulate plain floats rather than tensors
        dice_score = 0.0
        iou = 0.0
        num_val_batches = 0
        for i, (image, mask) in enumerate(val_loader):
            image = image.to(device)
            mask = mask.to(device)
            output = model(image)
            output = torch.sigmoid(output)
            # Binarise both sides so the metrics compare like with like; the
            # target is thresholded as well in case resizing left fractional
            # values in the mask.
            output = (output > 0.5).float()
            target = (mask[:, 0, :, :] > 0.5).float()
            dice_score += float(dice_coeff(output, target))
            iou += float(iou_coeff(output, target))
            num_val_batches += 1
        if num_val_batches:
            dice_score /= num_val_batches
            iou /= num_val_batches
            print(f'Epoch {epoch+1}, Val Dice Score: {dice_score:.4f}, Val IoU: {iou:.4f}')
        else:
            # Avoid dividing by zero when there is nothing to validate on
            print(f'Epoch {epoch+1}, validation skipped: val_loader is empty')

# Save the trained model
torch.save(model.state_dict(), 'u2net.pth')

Epoch 1, Iter 1, Loss: 0.7176752090454102
Epoch 1, Iter 2, Loss: 0.7128245234489441
Epoch 1, Iter 3, Loss: 0.715785801410675
Epoch 1, Iter 4, Loss: 0.7134995460510254
Epoch 1, Val Dice Score: 0.3611, Val IoU: 0.2207
Epoch 2, Iter 1, Loss: 0.7063144445419312
Epoch 2, Iter 2, Loss: 0.695849597454071
Epoch 2, Iter 3, Loss: 0.6879879832267761
Epoch 2, Iter 4, Loss: 0.6908737421035767
Epoch 2, Val Dice Score: 0.1443, Val IoU: 0.0779
Epoch 3, Iter 1, Loss: 0.6615478992462158
Epoch 3, Iter 2, Loss: 0.6336244940757751
Epoch 3, Iter 3, Loss: 0.6539619565010071
Epoch 3, Iter 4, Loss: 0.652955174446106
Epoch 3, Val Dice Score: 0.0693, Val IoU: 0.0359
Epoch 4, Iter 1, Loss: 0.6572124361991882
Epoch 4, Iter 2, Loss: 0.6686462759971619
Epoch 4, Iter 3, Loss: 0.5918661952018738
Epoch 4, Iter 4, Loss: 0.5883519649505615
Epoch 4, Val Dice Score: 0.0369, Val IoU: 0.0188
Epoch 5, Iter 1, Loss: 0.5856221318244934
Epoch 5, Iter 2, Loss: 0.5717297196388245
Epoch 5, Iter 3, Loss: 0.6569756269454956
Epoch 5, 

In [None]:
# Collect the model's current parameters as an ordered name -> tensor mapping
model_weights = model.state_dict()
# Bare expression as the last statement of the cell: the notebook displays its value
model_weights

OrderedDict([('encoder.0.weight',
              tensor([[[[ 0.0877,  0.1019,  0.0801],
                        [-0.1902,  0.1863, -0.1814],
                        [ 0.0557,  0.1505,  0.1341]],
              
                       [[ 0.1795, -0.0644,  0.1223],
                        [ 0.1432, -0.0326, -0.0037],
                        [-0.0598,  0.1723, -0.1139]],
              
                       [[ 0.1506,  0.1148, -0.0892],
                        [-0.1507,  0.0170,  0.0566],
                        [-0.1071, -0.0015, -0.1869]]],
              
              
                      [[[ 0.1841, -0.1009,  0.0875],
                        [-0.0251, -0.1258,  0.1865],
                        [ 0.0918,  0.0836,  0.1916]],
              
                       [[ 0.1072, -0.0427,  0.0152],
                        [-0.0080,  0.1223,  0.0680],
                        [ 0.1137,  0.1503,  0.1134]],
              
                       [[ 0.0733,  0.1653, -0.1267],
                      

Task 2
