In [1]:
import os
from PIL import Image
from torchvision.transforms import ToTensor
import torch

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir_occluded1, root_dir_occluded2, root_dir_clear, transform=None):
        self.root_dir_occluded1 = root_dir_occluded1
        self.root_dir_occluded2 = root_dir_occluded2
        self.root_dir_clear = root_dir_clear
        self.transform = transform

        # Collect all image paths from nested directories for occluded images 1
        self.occluded1_image_paths = []
        for root, _, files in os.walk(root_dir_occluded1):
            for file in files:
                if file.endswith(".jpg") or file.endswith(".png"):
                    self.occluded1_image_paths.append(os.path.join(root, file))

        # Collect all image paths from nested directories for occluded images 2
        self.occluded2_image_paths = []
        for root, _, files in os.walk(root_dir_occluded2):
            for file in files:
                if file.endswith(".jpg") or file.endswith(".png"):
                    self.occluded2_image_paths.append(os.path.join(root, file))

        # Collect all image paths from nested directories for clear images
        self.clear_image_paths = []
        for root, _, files in os.walk(root_dir_clear):
            for file in files:
                if file.endswith(".jpg") or file.endswith(".png"):
                    self.clear_image_paths.append(os.path.join(root, file))

    def __len__(self):
        return len(self.occluded1_image_paths) + len(self.occluded2_image_paths) + len(self.clear_image_paths)

    def __getitem__(self, idx):
        if idx < len(self.occluded1_image_paths):
            img_name = self.occluded1_image_paths[idx]
            label = 0  
        elif idx < len(self.occluded1_image_paths) + len(self.occluded2_image_paths):
            img_name = self.occluded2_image_paths[idx - len(self.occluded1_image_paths)]
            label = 0  
        else:
            img_name = self.clear_image_paths[idx - len(self.occluded1_image_paths) - len(self.occluded2_image_paths)]
            label = 1 
        
        image = Image.open(img_name)
        
        if self.transform:
            image = self.transform(image)

        return image, label


In [2]:
from torchvision.transforms import Compose, Resize, ToTensor
from torch.utils.data import DataLoader

batch_size = 32
shuffle=True
transform = Compose([
    Resize((224, 224)),
    ToTensor(),
])
root_dir_occluded1=r'masked'
root_dir_occluded2=r'sunglasses'
root_dir_clear=r'neutral'
my_dataset = CustomDataset(root_dir_occluded1, root_dir_occluded2, root_dir_clear, transform=transform)
data_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=shuffle)


In [3]:
import torch
import torch.nn as nn

def pixel_loss(output, target):
    c, h, w = target.size(1), target.size(2), target.size(3)
    loss = nn.MSELoss(reduction='sum')(output, target) / (c * h * w)
    return loss

def perceptual_loss(output, target, vgg_model):

    output_features = vgg_model(output)
    target_features = vgg_model(target)

    loss = 0
    for i in range(len(output_features)):
        for j in range(len(output_features[i])):
            c_i_j, h_i_j, w_i_j = output_features[i][j].size(1), output_features[i][j].size(2), output_features[i][j].size(3)
            loss += nn.L1Loss(reduction='sum')(output_features[i][j], target_features[i][j]) / (c_i_j * h_i_j * w_i_j)

    return loss

def lr_average_loss(output, target):
    
    c, h, w = target.size(1), target.size(2), target.size(3)
    avg_pool = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)

    I_b = torch.norm(avg_pool(target) - torch.flip(avg_pool(target), dims=[3]), p=1, dim=1)
    I_hat = torch.norm(avg_pool(output) - torch.flip(avg_pool(output), dims=[3]), p=1, dim=1)

    loss = nn.L1Loss(reduction='sum')(I_b, I_hat) / (c * h * w)
    return loss

def w_smooth_loss(output, target):
    c, h, w = target.size(1), target.size(2), target.size(3)
    a_H = 1 - torch.norm(target[:, :, :-1, :] - target[:, :, 1:, :], p=1, dim=1)
    a_W = 1 - torch.norm(target[:, :, :, :-1] - target[:, :, :, 1:], p=1, dim=1)
    d_H = torch.norm(output[:, :, :-1, :] - output[:, :, 1:, :], p=1, dim=1)
    d_W = torch.norm(output[:, :, :, :-1] - output[:, :, :, 1:], p=1, dim=1)

    loss = nn.L1Loss(reduction='sum')(a_H * d_H + a_W * d_W) / (c * h * w)
    return loss

In [5]:
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import vgg16
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

class LaplacianPriorNet(nn.Module):
    def __init__(self, out_channels):
        super(LaplacianPriorNet, self).__init__()
        self.conv1 = nn.Conv2d(3, out_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.relu(out)
        return out

class SILPAutoencoder(nn.Module):
    def __init__(self, out_channels, in_features, out_features):
        super(SILPAutoencoder, self).__init__()
        self.laplacian_prior_net = LaplacianPriorNet(out_channels)
        self.encoder = nn.Sequential(
            nn.Conv2d(out_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Calculate the size of the feature maps after the encoder
        with torch.no_grad():
            dummy_input = torch.zeros(1, out_channels, 224, 224)
            encoder_output_size = self.encoder(dummy_input).view(1, -1).size(1)
        self.code_conversion = nn.Linear(encoder_output_size, out_features)  # Adjusted input size
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 3, kernel_size=3, padding=1),
            nn.Sigmoid()
        )
        self.upsample = nn.Upsample(size=(224, 224), mode='bilinear', align_corners=True)

    def forward(self, x):
        laplacian_features = self.laplacian_prior_net(x)
        encoded = self.encoder(laplacian_features)
        # Flatten the encoder output before passing to the linear layer
        code = self.code_conversion(encoded.view(encoded.size(0), -1))
        decoded = self.decoder(code.view(code.size(0), -1, 1, 1))
        outputs = self.upsample(decoded)
        return outputs

from PIL import UnidentifiedImageError


# Set up the training process
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
out_channels = 128  # Example output channels of LaplacianPriorNet
in_features = 224
out_features = 128  # Example output dimension of decoder
num_epochs = 10 

# Define transformations for the dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Load datasets
occluded_dataset = CustomDataset(root_dir=r'C:\Users\dk\Documents\silp\occluded\masked', transform=transform)
clear_dataset = CustomDataset(root_dir=r'C:\Users\dk\Documents\silp\neutral', transform=transform)

# Define data loaders
batch_size = 32
occluded_data_loader = DataLoader(occluded_dataset, batch_size=batch_size, shuffle=True)
clear_data_loader = DataLoader(clear_dataset, batch_size=batch_size, shuffle=True)

# Assuming you have a pretrained VGG model
vgg_model = vgg16(pretrained=True).features.to(device).eval() 

model = SILPAutoencoder(out_channels, in_features, out_features).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    for occluded_images, clear_images in zip(occluded_data_loader, clear_data_loader):
        occluded_images = occluded_images.to(device)
        clear_images = clear_images.to(device)

        # Calculate the losses
        outputs = model(occluded_images)

        # Resize outputs to match the size of clear_images
        outputs_resized = torch.nn.functional.interpolate(outputs, size=clear_images.shape[2:], mode='bilinear', align_corners=False)

        pixel_loss_value = pixel_loss(outputs_resized, clear_images)
        perceptual_loss_value = perceptual_loss(outputs_resized, clear_images, vgg_model)
        lr_average_loss_value = lr_average_loss(outputs_resized, clear_images)
        w_smooth_loss_value = w_smooth_loss(outputs_resized, clear_images)
        k1, k2, k3, k4 = 1, 0.25, 0.1, 0.1
        total_loss = k1 * pixel_loss_value + k2 * perceptual_loss_value + k3 * lr_average_loss_value + k4 * w_smooth_loss_value

        # Backpropagate and update the model
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [14]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import vgg16

In [15]:
def pixel_loss(output, target):
    c, h, w = target.size(1), target.size(2), target.size(3)
    loss = nn.MSELoss(reduction='sum')(output, target) / (c * h * w)
    return loss
def perceptual_loss(output, target, vgg_model):
    output_features = vgg_model(output)
    target_features = vgg_model(target)

    loss = 0
    for i in range(len(output_features)):
        for j in range(len(output_features[i])):
            c_i_j, h_i_j, w_i_j = output_features[i][j].size(1), output_features[i][j].size(2), output_features[i][j].size(3)
            loss += nn.L1Loss(reduction='sum')(output_features[i][j], target_features[i][j]) / (c_i_j * h_i_j * w_i_j)

    return loss
def lr_average_loss(output, target):
    c, h, w = target.size(1), target.size(2), target.size(3)
    avg_pool = nn.AvgPool2d(kernel_size=3, stride=1, padding=1)

    I_b = torch.norm(avg_pool(target) - torch.flip(avg_pool(target), dims=[3]), p=1, dim=1)
    I_hat = torch.norm(avg_pool(output) - torch.flip(avg_pool(output), dims=[3]), p=1, dim=1)

    loss = nn.L1Loss(reduction='sum')(I_b, I_hat) / (c * h * w)
    return loss
def w_smooth_loss(output, target):
    c, h, w = target.size(1), target.size(2), target.size(3)
    a_H = 1 - torch.norm(target[:, :, :-1, :] - target[:, :, 1:, :], p=1, dim=1)
    a_W = 1 - torch.norm(target[:, :, :, :-1] - target[:, :, :, 1:], p=1, dim=1)
    d_H = torch.norm(output[:, :, :-1, :] - output[:, :, 1:, :], p=1, dim=1)
    d_W = torch.norm(output[:, :, :, :-1] - output[:, :, :, 1:], p=1, dim=1)

    loss = nn.L1Loss(reduction='sum')(a_H * d_H + a_W * d_W) / (c * h * w)
    return loss


In [21]:
# Constants
DIRECTORY = r"dataset"
CATEGORIES = ["with_mask", "without_mask"]
# Data loading and preprocessing
from torchvision import transforms

# Define a transformation to resize the images
resize_transform = transforms.Resize((224, 224))

# Data loading and preprocessing
data = []
labels = []

for category in CATEGORIES:
    path = os.path.join(DIRECTORY, category)
    for img in os.listdir(path):
        img_path = os.path.join(path, img)
        image = Image.open(img_path)
        image = resize_transform(image)  # Resize the image
        image = transforms.ToTensor()(image)  # Convert to tensor

        data.append(image)
        labels.append(category)


In [27]:
# Split the data into training and testing sets
(trainX, testX, trainY, testY) = train_test_split(data, labels,
                                                  test_size=0.20, stratify=labels, random_state=42)


In [28]:
class LaplacianPriorNet(nn.Module):
    def __init__(self, out_channels):
        super(LaplacianPriorNet, self).__init__()
        self.conv1 = nn.Conv2d(3, out_channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.relu(out)
        return out
class SILPAutoencoder(nn.Module):
    def __init__(self, out_channels, in_features, out_features):
        super(SILPAutoencoder, self).__init__()
        self.laplacian_prior_net = LaplacianPriorNet(out_channels)
        self.encoder = nn.Sequential(
            nn.Conv2d(out_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Calculate the size of the feature maps after the encoder
        with torch.no_grad():
            dummy_input = torch.zeros(1, out_channels, 224, 224)
            encoder_output_size = self.encoder(dummy_input).view(1, -1).size(1)
        self.code_conversion = nn.Linear(encoder_output_size, out_features)  # Adjusted input size
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 3, kernel_size=3, padding=1),
            nn.Sigmoid()
        )
        self.upsample = nn.Upsample(size=(224, 224), mode='bilinear', align_corners=True)

    def forward(self, x):
        laplacian_features = self.laplacian_prior_net(x)
        encoded = self.encoder(laplacian_features)
        # Flatten the encoder output before passing to the linear layer
        code = self.code_conversion(encoded.view(encoded.size(0), -1))
        decoded = self.decoder(code.view(code.size(0), -1, 1, 1))
        outputs = self.upsample(decoded)
        return outputs
