In [1]:
import torch
import torch.nn as nn
import torchvision

In [2]:
class identity_block(nn.Module):
    def __init__(self, in_fts, kernel_size, filters):
        super(identity_block, self).__init__()
        self.f1, self.f2, self.f3 = filters
        self.l1 = nn.Conv2d(in_fts, self.f1, kernel_size=(1,1), padding='valid')
        self.l2 = nn.BatchNorm2d(self.f1)
        self.l3 = nn.ReLU()
        self.l4 = nn.Conv2d(self.f1, self.f2, kernel_size=kernel_size, padding='same')
        self.l5 = nn.BatchNorm2d(self.f2)
        self.l6 = nn.ReLU()
        self.l7 = nn.Conv2d(self.f2, self.f3, kernel_size=(1,1), padding='valid')
        self.l8 = nn.BatchNorm2d(self.f3)
    
    def forward(self, input_):
        x = self.l1(input_)
        x = self.l2(x)
        x = self.l3(x)

        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)

        x = self.l7(x)
        x = self.l8(x)

        x = x + input_
        x = nn.ReLU()(x)
        return x

In [3]:
class conv_block(nn.Module):
    def __init__(self, in_fts, kernel_size, filters, strides=(2,2)):
        super(conv_block, self).__init__()
        self.f1, self.f2, self.f3 = filters
        self.l1 = nn.Conv2d(in_fts, self.f1, kernel_size=(1,1), stride=strides)
        self.l2 = nn.BatchNorm2d(self.f1)
        self.l3 = nn.ReLU()
        self.l4 = nn.Conv2d(self.f1, self.f2, kernel_size=kernel_size, padding='same')
        self.l5 = nn.BatchNorm2d(self.f2)
        self.l6 = nn.ReLU()
        self.l7 = nn.Conv2d(self.f2, self.f3, kernel_size=(1,1))
        self.l8 = nn.BatchNorm2d(self.f3)
        self.l9 = nn.Conv2d(in_fts, self.f3, kernel_size=(1,1), stride=strides)
    
    def forward(self, input_):
        x = self.l1(input_)
        x = self.l2(x)
        x = self.l3(x)

        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)

        x = self.l7(x)
        x = self.l8(x)

        shortcut = self.l9(input_)

        x = x + shortcut
        x = nn.ReLU()(x)
        return x

In [4]:
class MyResnet(nn.Module):
    def __init__(self, K):
        super(MyResnet, self).__init__()
        self.l1 = nn.ZeroPad2d(padding=(3,3))
        self.l2 = nn.Conv2d(3, 64, kernel_size=(7,7), stride=(2,2), padding='valid')
        self.l3 = nn.BatchNorm2d(64)
        self.l4 = nn.ZeroPad2d(padding=(1,1))
        self.l5 = nn.MaxPool2d(kernel_size=(3,3), stride=(2,2))
        self.l6 = nn.Sequential(
            conv_block(64, 3, [64, 64, 256], strides=(1,1)),
            identity_block(256, 3, [64, 64, 256]),
            identity_block(256, 3, [64, 64, 256])
        )
        self.l7 = nn.Sequential(
            conv_block(256, 3, [128, 128, 512]),
            identity_block(512, 3, [128, 128, 512]),
            identity_block(512, 3, [128, 128, 512]),
            identity_block(512, 3, [128, 128, 512])
        )
        self.l8 = nn.AdaptiveAvgPool2d((1, 1))
        self.l9 = nn.Linear(512, 1024)
        #self.l92 = nn.Linear(256, 128)
        self.l92 = nn.Dropout(0.2)
        self.l93 = nn.Linear(1024,3)
        self.l10 = nn.Linear(512, 1024)
        #self.l11 = nn.Linear(256, 128)
        #self.l12 = nn.Linear(128, 64)
        self.l11 = nn.Dropout(0.3)
        self.l13 = nn.Linear(1024, 4)

    def forward(self, x):
        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = nn.ReLU()(x)
        x = self.l4(x)
        x = self.l5(x)

        x = self.l6(x)

        x = self.l7(x)

        x = self.l8(x)
        x = torch.flatten(x, 1)
        x1 = self.l9(x)
        x1 = nn.ReLU()(x1)
        x1 = self.l92(x1)
        x1 = self.l93(x1)
        # x1 = nn.Softmax(dim=1)(x1)
        x2 = self.l10(x)
        x2 = nn.ReLU()(x2)
        x2 = self.l11(x2)
        #x2 = self.l12(x2)
        x2 = self.l13(x2)
        x2 = nn.Sigmoid()(x2)
        return torch.concatenate((x1,x2), dim=1)

In [5]:
import json

with open('FaceMask/output.json', 'r') as file:
    data = json.load(file)

sizes = { d['file_name']: (d['height'], d['width']) for d in data['images'] }

In [6]:
from PIL import Image
new_height = 128
new_width = 128
mydata = {}
for i in data['annotations']:
    name = f"{i['image_id']}.png"

    height, width = sizes[name]
    h_scale = new_height / height
    w_scale = new_width / width

    x, y, w, h = i['bbox']

    x *= w_scale
    y *= h_scale
    w *= w_scale
    h *= h_scale

    x_norm = x / new_width
    y_norm = y / new_height
    w_norm = w / new_width
    h_norm = h / new_height

    cla = [i['category_id']-1]
    #cla[-1] = 1
    if name in mydata.keys():
        h = mydata[name]
        h.append(cla + [x_norm, y_norm, w_norm, h_norm])
        mydata[name] = h
    else:
        mydata[name] = [cla + [x_norm, y_norm, w_norm, h_norm]]
    # if img in x_train:
    #     h = 
    # x_train = x_train + [img]
    # y_train = y_train +  [cla + [x_norm, y_norm, w_norm, h_norm]]



In [7]:
x_train = []
y_train = []
for x in mydata:
    img = Image.open(f"FaceMask/images/{x}")
    if img.mode == 'RGBA':
        t = Image.open(f"FaceMask/images/{x}")
        img = t.convert('RGB')
    x_train.append(img)
    y_train.append(mydata[x][0])

In [8]:
from torch.utils.data import Dataset
class CustomDatatset(Dataset):
    def __init__(self, data, transform):
        self.data = data
        self.transforms = transform

    def __getitem__(self, index):
        image = self.data[0][index]
        tar = self.data[1][index]

        #image.resize((256, 256))
        
        if self.transforms:
            image = self.transforms(image)
        
        # image = image.permute(2, 0, 1)
        
        return (image, tar)
    
    def __len__(self):
        return len(self.data[0])

In [9]:
import torch.utils
import torch.utils.data

tran = torchvision.transforms.Compose([
    torchvision.transforms.Resize((128, 128)),
    torchvision.transforms.ToTensor(),
])
dat = CustomDatatset(data=(x_train[:-100], torch.tensor(y_train[:-100])), transform=tran)

loader = torch.utils.data.DataLoader(dataset=dat, shuffle=True, batch_size=24)

In [19]:
model = MyResnet(10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)

cpu


In [11]:
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()
        # Initialize Binary Cross Entropy and Categorical Cross Entropy loss functions
        self.bce = nn.SmoothL1Loss(reduction='none')  # For binary classification
        self.cce = nn.CrossEntropyLoss(reduction='none')  # For categorical classification

    def forward(self, y_true, y_pred):
        # Extract parts from y_true and y_pred
        loc_true = y_true[:, 1:]
        loc_pred = y_pred[:, 3:]
        class_true = y_true[:, :1]  # Convert to long for CrossEntropyLoss
        class_true = torch.squeeze(class_true)
        if class_true.ndim == 0:
            # It's a scalar, so convert to a 1D tensor with one element
            class_true = torch.tensor([class_true.item()])
        class_true = class_true.to(torch.long)
        class_pred = y_pred[:, :3]

        # Compute binary cross entropy for location
        bce = self.bce(loc_pred, loc_true)
        bce = bce.mean(dim=1)  # Average over the last dimension (batch size)
        
        # Compute categorical cross entropy for object class
        cce = self.cce(class_pred, class_true)
        #cce = cce.mean(dim=-1)  # Average over the last dimension (batch size)

        # Combine losses
        total_loss = bce.mean() + cce.mean()

        return total_loss

In [12]:
criterion = CustomLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.0001, momentum=0.9)

In [13]:
tran = torchvision.transforms.Compose([
    torchvision.transforms.Resize((128, 128)),
    torchvision.transforms.ToTensor(),
])
val_dat = CustomDatatset(data=(x_train[-100:], torch.tensor(y_train[-100:])), transform=tran)

val_loader = torch.utils.data.DataLoader(dataset=val_dat, shuffle=True, batch_size=10)

In [14]:
losses = []
import numpy as np
for i in range(20):
    model.train()
    train_loss = []
    val_losses = []
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)

        loss = criterion(targets, outputs)

        optimizer.zero_grad()

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        train_loss.append(loss.item())
    
    model.eval()
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)

            loss = criterion(targets, outputs)

            val_losses.append(loss.item())
    print(f"Epoch {i+1}/20: train_loss:{np.mean(train_loss):.4f} Val Loss: {np.mean(val_losses):.4f}")


Epoch 1/20: train_loss:0.9355 Val Loss: 0.9690
Epoch 2/20: train_loss:0.7630 Val Loss: 0.7386
Epoch 3/20: train_loss:0.6788 Val Loss: 0.6789
Epoch 4/20: train_loss:0.6315 Val Loss: 0.6672
Epoch 5/20: train_loss:0.6132 Val Loss: 0.6653
Epoch 6/20: train_loss:0.6023 Val Loss: 0.6650
Epoch 7/20: train_loss:0.6125 Val Loss: 0.6656
Epoch 8/20: train_loss:0.6058 Val Loss: 0.6643
Epoch 9/20: train_loss:0.6192 Val Loss: 0.6648
Epoch 10/20: train_loss:0.5893 Val Loss: 0.6645
Epoch 11/20: train_loss:0.5919 Val Loss: 0.6634
Epoch 12/20: train_loss:0.5978 Val Loss: 0.6625
Epoch 13/20: train_loss:0.5952 Val Loss: 0.6619
Epoch 14/20: train_loss:0.5903 Val Loss: 0.6630
Epoch 15/20: train_loss:0.5889 Val Loss: 0.6622
Epoch 16/20: train_loss:0.5842 Val Loss: 0.6605
Epoch 17/20: train_loss:0.5735 Val Loss: 0.6615
Epoch 18/20: train_loss:0.5903 Val Loss: 0.6615
Epoch 19/20: train_loss:0.5883 Val Loss: 0.6603
Epoch 20/20: train_loss:0.5883 Val Loss: 0.6588


In [18]:
test_loader = torch.utils.data.DataLoader(dataset=val_dat, shuffle=True, batch_size=1)
myimg, myout = next(iter(test_loader))
o = model(myimg)
bbox = o[0][3:] * 128
tr = myout[0][1:] * 128
from PIL import ImageDraw
myimg = myimg.squeeze(0)  # Remove the batch dimension

# Convert the tensor to numpy array
myimg = myimg.permute(1, 2, 0).numpy()  # Change shape to [H, W, C]

# Normalize if needed
if myimg.max() <= 1.0:
    myimg = (myimg * 255).astype(np.uint8)  # Convert to [0, 255] if in [0, 1]

# Convert to PIL Image
ima = Image.fromarray(myimg)
draw = ImageDraw.Draw(ima)
draw.rectangle([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]], outline='red', width=2)
draw.rectangle([tr[0], tr[1], tr[0]+tr[2], tr[1]+tr[3]], outline='black', width=2)
ima.show()

In [37]:
myout

tensor([[1.0000, 0.0175, 0.0000, 0.0600, 0.0767]])

In [31]:
myimg[0].shape

torch.Size([3, 128, 128])

ValueError: cannot select an axis to squeeze out which has size not equal to one