In [4]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

from torch.nn import functional as F
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import LazyLinear
from PIL import Image
import requests
from torch.optim import Adam

import warnings
# Silence every Python warning for the rest of the kernel session.
warnings.simplefilter('ignore')

# Size handed to transforms.Resize when frames are prepared for the model.
image_size = 256, 256

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class BasicBlock(nn.Module):
    """Conv2d -> BatchNorm2d -> ReLU -> Dropout(p=0.1) building block.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the convolution.
        stride: Stride of the convolution.
        kernel_size: Side of the square kernel. The padding is derived as
            ``(kernel_size - 1) // 2``.
    """

    def __init__(self, in_channels, out_channels, stride=1, kernel_size=3):
        super().__init__()
        half_kernel = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=half_kernel,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, x):
        """Run ``x`` through conv, batch norm, ReLU and dropout, in that order."""
        activated = self.relu(self.bn(self.conv(x)))
        return self.dropout(activated)
    
class EncoderBlock(nn.Module):
    """Encoder stage: four BasicBlocks with two additive skip connections.

    When ``strided`` is truthy the first block uses stride 2 and the channel
    count is doubled; otherwise stride is 1 and the channel count is kept.
    A 1x1 convolution followed by batch norm projects the input so it can be
    added to the output of the second block.

    Args:
        in_channels: Channels of the incoming feature map.
        strided: Whether this stage downsamples (stride 2, 2x channels).
    """

    def __init__(self, in_channels, strided=True):
        super().__init__()
        if strided:
            out_channels, stride = in_channels * 2, 2
        else:
            out_channels, stride = in_channels, 1

        self.layer1 = BasicBlock(in_channels, out_channels, stride=stride)
        self.layer2 = BasicBlock(out_channels, out_channels)
        self.layer3 = BasicBlock(out_channels, out_channels)
        self.layer4 = BasicBlock(out_channels, out_channels)
        # Projection of the block input used as the first shortcut.
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_channels),
        )

    def forward(self, x):
        """Return ``layer4(layer3(h)) + h`` where ``h = layer2(layer1(x)) + downsample(x)``."""
        hidden = self.layer1(x)
        shortcut = self.downsample(x)
        first_half = self.layer2(hidden) + shortcut
        second_half = self.layer4(self.layer3(first_half))
        return second_half + first_half
    
class DecoderBlock(nn.Module):
    """Decoder stage: 1x1 reduce -> transposed conv -> 1x1 expand.

    Channels go ``in_channels -> in_channels // 4 -> in_channels // 2``.
    The transposed convolution (kernel 3, stride 2, padding 1,
    output_padding 1) doubles the spatial size.

    Args:
        in_channels: Channels of the incoming feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        reduced = in_channels // 4
        expanded = in_channels // 2

        self.layer1 = BasicBlock(in_channels, reduced, kernel_size=1)
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(
                reduced,
                reduced,
                kernel_size=3,
                stride=2,
                padding=1,
                output_padding=1,
            ),
            nn.BatchNorm2d(reduced),
        )
        self.layer3 = BasicBlock(reduced, expanded, kernel_size=1)

    def forward(self, x):
        """Apply the three layers in sequence and return the result."""
        return self.layer3(self.layer2(self.layer1(x)))
    
class InitialBlock(nn.Module):
    """Network stem: 7x7 stride-2 convolution, batch norm, then 2x2 max pooling.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Return ``maxpool(bn(conv(x)))``."""
        normalized = self.bn(self.conv(x))
        return self.maxpool(normalized)
    
class FinalBlock(nn.Module):
    """Output head: transposed conv -> conv -> conv, each followed by batch norm, then sigmoid.

    The transposed convolution (kernel 3, stride 2, no padding) maps a side of
    length ``H`` to ``2H + 1``; the following kernel-2 convolution trims that to
    ``2H``; the last convolution keeps the size and emits ``num_classes`` maps.

    Args:
        in_channels: Channels of the incoming feature map.
        num_classes: Number of output channels (one probability map each).
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        half = in_channels // 2

        self.transposeconv1 = nn.ConvTranspose2d(
            in_channels, half, kernel_size=3, stride=2, output_padding=0
        )
        self.bn1 = nn.BatchNorm2d(half)

        self.conv1 = nn.Conv2d(half, half, kernel_size=2)
        self.bn2 = nn.BatchNorm2d(half)

        self.conv2 = nn.Conv2d(half, out_channels=num_classes, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(num_classes)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the layers in registration order and return values in [0, 1]."""
        pipeline = (
            self.transposeconv1,
            self.bn1,
            self.conv1,
            self.bn2,
            self.conv2,
            self.bn3,
            self.sigmoid,
        )
        out = x
        for stage in pipeline:
            out = stage(out)
        return out
    
class LinkNet(nn.Module):
    """Encoder-decoder segmentation network with additive skip connections.

    A 3-channel input goes through the stem and four encoder stages
    (64 -> 64 -> 128 -> 256 -> 512 channels); four decoder stages then upsample
    back, adding the outputs of encoders 3, 2 and 1 to the outputs of decoders
    4, 3 and 2 respectively. The final block emits ``num_classes`` sigmoid maps.

    The notebook feeds 256x256 images; other sizes must keep encoder and
    decoder feature maps the same shape for the additions to work
    (presumably height/width multiples of 32 -- TODO confirm).

    Args:
        num_classes: Number of output channels.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.initblock = InitialBlock(3, 64)

        # Encoder path.
        self.encoder1 = EncoderBlock(64, strided=False)
        self.encoder2 = EncoderBlock(64)
        self.encoder3 = EncoderBlock(128)
        self.encoder4 = EncoderBlock(256)

        # Decoder path (registered deepest-first, matching the forward order).
        self.decoder4 = DecoderBlock(512)
        self.decoder3 = DecoderBlock(256)
        self.decoder2 = DecoderBlock(128)
        self.decoder1 = DecoderBlock(64)

        self.finalblock = FinalBlock(32, num_classes)

    def forward(self, x):
        """Return the per-pixel sigmoid output for the image batch ``x``."""
        skip1 = self.encoder1(self.initblock(x))
        skip2 = self.encoder2(skip1)
        skip3 = self.encoder3(skip2)
        bottleneck = self.encoder4(skip3)

        up = self.decoder4(bottleneck) + skip3
        up = self.decoder3(up) + skip2
        up = self.decoder2(up) + skip1
        up = self.decoder1(up)
        return self.finalblock(up)

In [6]:
from torch.utils.data import DataLoader
from copy import deepcopy
import matplotlib.pyplot as plt

num_classes = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = LinkNet(num_classes)

# model_path = 'model/best_lips_segmentation_linknet.pt'
model_path = 'best_lips_segmentation_linknet_full.pt'

# NOTE(security): torch.load unpickles the file -- only load checkpoints you trust.
state_dict = torch.load(model_path, map_location=device)

# Checkpoints saved from an nn.DataParallel model carry a leading 'module.'
# on every key. Strip it only when it is a prefix, so keys that merely
# contain that substring are left alone.
new_state_dict = {
    (k[len('module.'):] if k.startswith('module.') else k): v
    for k, v in state_dict.items()
}

# Load into the bare model BEFORE any DataParallel wrapping: a wrapped model
# expects the 'module.' prefix that was just removed, so loading after
# wrapping fails on multi-GPU machines.
model.load_state_dict(new_state_dict)

# Assuming that we are on a machine with multiple GPUs.
if torch.cuda.device_count() > 1:
    print(torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)

model.to(device)

# The notebook only runs inference: switch Dropout off and make BatchNorm use
# its running statistics. eval() returns the model, so the cell still displays it.
model.eval()

LinkNet(
  (initblock): InitialBlock(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (encoder1): EncoderBlock(
    (layer1): BasicBlock(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (layer2): BasicBlock(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (layer3): BasicBlock(
      (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [7]:
# import matplotlib.pyplot as plt
# import urllib
# from PIL import Image
# from io import BytesIO
# from torchvision import transforms
# import numpy as np
# import cv2

# # Your image URL
# url = 'https://www.shutterstock.com/image-photo/expression-people-concept-smiling-man-600nw-483376930.jpg'

# # Open the URL image
# with urllib.request.urlopen(url) as url:
#     s = url.read()
#     img = Image.open(BytesIO(s))


# # Define the transform
# transform = transforms.Compose([
#     transforms.Resize(image_size),
#     transforms.ToTensor()
# ])

# img_tensor = transform(img)
# img_tensor = img_tensor.unsqueeze(0)  # add a batch dimension

# # Pass the image through the model
# with torch.no_grad():
#     output = model(img_tensor)

# # Postprocess the output
# output = output.squeeze().cpu().numpy()
# output = (output > 0.5).astype('uint8')  # apply a threshold

# # Create an overlay by repeating the output along the third axis
# overlay = np.stack([output]*3, axis=-1)

# # Convert overlay to float
# overlay = overlay.astype(np.float32)

# # Apply the overlay
# overlay_img = cv2.addWeighted(img_tensor.squeeze().cpu().numpy().transpose(1, 2, 0), 0.5, overlay, 0.5, 0)

# # Plot the original image
# plt.figure(figsize=(10, 10))
# plt.subplot(1, 2, 1)
# plt.imshow(img)
# plt.title('Original Image')

# # Plot the image with overlay
# plt.subplot(1, 2, 2)
# plt.imshow(overlay_img)
# plt.title('Image with Overlay')
# plt.show()

In [8]:
import cv2
import torch
from torchvision import transforms
import numpy as np

# Preprocessing applied to each PIL frame: resize to `image_size`, then
# convert to a tensor.
transform = transforms.Compose(
    [transforms.Resize(image_size), transforms.ToTensor()]
)

In [9]:
def rgb_to_bgr(color):
    """Return ``color`` with its channel order reversed (RGB -> BGR).

    Works on any sliceable sequence (list, tuple, NumPy array) and returns the
    same kind of object. Because the operation is a plain reversal, applying
    it to a BGR colour yields RGB.
    """
    reverse_order = slice(None, None, -1)
    return color[reverse_order]

In [10]:
import cv2
import time
import numpy as np
from PIL import Image

# Define the desired output size
output_size = (640, 480)  # width, height

# Overlay colour written in BGR (OpenCV order) on the 0-255 scale.
overlay_color = np.array([127, 127, 255])
# Blending is done on the RGB frame, so reverse the channel order
# (the reversal is symmetric, so rgb_to_bgr also maps BGR -> RGB).
overlay_color = rgb_to_bgr(overlay_color)
# ToTensor yields float32 pixels in [0, 1]; put the colour on the same scale.
# Mixing a 0-255 colour into a [0, 1] frame saturates the overlay to white.
overlay_color_unit = overlay_color.astype(np.float32) / 255.0

# Open the camera
cap = cv2.VideoCapture(0)

# Check if the webcam is opened correctly
if not cap.isOpened():
    raise IOError("Cannot open webcam")

# Inference only: disable Dropout and use BatchNorm running statistics.
model.eval()
# Inputs must live on the same device as the model's weights.
model_device = next(model.parameters()).device

# used to record the time when we processed last frame
prev_frame_time = 0

# used to record the time at which we processed current frame
new_frame_time = 0

try:
    while True:
        # Read a frame from the camera
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers BGR; the preprocessing pipeline expects an RGB PIL image.
        rgb_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Preprocess the frame and add a batch dimension
        frame_tensor = transform(rgb_image).unsqueeze(0)

        # Pass the frame through the model (on the model's device)
        with torch.no_grad():
            output = model(frame_tensor.to(model_device))

        # Postprocess the output: threshold the probability map into a boolean mask
        output = output.squeeze().cpu().numpy()
        mask = output > 0.7

        # CHW tensor -> HWC float32 RGB image in [0, 1]
        frame_with_overlay = frame_tensor.squeeze(0).numpy().transpose(1, 2, 0).copy()

        # 50/50 blend of the frame and the overlay colour, only where the mask
        # is True. Plain NumPy handles an empty mask as a no-op.
        frame_with_overlay[mask] = 0.5 * frame_with_overlay[mask] + 0.5 * overlay_color_unit

        # Convert to 8-bit BGR so text colour, resizing and display all use the
        # usual 0-255 scale.
        frame_with_overlay_bgr = cv2.cvtColor(
            np.clip(frame_with_overlay * 255.0, 0, 255).astype(np.uint8),
            cv2.COLOR_RGB2BGR,
        )

        # Calculate and display the FPS
        new_frame_time = time.time()
        elapsed = new_frame_time - prev_frame_time
        # Guard against identical timestamps on coarse clocks.
        fps = 1 / elapsed if elapsed > 0 else 0.0
        prev_frame_time = new_frame_time
        cv2.putText(frame_with_overlay_bgr, f'FPS: {fps:.2f}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame with overlay
        frame_with_overlay_bgr_resized = cv2.resize(frame_with_overlay_bgr, output_size)
        cv2.imshow('Frame with Overlay', frame_with_overlay_bgr_resized)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the windows even if a frame raised an
    # error or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()