In [None]:
import os
import gc
import cv2
import time
import tqdm
import random
import collections
import numpy as np
import pandas as pd
from PIL import Image
from functools import partial
import matplotlib.pyplot as plt
import tqdm.notebook as tq
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score


import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

from torch.nn import ConvTranspose2d
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Module
from torch.nn import ModuleList
from torch.nn import ReLU
from torchvision.transforms import CenterCrop
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam

import threading

import json
import os


from semseg import show_models
from semseg.models import *
import ptlflow
from ptlflow.utils import flow_utils
from ptlflow.utils.io_adapter import IOAdapter

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

In [None]:
# Spatial size (in pixels) that every model input is resized to.
height, width = 512, 512

# Augmenting pipeline: fixed 512x512 resize, random horizontal flip, then a
# random crop (20%-100% of the area, aspect ratio between 0.5 and 2) that is
# resized to (height, width) before conversion to a tensor.
with_augs = transforms.Compose(
    [
        transforms.Resize((512, 512)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(
            size=(height, width),
            scale=(0.2, 1.0),
            ratio=(0.5, 2),
        ),
        transforms.ToTensor(),
    ]
)

# Deterministic pipeline: resize to (height, width) and convert to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((height, width)),
        transforms.ToTensor(),
    ]
)

# Per-channel normalisation; the same mean/std values are used elsewhere in
# this notebook to undo the normalisation before display.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

In [None]:

# Number of segmentation classes (one score channel per class).
num_classes = 3

def get_binary_masks(pred):
    """Split per-class scores into one binary mask per class.

    Args:
        pred: array-like whose axis 0 indexes the classes; the arg-max over
            that axis selects the winning class at every remaining position.

    Returns:
        A list with ``num_classes`` ``np.uint8`` arrays. Entry ``k`` is 1
        where class ``k`` is the arg-max and 0 everywhere else.
    """
    winners = np.argmax(pred, axis=0)
    return [(winners == class_id).astype(np.uint8) for class_id in range(num_classes)]

def to_painted_image(image, label):
    """Tint the pixels of the first two classes on a copy of ``image``.

    Args:
        image: H x W x 3 array. The blend weights and the final clip to
            [0, 1] suggest float values in [0, 1] -- TODO confirm for any
            new caller.
        label: per-class scores with the classes on axis 0 (forwarded to
            ``get_binary_masks``).

    Returns:
        A copy of ``image`` in which class-0 pixels are blended with
        ``[0, 0, 255]`` and class-1 pixels with ``[0, 255, 0]``. Every other
        pixel is left untouched.
    """
    class_masks = get_binary_masks(label)
    class0_pixels = class_masks[0] == 1
    class1_pixels = class_masks[1] == 1

    # Solid overlays. Channel 1 is green in both RGB and BGR; channel 2 is
    # blue only for an RGB image.
    green_overlay = np.zeros_like(image)
    green_overlay[:, :] = [0, 255, 0]
    blue_overlay = np.zeros_like(image)
    blue_overlay[:, :] = [0, 0, 255]

    # The overlays are on a 0-255 scale, so only a very small share of them is
    # mixed in; the result is then clipped back to [0, 1].
    image_weight = 0.998
    overlay_weight = 1.0 - image_weight
    tinted_green = np.clip(
        cv2.addWeighted(image, image_weight, green_overlay, overlay_weight, 0), 0, 1
    )
    tinted_blue = np.clip(
        cv2.addWeighted(image, image_weight, blue_overlay, overlay_weight, 0), 0, 1
    )

    painted = image.copy()
    painted[class0_pixels] = tinted_blue[class0_pixels]
    painted[class1_pixels] = tinted_green[class1_pixels]
    return painted

def print_dataset(dataset, i):
    """Display sample ``i`` of ``dataset``: the annotated image beside its flow.

    Args:
        dataset: indexable object whose items unpack as
            ``(img, flow, imu, mask)``; ``img``, ``flow`` and ``mask`` are
            handled as torch tensors (``imu`` is not used here).
        i: index of the sample to show.
    """
    img, flow, imu, mask = dataset[i]

    # Undo the per-channel normalisation (x * std + mean) and go channels-last.
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    img_hwc = (img * std + mean).permute(1, 2, 0).numpy()

    # Channels-last flow, multiplied by 255 for display.
    # NOTE(review): presumably this undoes an extra /255 applied when the
    # dataset was built -- confirm.
    flow_hwc = flow.permute(1, 2, 0).numpy() * 255

    img_painted = to_painted_image(img_hwc, mask.numpy())

    fig, (ax_image, ax_flow) = plt.subplots(1, 2, figsize=(7.5, 5))
    ax_image.imshow(img_painted)
    ax_image.set_title("Image Annotated")
    ax_flow.imshow(flow_hwc)
    ax_flow.set_title("Flow")
    plt.show()

def view_masks(dataset, i):
    """Display sample ``i``: image with bounding boxes plus its first two masks.

    Args:
        dataset: indexable object whose items unpack as
            ``(img, flow, imu, mask, bbox)``. ``img`` and ``mask`` are handled
            as torch tensors; ``bbox`` is iterated as ``(x1, y1, x2, y2)``
            boxes.
        i: index of the sample to show.
    """
    img, flow, imu, mask, bbox = dataset[i]

    # Undo the per-channel normalisation (x * std + mean) and go channels-last.
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    img = (img * std + mean).permute(1, 2, 0).numpy()

    # Draw every box on a copy so the un-annotated image stays intact.
    img_bb = img.copy()
    for box in bbox:
        x1, y1, x2, y2 = map(int, box)
        cv2.rectangle(img_bb, (x1, y1), (x2, y2), (0, 255, 0), 2)

    mask = mask.numpy()

    # Three panels: annotated image, then mask[0] and mask[1] in grayscale.
    fig, (ax_image, ax_first, ax_second) = plt.subplots(1, 3, figsize=(10, 5))
    ax_image.imshow(img_bb)
    ax_image.set_title("Imagem")
    ax_first.imshow(mask[0], cmap='gray')
    ax_first.set_title("Em movimento")
    ax_second.imshow(mask[1], cmap='gray')
    ax_second.set_title("Parado")
    plt.show()

In [None]:
from semseg.models.backbones import ResNet, PoolFormer, ConvNeXt
from semseg.models.heads import UPerHead, LawinHead
from torch.nn import functional as F

class SemSegModel(nn.Module):
    """Two-stream segmentation network over an image and its optical flow.

    A ResNet-50 encodes the image and a ResNet-18 encodes the flow; the two
    sets of backbone outputs are joined with ``+`` and decoded by a
    ``LawinHead`` into ``num_classes`` score maps, which are then bilinearly
    resized to the notebook-level ``(height, width)``.

    NOTE(review): ``+`` is applied both to the backbones' ``channels``
    attributes and to their forward outputs. This presumably concatenates
    lists/tuples of per-stage values rather than adding tensors -- confirm
    against the ``semseg`` version in use.
    """

    def __init__(self,
                 flow_weights='../models/resnet18_a1.pth',
                 img_weights='/home/thiago/Workspace/motion-segmentation/src/resnet50_a1.pth'):
        """Build both backbones, load their pretrained weights and the head.

        Args:
            flow_weights: path to the state dict for the flow backbone
                (ResNet-18). Defaults to the original hard-coded location.
            img_weights: path to the state dict for the image backbone
                (ResNet-50). Defaults to the original hard-coded location.
        """
        super().__init__()
        # When True, the first forward pass prints intermediate shapes once.
        self.print_shape = True

        # Both state dicts are mapped to the CPU: the freshly built backbones
        # live on the CPU and load_state_dict copies into them, so mapping to
        # 'cuda' only added a hard GPU requirement. Move the whole model with
        # .to(device) afterwards.
        self.backbone_flow = ResNet('18')
        self.backbone_flow.load_state_dict(
            torch.load(flow_weights, map_location='cpu'), strict=False)
        self.backbone_img = ResNet('50')
        self.backbone_img.load_state_dict(
            torch.load(img_weights, map_location='cpu'), strict=False)

        backbone_channels = self.backbone_img.channels + self.backbone_flow.channels

        print(backbone_channels)
        self.head = LawinHead(backbone_channels, 128, num_classes=num_classes)

    def forward(self, img, flow, imu):
        """Return per-class score maps resized to ``(height, width)``.

        Args:
            img: input batch for the image backbone.
            flow: input batch for the flow backbone.
            imu: accepted for interface compatibility; not used.

        Returns:
            The head output, bilinearly interpolated to ``(height, width)``.
        """
        img_x = self.backbone_img(img)
        flow_x = self.backbone_flow(flow)

        if self.print_shape:
            for feat in img_x:
                print(f"Img shape: {feat.shape}")
            for feat in flow_x:
                print(f"Flow shape: {feat.shape}")

        # Image features first, then flow features (same order as
        # backbone_channels in __init__).
        x = img_x + flow_x

        x = self.head(x)

        if self.print_shape:
            print(f"Output shape: {x.shape}")
            # Only report shapes on the first call.
            self.print_shape = False

        x = F.interpolate(x, size=(height, width), mode='bilinear', align_corners=False)
        return x

# To start from freshly initialised weights instead:
# model = SemSegModel().to(device)

# Restore the fully pickled model. map_location lets a checkpoint that was
# saved on a GPU be loaded on a CPU-only machine as well.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# checkpoints from a trusted source. On PyTorch >= 2.6 the default
# weights_only=True rejects fully pickled modules; pass weights_only=False
# there if this call fails.
model = torch.load("models/last.pth", map_location=device).to(device)
model.eval()
# model = torch.load("/home/thiago/Workspace/motion-segmentation/src/models/last_resnet.pth").to(device)

In [None]:
# Root of the KITTI sequence to process.
kitti_dir = "/media/thiago/Data/Datasets/kitti/dataset/sequences/03"
# When True, the optical-flow images are (re)computed before inference.
generate_flow = True
image_dir = os.path.join(kitti_dir, "image_2")
flow_dir = os.path.join(kitti_dir, "flow")
pred_dir = os.path.join(kitti_dir, "pred")
# exist_ok=True creates the output folders only when missing, without a
# separate (race-prone) existence check.
os.makedirs(pred_dir, exist_ok=True)
os.makedirs(flow_dir, exist_ok=True)
# Frame file names in sorted order.
images = sorted(os.listdir(image_dir))
def get_next_image_name(filename, step=2):
    """Return the file name of the frame ``step`` indices after ``filename``.

    The numeric part before the first ``.`` is parsed, advanced by ``step``
    and formatted back as six zero-padded digits with a ``.png`` extension,
    e.g. ``000005.png`` -> ``000007.png`` for the default step of 2.

    Args:
        filename: frame file name whose stem is an integer (``"000005.png"``).
        step: number of frames to advance. Defaults to 2.

    Returns:
        The successor frame's file name; always ends in ``.png``.

    Raises:
        ValueError: if the part before the first ``.`` is not an integer.
    """
    number = int(filename.split(".")[0]) + step
    return f"{number:06}.png"

In [None]:
# compute optical flow
if generate_flow:
    # Use the notebook-wide device (CPU fallback included) rather than a
    # hard-coded .cuda(), and switch to eval mode because the model is only
    # used for inference here.
    flow_model = ptlflow.get_model('dicl', pretrained_ckpt='kitti').to(device)
    flow_model.eval()

    def flow_single_image(img_name):
        """Compute the flow from ``img_name`` to its successor and save it.

        The successor is chosen by ``get_next_image_name``. The predicted
        flow is rendered as a colour image and written to ``flow_dir`` under
        the same file name as ``img_name``. Frames whose own file or
        successor cannot be read are skipped silently; any other failure is
        reported and the frame is skipped.

        Args:
            img_name: file name of the first frame inside ``image_dir``.
        """
        try:
            next_image_name = get_next_image_name(img_name)
            path = os.path.join(image_dir, img_name)
            next_image_path = os.path.join(image_dir, next_image_name)
            flow_path = os.path.join(flow_dir, img_name)

            prev_image = cv2.imread(path)
            next_image = cv2.imread(next_image_path)

            # cv2.imread returns None for a missing or unreadable file; this
            # is how the last frames (which have no successor) are skipped.
            if prev_image is None or next_image is None:
                return

            imgs = [prev_image, next_image]
            io_adapter = IOAdapter(flow_model, imgs[0].shape[:2])
            inputs = io_adapter.prepare_inputs(imgs)
            inputs['images'] = inputs['images'].to(device)

            # No autograd graph is needed for inference. Without no_grad the
            # activations were kept alive, which is what the earlier manual
            # del / gc.collect() calls were compensating for.
            with torch.no_grad():
                predictions = flow_model(inputs)
            # NOTE(review): if IOAdapter pads the inputs, the predicted flow
            # keeps the padded size -- confirm whether it should be unpadded
            # before being saved.
            flows = predictions['flows']
            flow_rgb = flow_utils.flow_to_rgb(flows, flow_max_radius=150)
            # First batch item, first flow -> HWC layout.
            flow_rgb = flow_rgb[0, 0].permute(1, 2, 0)
            flow = flow_rgb.detach().cpu().numpy()
            # OpenCV writes images in BGR channel order.
            flow = cv2.cvtColor(flow, cv2.COLOR_RGB2BGR)
            # Scale to 0-255 and store as 8-bit.
            flow = (flow * 255).astype(np.uint8)
            cv2.imwrite(flow_path, flow)
        except Exception as e:
            # Best effort: report the problem and continue with the next frame.
            print(f"Skipping {img_name} due to {e}\n")

    for image in tq.tqdm(images):
        flow_single_image(image)

In [None]:
video_path = os.path.join(kitti_dir, "results.avi")

# The first frame defines the output resolution of the video and the masks.
with Image.open(os.path.join(image_dir, images[0])) as img0:
    original_width, original_height = img0.size
video = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'XVID'), 15, (original_width, original_height))

# Grey level stored for each predicted class index: 0 -> 255, 1 -> 128, 2 -> 0.
class_to_gray = np.array([255, 128, 0], dtype=np.uint8)
# Constants that undo `normalize` (x * std + mean).
denorm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
denorm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
# The model's forward takes an IMU argument; a constant zero placeholder is
# passed, so it is built once instead of on every iteration.
imu = torch.zeros(2, 3, 100).to(device)

try:
    for image_name in tq.tqdm(images):
        # Frames without a flow image are skipped before any file is loaded.
        flow_path = os.path.join(flow_dir, image_name)
        if not os.path.exists(flow_path):
            continue

        with Image.open(os.path.join(image_dir, image_name)) as pil_img:
            img = normalize(transform(pil_img)).unsqueeze(0).to(device)
        with Image.open(flow_path) as pil_flow:
            # NOTE(review): ToTensor already scales 8-bit images to [0, 1];
            # the extra division by 255 is kept as in the original and
            # presumably mirrors the training preprocessing -- confirm.
            flow = torch.div(transform(pil_flow), 255).unsqueeze(0).to(device)

        # Inference only: do not build an autograd graph.
        with torch.no_grad():
            pred = model(img, flow, imu)

        # Back to a de-normalised HWC numpy image for painting.
        img = img.squeeze(0).cpu().detach() * denorm_std + denorm_mean
        img = np.transpose(img.numpy(), (1, 2, 0))
        pred = pred.squeeze(0).cpu().detach().numpy()

        # Save the class map as a grey-scale image. Nearest-neighbour resizing
        # keeps the three grey levels intact; the default bilinear
        # interpolation would blend them at class boundaries and create
        # pixels that look like another class.
        pred_output = class_to_gray[np.argmax(pred, axis=0)]
        pred_output = cv2.resize(pred_output, (original_width, original_height),
                                 interpolation=cv2.INTER_NEAREST)
        cv2.imwrite(os.path.join(pred_dir, image_name), pred_output)

        painted_img = to_painted_image(img, pred)
        # Clip before the 8-bit cast so float round-off cannot wrap around.
        painted_img = np.clip(painted_img * 255, 0, 255).astype(np.uint8)
        painted_img = cv2.cvtColor(painted_img, cv2.COLOR_RGB2BGR)
        painted_img = cv2.resize(painted_img, (original_width, original_height))
        video.write(painted_img)
finally:
    # Finalise the container even if a frame fails; without release() the
    # output file can be left truncated or unplayable.
    video.release()
    