In [1]:
from jaad_data import JAAD
import torch
from PIL import Image
from torchvision import transforms
from torchvision import models
import matplotlib.pyplot as plt
import network
import pickle

In [2]:
JAAD_PATH = '../JAAD'
DEEPLAB_PATH = '../best_deeplabv3plus_resnet101_cityscapes_os16.pth'
SUBSET_PATH = '../subset'

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [4]:
# Load the JAAD dataset
jaad_dt = JAAD(data_path=JAAD_PATH)
#jaad.generate_database()
#jaad_dt.get_data_stats()

data_opts = {
    'sample_type': 'beh'
}

seq_train = jaad_dt.generate_data_trajectory_sequence('train', **data_opts)  
seq_test = jaad_dt.generate_data_trajectory_sequence('test', **data_opts)  

---------------------------------------------------------
Generating action sequence data
fstride: 1
sample_type: beh
subset: default
height_rng: [0, inf]
squarify_ratio: 0
data_split_type: default
seq_type: intention
min_track_size: 15
random_params: {'ratios': None, 'val_data': True, 'regen_data': False}
kfold_params: {'num_folds': 5, 'fold': 1}
---------------------------------------------------------
Generating database for jaad
jaad database loaded from c:\Users\jacop\Documents\ComputerVision\JAAD\data_cache\jaad_database.pkl
---------------------------------------------------------
Generating intention data
Split: train
Number of pedestrians: 324 
Total number of samples: 304 
---------------------------------------------------------
Generating action sequence data
fstride: 1
sample_type: beh
subset: default
height_rng: [0, inf]
squarify_ratio: 0
data_split_type: default
seq_type: intention
min_track_size: 15
random_params: {'ratios': None, 'val_data': True, 'regen_data': False}


In [5]:
train_transforms = transforms.Compose([
    #transforms.Resize((512, 512)),  # Ridimensiona le immagini a 256x256
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [6]:
""" deeplab_model = models.segmentation.deeplabv3_resnet101(pretrained=True).to(device)
deeplab_model.eval()  # Imposta il modello in modalità di valutazione """
#deeplab_model = models.segmentation.deeplabv3_resnet101(pretrained=True)
deeplab_model = network.modeling.__dict__['deeplabv3plus_resnet101'](num_classes=19)
deeplab_model.load_state_dict(torch.load(DEEPLAB_PATH)['model_state'])
deeplab_model.to(device)
deeplab_model.eval()

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [7]:
def get_segmentation_mask(image_path, model, preprocess):
    # Load the image
    input_image = Image.open(image_path).convert("RGB")
    input_tensor = preprocess(input_image).to(device)
    input_batch = input_tensor.unsqueeze(0)  # Create a batch with a single image
    
    # Pass the image through the model
    with torch.no_grad():
        output = model(input_batch)
        
    # Check if output is a tensor or a dictionary
    if isinstance(output, dict):
        output = output['out'][0]
    elif isinstance(output, torch.Tensor):
        output = output[0]
    else:
        raise ValueError(f"Unexpected output type: {type(output)}")
    
    # Convert the output to a mask
    output_predictions = output.argmax(0).cpu()
    return output_predictions

In [8]:
def process_video_frames(seq_train, model, preprocess):
    all_masks = []
    for video_frames in seq_train['image']:
        video_masks = []
        sampled_frame_paths = video_frames[::3]

        for frame_path in sampled_frame_paths:
            mask = get_segmentation_mask(frame_path, model, preprocess)
            visualize_mask(frame_path, mask)
            video_masks.append(mask)
        all_masks.append(video_masks)
    return all_masks

In [9]:
# Visualizza la maschera semantica
def visualize_mask(image_path, mask):
    image = Image.open(image_path).convert("RGB")
    #image = image.resize((512, 512))  # Ridimensiona per la visualizzazione
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.title("Original Image")
    plt.subplot(1, 2, 2)
    plt.imshow(mask, cmap='jet')
    plt.title("Semantic Mask")
    plt.show()


In [10]:
all_video_masks = process_video_frames(seq_train, deeplab_model, train_transforms)
seq_train['masks'] = all_video_masks


In [None]:
# Nome del file in cui vuoi salvare i risultati
filename = 'masks_results.pkl'

# Apri il file in modalità scrittura binaria e salva il dizionario
with open(filename, 'wb') as f:
    pickle.dump(seq_train['masks'], f)

In [None]:
""" for seq in seq_train['image']:
    for image_path in seq:
        if os.path.exists(image_path):
            mask = get_semantic_mask(image_path, model, preprocess)
            mask_image_path = image_path.replace('.jpg', '_mask.png')
            plt.imsave(mask_image_path, mask, cmap='jet') """