```bash
python -m venv mmlabs
# Activate the environment first; otherwise the installs below go into whichever
# Python is currently active instead of the freshly created venv.
# (On Windows use: mmlabs\Scripts\activate)
source mmlabs/bin/activate
pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121
pip install mmcv==2.2.0 -f https://download.openmmlab.com/mmcv/dist/cu121/torch2.2/index.html
```
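References for choosing matching versions:
- PyTorch previous versions (CUDA-specific wheels): https://pytorch.org/get-started/previous-versions/
- MMCV installation guide (prebuilt wheels per torch/CUDA version): https://mmcv.readthedocs.io/en/latest/get_started/installation.html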

In [None]:
# Object categories used in the experiments, mapped to integer label ids.
# NOTE(review): the values look like COCO "thing" category ids — confirm
# against the annotation file in use.
obj_labels = dict(
    airplane=5,
    cat=17,
    dog=18,
    elephant=22,
    sheep=20,
    horse=19,
    bus=6,
    motorcycle=4,
    car=3,
)

# Scene name -> list of "stuff" category ids that must all appear in an image
# for it to count as an example of that scene (see how the lists are passed to
# `fetch_images` further down).
scene_labels = {
    'Cloud': [106],          # clouds
    'Runway': [149, 157],    # road, sky-other

    'Beach': [154, 155],     # sand, sea
    # Fixed: this entry used 156 for "sand", contradicting 'Beach' above where
    # sand is 154. In the COCO-Stuff label list 156 is "shelf", so the old
    # value asked for shelf + mountain images.
    'Desert': [154, 135],    # sand, mountain
    'Forest': [169, 124],    # tree, grass

    'City': [158, 96, 140],  # skyscraper, building-other, pavement
    'Highway': [149],        # road
}
    

In [None]:
import utils as u
import importlib
# Re-execute `utils` so edits made to the module on disk are picked up without
# restarting the kernel.
importlib.reload(u)
# Imported after the reload so these names bind to the reloaded module's objects.
from utils import categories, categories_scenes

In [None]:
# Pick up any on-disk edits to `utils` before its loaders are called.
importlib.reload(u)

# Directories holding the generated images of each run.
experiments1_dir, experiments2_dir, sd_baseline_dir = (
    'data/experiments1/',  # Aggregated Attention
    'data/experiments2/',  # Scene-Based
    'data/sd_baseline/',
)

# Original (already resized) images together with their file names.
resized_imgs, img_names = u.load_original_images('./data/original_resized')

# Generated images, loaded in the same order as before:
# Aggregated Attention, Scene-Based, then the SD baseline.
imgs1, imgs2, sd_base = [
    u.load_experiment_images(run_dir, to_list=True)
    for run_dir in (experiments1_dir, experiments2_dir, sd_baseline_dir)
]

In [None]:
from mmseg.apis import MMSegInferencer
# Name of the segmentation model handed to the inferencer. Going by the name,
# this is a PSPNet (ResNet-101) trained on COCO-Stuff 164k at 512x512 —
# confirm against the mmseg model zoo.
model = 'pspnet_r101-d8_4xb4-320k_coco-stuff164k-512x512'
# Single inferencer instance reused by the later cells to segment image batches.
inferencer = MMSegInferencer(model=model)

In [None]:
# Segment every original (resized) image, one batch per category, then save
# the predictions and their visualisations under ./data/masks.
masks = {}
vis = {}
for category in categories:
    # return_vis=True asks the inferencer to return the rendered visualisation
    # alongside the raw predictions.
    results = inferencer(resized_imgs[category], return_vis=True)
    masks[category] = results['predictions']
    vis[category] = results['visualization']
u.save_imgs(masks, basedir='./data/masks', img_names=img_names, original=True)
u.save_imgs(vis, basedir='./data/masks/vis', img_names=img_names, original=True)

In [None]:
# Generated image set segmented by this cell (nested as imgs[category][scene]).
imgs = sd_base

# Results are kept in their own dicts. Reusing the names `masks` / `vis` here
# would overwrite the per-category results of the original images with nested
# {category: {scene: ...}} dicts, and the class-intersection cell further down
# iterates `masks[category]` as a flat list of per-image predictions.
masks_sd = {}
vis_sd = {}
for category in categories:
    masks_sd[category] = {}
    vis_sd[category] = {}
    for scene in categories_scenes[category]:
        # return_vis=True asks the inferencer to return the rendered
        # visualisation alongside the raw predictions.
        results = inferencer(imgs[category][scene], return_vis=True)
        masks_sd[category][scene] = results['predictions']
        vis_sd[category][scene] = results['visualization']
u.save_imgs(masks_sd, basedir='./data/masks_sd', img_names=img_names, original=False)
u.save_imgs(vis_sd, basedir='./data/masks_sd/vis', img_names=img_names, original=False)

In [None]:
import matplotlib.pyplot as plt
import numpy as np


# For each category, find the label ids that occur in every one of its masks.
# Assumes `masks[category]` is a flat sequence of per-image label arrays that
# np.unique can consume as a whole — TODO confirm shapes are uniform.
classes = {}
for category in categories:
    category_masks = masks[category]
    # Start from every label seen anywhere in the category ...
    shared = np.unique(category_masks)
    for mask in category_masks:
        # ... and keep only the labels that this image contains as well.
        shared = shared[np.isin(shared, np.unique(mask))]
    classes[category] = shared

classes

In [53]:
from pycocotools.coco import COCO
import requests
from PIL import Image
from io import BytesIO
import numpy as np
# Index of the stuff_val2017 annotation file, queried by `fetch_images` below.
# NOTE(review): this is an absolute, machine-specific path — anyone else running
# the notebook must point it at their own copy of stuff_val2017.json.
coco = COCO('/home/mashalimay/DL_project/Diffusion-Model-Latent-Space-Manipulation/stuff_val2017.json')


loading annotations into memory...
Done (t=0.20s)
creating index...
index created!


In [54]:
def fetch_images(cat_ids, num_images=5, timeout=30):
    """Download up to ``num_images`` images annotated with every id in ``cat_ids``.

    Args:
        cat_ids: Category ids that must ALL be annotated in a selected image.
        num_images: Maximum number of images to return. Values <= 0 return an
            empty list.
        timeout: Seconds to wait for each HTTP download before giving up.

    Returns:
        A list of numpy arrays, one per downloaded image, in the order the
        image ids were returned by the module-level ``coco`` index.

    Raises:
        requests.RequestException: if a download times out or the server
            answers with an error status.
    """
    required = set(cat_ids)

    # Candidate images for the requested categories; each one is re-checked
    # against its annotations below so that every category is really present.
    img_ids = coco.getImgIds(catIds=cat_ids)

    filtered_img_ids = []
    for img_id in img_ids:
        # Check the limit before appending so num_images=0 selects nothing
        # (checking afterwards always let one image through).
        if len(filtered_img_ids) >= num_images:
            break
        ann_ids = coco.getAnnIds(imgIds=img_id, catIds=cat_ids, iscrowd=None)
        present = {ann['category_id'] for ann in coco.loadAnns(ann_ids)}
        # One subset test covers both the single- and multi-category cases.
        if required <= present:
            filtered_img_ids.append(img_id)

    # Fetch the images
    images = []
    for img_id in filtered_img_ids:
        img = coco.loadImgs(img_id)[0]
        response = requests.get(img['coco_url'], timeout=timeout)
        # Fail here on HTTP errors instead of handing an error page to PIL.
        response.raise_for_status()
        with Image.open(BytesIO(response.content)) as image:
            images.append(np.array(image))

    return images

In [64]:
# Ground-truth example images, nested as gt_images[category][scene].
gt_images = {}
for category in categories_scenes:
    per_scene = gt_images.setdefault(category, {})
    for scene in categories_scenes[category]:
        print(f"Fetching images for {scene}")
        # The scene's label ids select which annotated images are fetched.
        wanted_ids = scene_labels[scene]
        per_scene[scene] = fetch_images(wanted_ids, num_images=5)

Fetching images for Cloud
Fetching images for Runway
Fetching images for Beach
Fetching images for Forest
Fetching images for Desert
Fetching images for City
Fetching images for Highway


In [70]:
# Reload `utils` so on-disk edits to the helpers used below take effect.
importlib.reload(u)
# Presumably resizes every fetched ground-truth image to 512x512 — confirm in
# utils.standardize_sizes.
resized_gt = u.standardize_sizes(gt_images, (512, 512), original=False)

# NOTE(review): `img_names` was loaded for the original images; confirm that
# save_imgs does not rely on it when original=False.
u.save_imgs(resized_gt, basedir='./data/gt', img_names=img_names, original=False)