## Notebook to generate segmentation masks for a subset of the COCO val2017 images.
### The code in this notebook only handles images labelled with cats or dogs (cat_ids = [17 (cats), 18 (dogs)]) from the COCO dataset.

In [107]:
import pandas as pd
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from pycocotools.coco import COCO
import os
import json
import pickle
import shutil
import matplotlib.pyplot as plt
%matplotlib inline

# Dog and cat categories

In [112]:
# Folder holding the images this notebook works on.
img_dir = 'train_100dogs_100cats_100nocatsnodogs/images'
# Index of the COCO val2017 instance annotations (loaded once, reused by later cells).
coco = COCO('raw/instances_val2017.json')
# COCO category ids of the two target classes.
categories_dict = dict(cat=17, dog=18)

loading annotations into memory...
Done (t=1.49s)
creating index...
index created!


In [99]:
# Collect cat and dog images, one class at a time, together with their masks.
#
# For every entry of `categories_dict` this cell:
#   1. reads `labels_<name>.json`,
#   2. skips images annotated with BOTH a cat and a dog,
#   3. sums the COCO instance masks of that class into a single float array,
#   4. pickles the array under `train/masks/` (existing files are left untouched),
#   5. appends one metadata row to `label_cat_l`.
# At most MAX_CATDOG_IMAGES_PER_LABEL images are kept per class. The image file
# itself is not decoded here: only the annotations are needed to build the mask.
MAX_CATDOG_IMAGES_PER_LABEL = 100
CAT_ID, DOG_ID = 17, 18
CATDOG_COLUMNS = ["image_id", "image_path", "mask_image_path", "label",
                  "num_elements_of_label_category"]

# NOTE(review): masks and the CSV are written under `train/`, while later cells
# read from `train_100dogs_100cats_100nocatsnodogs/` — confirm which is intended.
catdog_mask_dir = 'train/masks/'
os.makedirs(catdog_mask_dir, exist_ok=True)

label_cat_l = []
for cat_name, cat_id in categories_dict.items():
    with open('train_100dogs_100cats_100nocatsnodogs/labels_'+cat_name+'.json') as labels_file:
        labels = json.load(labels_file)

    # Group the category ids per image in one pass instead of re-scanning the
    # whole annotation list for every image.
    categories_by_image = {}
    for ann in labels['annotations']:
        categories_by_image.setdefault(ann['image_id'], set()).add(ann['category_id'])

    n_selected = 0
    for val_img in labels['images']:
        img_id = val_img['id']
        # Images without any annotation simply yield an empty set here.
        present = categories_by_image.get(img_id, set())
        if CAT_ID in present and DOG_ID in present:
            continue

        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id, catIds=[cat_id], iscrowd=None))
        if not anns:
            continue

        # All instances share the same value (no per-instance `* (i + 1)` id)
        # because each class is segmented on its own.
        instance_masks = [coco.annToMask(ann) for ann in anns]
        mask = np.sum(instance_masks, axis=0, dtype=np.float64)
        # NOTE(review): pixels covered by exactly two instances are reset to 0
        # and pixels covered by three or more keep their count. Kept as in the
        # original; confirm whether a binary union (clip to 1) was intended.
        mask[mask == 2.0] = 0.0

        file_stem = val_img['file_name'].split(".")[0]
        label_cat_l.append({"image_id": img_id,
                            "image_path": file_stem+'.jpg',
                            "mask_image_path": file_stem+'_mask.pkl',
                            "label": cat_name,
                            "num_elements_of_label_category": len(anns)})

        mask_path = catdog_mask_dir+file_stem+'_mask.pkl'
        if not os.path.isfile(mask_path):
            with open(mask_path, 'wb') as f:
                pickle.dump(mask, f)

        n_selected += 1
        if n_selected == MAX_CATDOG_IMAGES_PER_LABEL:
            break

    # Written after every class so partial results survive a failure on a later
    # class; explicit columns keep the header even when nothing was selected.
    label_cat_df = pd.DataFrame(label_cat_l, columns=CATDOG_COLUMNS)
    label_cat_df.to_csv('train/train_for_OutSegmentationModelTest.csv', index=False)


In [115]:
# Reload the saved label table from disk and preview its first rows.
train_csv_path = 'train_100dogs_100cats_100nocatsnodogs/train_for_OutSegmentationModelTest.csv'
train_df = pd.read_csv(train_csv_path)
train_df.head(n=5)

Unnamed: 0,image_id,image_path,mask_image_path,label,num_elements_of_label_category
0,443303,000000443303.jpg,000000443303_mask.pkl,cat,1
1,25560,000000025560.jpg,000000025560_mask.pkl,cat,1
2,403817,000000403817.jpg,000000403817_mask.pkl,cat,1
3,366141,000000366141.jpg,000000366141_mask.pkl,cat,1
4,166277,000000166277.jpg,000000166277_mask.pkl,cat,1


In [108]:
# Move every file of the training image folder that is NOT listed in
# `train_df['image_path']` into `test/images`.
# NOTE(review): this relocates files on disk and depends on the current content
# of `train_df`; with an empty table every image would be moved.
train_images_dir = os.path.join('train_100dogs_100cats_100nocatsnodogs', 'images')
test_images_dir = os.path.join('test', 'images')
# The destination may not exist yet on a fresh checkout.
os.makedirs(test_images_dir, exist_ok=True)

# Set lookup instead of filtering the whole DataFrame once per file.
train_image_names = set(train_df['image_path'])
for img_filename in os.listdir(train_images_dir):
    if img_filename not in train_image_names:
        src_path = os.path.join(train_images_dir, img_filename)
        dst_path = os.path.join(test_images_dir, img_filename)
        shutil.move(src_path, dst_path)

# Categories that are neither cat nor dog 
### (I have randomly chosen: person, horse, bird, zebra and cow)

In [154]:
# Collect images in which neither cats nor dogs are annotated.
#
# For every category in `categories_notcatnotdog_dict` this cell reads
# `labels_<name>.json`, skips images annotated with a cat (17) or a dog (18),
# and stores an all-zero mask plus one metadata row per selected image.
# Selection stops once MAX_NOCATNODOG_IMAGES_PER_LABEL images with at least one
# annotation of the category have been recorded for that category.
MAX_NOCATNODOG_IMAGES_PER_LABEL = 20
NOCATNODOG_COLUMNS = ["image_id", "image_path", "mask_image_path", "label",
                      "num_elements_of_label_category"]
categories_notcatnotdog_dict = {"person":1, "horse": 19, "bird": 16, "zebra": 24, "cow": 21}

nocatnodog_mask_dir = 'train_100dogs_100cats_100nocatsnodogs/masks_nocat_nodog/'
os.makedirs(nocatnodog_mask_dir, exist_ok=True)

label_cat_l = []
# An image can carry several of the categories above; it is recorded only once,
# under the first category that selects it. Tracking this in memory (rather
# than by "mask file already on disk") keeps a re-run from producing an empty
# or stale table when the masks of a previous run are still present.
recorded_stems = set()
for cat_name, cat_id in categories_notcatnotdog_dict.items():
    with open('train_100dogs_100cats_100nocatsnodogs/labels_'+cat_name+'.json') as labels_file:
        labels = json.load(labels_file)

    # Group the category ids per image in one pass instead of re-scanning the
    # whole annotation list for every image.
    # NOTE(review): only the annotations present in this labels file are
    # inspected; confirm the export was not filtered to a single category.
    categories_by_image = {}
    for ann in labels['annotations']:
        categories_by_image.setdefault(ann['image_id'], set()).add(ann['category_id'])

    n_selected = 0
    for val_img in labels['images']:
        img_id = val_img['id']
        # Images without any annotation simply yield an empty set here.
        present = categories_by_image.get(img_id, set())
        if 17 in present or 18 in present:
            continue

        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id, catIds=[cat_id], iscrowd=None))
        if not anns:
            continue

        file_stem = val_img['file_name'].split(".")[0]
        if file_stem not in recorded_stems:
            recorded_stems.add(file_stem)
            # The mask is all zeros with the shape of the first instance mask;
            # the per-instance accumulation used for cats/dogs is not applied.
            mask = coco.annToMask(anns[0]) * 0.0

            label_cat_l.append({"image_id": img_id,
                                "image_path": file_stem+'.jpg',
                                "mask_image_path": file_stem+'_mask.pkl',
                                "label": cat_name,
                                "num_elements_of_label_category": len(anns)})
            n_selected += 1

            mask_path = nocatnodog_mask_dir+file_stem+'_mask.pkl'
            if not os.path.isfile(mask_path):
                with open(mask_path, 'wb') as f:
                    pickle.dump(mask, f)

        if n_selected == MAX_NOCATNODOG_IMAGES_PER_LABEL:
            break

    # Rebuilt from this cell's own records, never from a leftover variable.
    label_cat_df = pd.DataFrame(label_cat_l, columns=NOCATNODOG_COLUMNS)
    label_cat_df.to_csv('train_100dogs_100cats_100nocatsnodogs/train_for_OutSegmentationModelTest_nocatnodog.csv', index=False)

# Join the images from the 3 categories (cats, dogs and nocats_nodogs)

In [159]:
# Read the two per-group tables and stack them (cat/dog rows first) into `train_df`.
# NOTE(review): no visible cell writes the `_catdog.csv` file — presumably it was
# renamed by hand from the cat/dog table; confirm.
train_nocats_df = pd.read_csv('train_100dogs_100cats_100nocatsnodogs/train_for_OutSegmentationModelTest_nocatnodog.csv')
train_catdog_df = pd.read_csv('train_100dogs_100cats_100nocatsnodogs/train_for_OutSegmentationModelTest_catdog.csv')
train_df = pd.concat([train_catdog_df, train_nocats_df])
train_df.shape


(300, 5)

In [160]:
# Move every file of the training image folder that is NOT listed in
# `train_df['image_path']` into `test/images`.
# NOTE(review): this relocates files on disk and depends on the current content
# of `train_df`; with an empty table every image would be moved.
train_images_dir = os.path.join('train_100dogs_100cats_100nocatsnodogs', 'images')
test_images_dir = os.path.join('test', 'images')
# The destination may not exist yet on a fresh checkout.
os.makedirs(test_images_dir, exist_ok=True)

# Set lookup instead of filtering the whole DataFrame once per file.
train_image_names = set(train_df['image_path'])
for img_filename in os.listdir(train_images_dir):
    if img_filename not in train_image_names:
        src_path = os.path.join(train_images_dir, img_filename)
        dst_path = os.path.join(test_images_dir, img_filename)
        shutil.move(src_path, dst_path)

In [162]:
# Save the current `train_df` without its index column.
# NOTE(review): an earlier cell reads a cat/dog table from this same path — confirm the overwrite is intended.
combined_csv_path = 'train_100dogs_100cats_100nocatsnodogs/train_for_OutSegmentationModelTest.csv'
train_df.to_csv(combined_csv_path, index=False)