In [1]:
import os
import json
import pandas as pd
from PIL import Image
import sys

# Make the parent of the current working directory importable so the
# `utils` package imported below can be resolved (presumably the project's
# source root -- confirm). Guarded so that re-running this cell does not
# append the same entry to sys.path again.
_parent_dir = os.path.abspath("../")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

from utils.VisualisationUtils import *

In [2]:
# Root of the local VizWiz copy. Set the VIZWIZ_DATASET_DIR environment
# variable to point the notebook at another location; when it is unset the
# original hard-coded path is used, so existing setups keep working.
dataset_dir = os.environ.get(
    'VIZWIZ_DATASET_DIR',
    r'C:\Users\Sai\Documents\Neu\Masters Project\PerceptionPrivacy\datasets\VizWiz',
)
# Folder holding the annotation JSON files.
annotations_dir = os.path.join(dataset_dir, 'Annotations')

In [3]:
# Load the full annotation file. The context manager closes the handle that a
# bare open() call would leave open, and the explicit UTF-8 encoding avoids
# depending on the platform's default text encoding.
with open(os.path.join(annotations_dir, 'dataset.json'), encoding='utf-8') as annotations_file:
    annotations = json.load(annotations_file)

In [4]:
# Report how many entries the loaded annotation file contains.
image_total = len(annotations)
print('Total Number of images:', image_total)

Total Number of images: 13571


In [5]:
# Report the number of entries in every annotation JSON file.
# sorted() makes the listing order deterministic (os.listdir returns entries
# in arbitrary order), and the context manager closes each file after reading
# instead of leaking one open handle per file.
for file in sorted(os.listdir(annotations_dir)):
    if not file.endswith('.json'):
        continue
    print(file)
    with open(os.path.join(annotations_dir, file), encoding='utf-8') as annotations_file:
        annotations_t = json.load(annotations_file)
    print('Total Number of images:', len(annotations_t))

dataset.json
Total Number of images: 13571
test.json
Total Number of images: 3409
train.json
Total Number of images: 8791
trainval.json
Total Number of images: 10162
val.json
Total Number of images: 1371


In [6]:
# Load the complete annotation set into a DataFrame.
dataset_json_path = os.path.join(annotations_dir, 'dataset.json')
annotations_df = pd.read_json(dataset_json_path)

In [7]:
# Preview the first five rows of the annotation table.
annotations_df.head(n=5)

Unnamed: 0,image,private_regions,private,question
0,VizWiz_v2_000000031173.jpg,[],0,
1,VizWiz_v2_000000031174.jpg,[],0,
2,VizWiz_v2_000000031175.jpg,[],0,
3,VizWiz_v2_000000031176.jpg,[],0,
4,VizWiz_v2_000000031177.jpg,"[{'polygon': [[397, 479], [535, 473], [541, 49...",1,


In [8]:
# Tally how many rows carry each value of the 'private' flag.
private_flags = annotations_df['private']
private_flags.value_counts()

private
0    8081
1    5490
Name: count, dtype: int64

In [23]:
# Keep only the rows flagged private == 1 (the samples that have segmentations).
is_private = annotations_df['private'] == 1
segmentation_annotations = annotations_df[is_private]
# Class name of the first region of the first private sample.
first_image_regions = segmentation_annotations['private_regions'].iloc[0]
label = first_image_regions[0]['class']
print(label)

Text:Computer Screen


In [None]:
# Count classes two ways: per annotated region (instance) and per image.
classes = []   # one entry per private region, across all images
classes2 = []  # one entry per distinct class per image
# Iterate over the region lists directly rather than repeating .iloc[i]
# lookups for every single region.
for regions in segmentation_annotations['private_regions']:
    region_classes = [region['class'] for region in regions]
    classes.extend(region_classes)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result does not depend on per-process string-hash ordering as set() does.
    classes2.extend(dict.fromkeys(region_classes))

classes_df = pd.DataFrame(classes, columns=['class'])
print('Number of instances of each class')
print(classes_df['class'].value_counts())  # region (instance) counts, not image counts
print('\nNumber of images with each class')
print(pd.Series(classes2).value_counts())  # number of images containing each class

Number of instances of each class
class
Object:Face                     2814
Text:Miscellaneous Papers       1497
Text:Computer Screen             663
Text:Other                       623
Text:Letter                      616
Object:Framed Photo              552
Text:Pill Bottle/Box             350
Text:Credit Card                 345
Text:Suspicious                  324
Text:Business Card               271
Text:Newspaper                   133
Object:Face Reflection           115
Text:License Plate                87
Object:Other                      85
Text:Receipt                      82
Object:Pregnancy Test Result      67
Text:Street Sign                  31
Text:Clothing                     29
Object:Suspicious                 26
Object:Tattoo                     25
Text:Menu                         18
Text:Poster                       12
Text:Book                          6
Name: count, dtype: int64

Number of images with each class
Object:Face                     2249
Text:Miscell

In [11]:
# Add per-image 'class' and 'polygon' columns derived from 'private_regions'.
# Rows whose 'private' flag is not 1 get the placeholder ['non-private'] in
# both columns.
def _image_classes(row):
    """Return the distinct region classes of one row, in first-seen order."""
    if row['private'] != 1:
        return ['non-private']
    # dict.fromkeys de-duplicates deterministically; list(set(...)) produced an
    # order that could change between kernel sessions (string-hash ordering).
    return list(dict.fromkeys(region['class'] for region in row['private_regions']))


def _image_polygons(row):
    """Return the polygon of every private region of one row."""
    if row['private'] != 1:
        return ['non-private']
    return [region['polygon'] for region in row['private_regions']]


annotations_df['class'] = annotations_df.apply(_image_classes, axis=1)
annotations_df['polygon'] = annotations_df.apply(_image_polygons, axis=1)
annotations_df.head()

Unnamed: 0,image,private_regions,private,question,class,polygon
0,VizWiz_v2_000000031173.jpg,[],0,,[non-private],[non-private]
1,VizWiz_v2_000000031174.jpg,[],0,,[non-private],[non-private]
2,VizWiz_v2_000000031175.jpg,[],0,,[non-private],[non-private]
3,VizWiz_v2_000000031176.jpg,[],0,,[non-private],[non-private]
4,VizWiz_v2_000000031177.jpg,"[{'polygon': [[397, 479], [535, 473], [541, 49...",1,,[Text:Computer Screen],"[[[397, 479], [535, 473], [541, 496], [403, 50..."


In [12]:
# Select the rows whose 'class' list holds more than one entry.
class_counts = annotations_df['class'].map(len)
annotations_df[class_counts > 1]

Unnamed: 0,image,private_regions,private,question,class,polygon
90,VizWiz_v2_000000031263.jpg,"[{'polygon': [[255, 919], [796, 951], [787, 10...",1,,"[Text:Menu, Text:Suspicious]","[[[255, 919], [796, 951], [787, 1075], [248, 1..."
110,VizWiz_v2_000000031283.jpg,"[{'polygon': [[7, 775], [33, 869], [93, 967], ...",1,,"[Object:Suspicious, Object:Other]","[[[7, 775], [33, 869], [93, 967], [186, 1062],..."
117,VizWiz_v2_000000031290.jpg,"[{'polygon': [[96, 84], [139, 68], [146, 85], ...",1,,"[Text:Letter, Text:Business Card]","[[[96, 84], [139, 68], [146, 85], [104, 99]], ..."
153,VizWiz_v2_000000031326.jpg,"[{'polygon': [[248, 423], [870, 339], [874, 77...",1,,"[Text:Computer Screen, Text:Other, Object:Other]","[[[248, 423], [870, 339], [874, 779], [248, 78..."
158,VizWiz_v2_000000031331.jpg,"[{'polygon': [[614, 0], [699, 130], [792, 130]...",1,,"[Object:Framed Photo, Text:Clothing, Object:Face]","[[[614, 0], [699, 130], [792, 130], [848, 93],..."
...,...,...,...,...,...,...
13547,VizWiz_v2_000000044779.jpg,"[{'polygon': [[301, 965], [282, 857], [325, 66...",1,,"[Text:Computer Screen, Object:Face Reflection]","[[[301, 965], [282, 857], [325, 661], [379, 63..."
13549,VizWiz_v2_000000044781.jpg,"[{'polygon': [[206, 310], [237, 459], [220, 51...",1,,"[Object:Face, Object:Tattoo]","[[[206, 310], [237, 459], [220, 516], [222, 57..."
13558,VizWiz_v2_000000044790.jpg,"[{'polygon': [[652, 207], [722, 286], [673, 33...",1,,"[Object:Framed Photo, Object:Face]","[[[652, 207], [722, 286], [673, 334], [585, 33..."
13569,VizWiz_v2_000000044801.jpg,"[{'polygon': [[209, 261], [452, 192], [749, 34...",1,,"[Text:Street Sign, Object:Face]","[[[209, 261], [452, 192], [749, 342], [829, 53..."


In [20]:
# Build a lookup from image file name to its full path across the image folders.
# NOTE(review): a file name present in more than one folder keeps the path from
# the folder listed last -- confirm that names are unique across folders.
img_dir_mapping = {}
dirs = ['HoleMean_Images', 'Filling_Image', 'Original_Images_Nonprivate']
for subdir in dirs:  # renamed from `dir`, which shadowed the builtin dir()
    fulldir = os.path.join(dataset_dir, subdir)
    # fulldir already contains dataset_dir; joining dataset_dir onto it a
    # second time would double the prefix whenever dataset_dir is relative.
    for file in os.listdir(fulldir):
        img_dir_mapping[file] = os.path.join(fulldir, file)

In [None]:
class_name = 'Text:Computer Screen'
# Collect every image whose 'class' list contains class_name.
has_class = annotations_df['class'].apply(lambda x: class_name in x)
images = []
for image in annotations_df.loc[has_class, 'image']:
    img = Image.open(img_dir_mapping[image])
    images.append(img)

print('Number of images with class', class_name, ':', len(images))

# Show the images four at a time. Slicing past the end of the list already
# yields the shorter final batch, so no separate pass for the remainder is
# needed (the previous extra pass displayed the last few images twice).
for i in range(0, len(images), 4):
    show_images(images[i:i+4], figsize=(15, 10))