In [1]:
import numpy as np
import pandas as pd
import cv2
import json
from tqdm import tqdm
# For reading annotations file
from pycocotools.coco import COCO

from collections import OrderedDict
import os 
# For data visualisation
import matplotlib.pyplot as plt
import glob

from skimage.exposure import is_low_contrast
from imutils.paths import list_images
import imutils
import copy

## Blur filtering

In [2]:
# Locations of the annotation JSON files and image folders for each split.
TRAIN_ANNOTATIONS_PATH = "data/train/new_top50cat_train_filter.json"
# Alternative training annotation file (disabled):
#TRAIN_ANNOTATIONS_PATH = "data/train/new_ann_train_filter.json"
# NOTE(review): "DIRECTIORY" is a misspelling of "DIRECTORY"; the name is left
# unchanged here because other cells or files may reference it.
TRAIN_IMAGE_DIRECTIORY = "data/train/images/"
VAL_ANNOTATIONS_PATH = "data/val/new_top50cat_val_filter.json"
# Alternative validation annotation file (disabled):
#VAL_ANNOTATIONS_PATH = "data/val/new_ann_val_filter.json"
VAL_IMAGE_DIRECTIORY = "data/val/images/"

# Load the training annotations through the pycocotools COCO helper.
train_coco = COCO(TRAIN_ANNOTATIONS_PATH)

loading annotations into memory...
Done (t=2.40s)
creating index...
index created!


In [4]:
# Reading the annotation files
def _read_json(path):
    """Parse the JSON file at ``path`` and return the decoded object."""
    with open(path) as handle:
        return json.load(handle)


# Raw annotation dictionaries for the train and validation splits.
train_annotations_data = _read_json(TRAIN_ANNOTATIONS_PATH)
val_annotations_data = _read_json(VAL_ANNOTATIONS_PATH)

In [5]:
# Show the top-level sections of the training annotation file.
train_annotations_data.keys()

dict_keys(['info', 'images', 'annotations', 'categories'])

In [6]:
# Inspect the first image record to see which fields it carries.
train_annotations_data['images'][0]

{'id': 131094, 'file_name': '131094.jpg', 'width': 480, 'height': 480}

In [7]:
# Inspect the first annotation record to see which fields it carries.
train_annotations_data['annotations'][0]

{'id': 184135,
 'image_id': 131094,
 'category_id': 1352,
 'segmentation': [[115.0,
   206.5,
   98.0,
   204.5,
   74.5,
   182.0,
   65.0,
   167.5,
   47.5,
   156.0,
   39.5,
   137.0,
   39.5,
   130.0,
   51.0,
   118.5,
   62.00000000000001,
   112.5,
   76.0,
   113.5,
   121.5,
   151.0,
   130.5,
   169.0,
   131.5,
   185.0,
   128.5,
   195.0]],
 'area': 5059.0,
 'bbox': [39.5, 39.5, 167.0, 92.0],
 'iscrowd': 0}

In [8]:
def filter_blur(annotations, image_path, json_path, blur_threshold=50):
    """Write a copy of ``annotations`` that excludes blurry images.

    Sharpness is measured as the variance of the Laplacian of the grayscale
    image; an image whose score is below ``blur_threshold`` is treated as
    blurry and removed together with all of its annotations.

    Args:
        annotations: dict with the keys 'info', 'categories', 'images' and
            'annotations'. Each image entry needs 'id' and 'file_name'; each
            annotation entry needs 'image_id'.
        image_path: directory that contains the image files.
        json_path: destination of the filtered annotation JSON file.
        blur_threshold: minimum Laplacian variance for an image to be kept.

    Returns:
        None. The filtered annotations are written to ``json_path`` and the
        number of dropped and kept images is printed.
    """
    new_annotations = dict()
    new_annotations['info'] = copy.deepcopy(annotations['info'])
    new_annotations['categories'] = copy.deepcopy(annotations['categories'])

    blur_image_id_list = []
    unreadable_files = []
    new_image_list = []
    for image in tqdm(annotations['images']):
        file_path = os.path.join(image_path, image['file_name'])
        image_read = cv2.imread(file_path)
        if image_read is None:
            # cv2.imread signals a missing or corrupt file by returning None;
            # such an image cannot be scored, so it is left out and reported.
            unreadable_files.append(file_path)
            continue
        gray = cv2.cvtColor(image_read, cv2.COLOR_BGR2GRAY)
        fm = cv2.Laplacian(gray, cv2.CV_64F).var()
        if fm < blur_threshold:
            blur_image_id_list.append(image['id'])
        else:
            new_image_list.append(image)

    # Group the annotations by image once so each kept image is a dictionary
    # lookup instead of a full scan over every annotation.
    annotations_by_image = {}
    for annotation in annotations['annotations']:
        annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

    # Iterating over the kept images keeps the output in image order, with the
    # original annotation order inside each image.
    new_annotation_list = [
        copy.deepcopy(annotation)
        for image in tqdm(new_image_list)
        for annotation in annotations_by_image.get(image['id'], [])
    ]

    new_annotations['images'] = new_image_list
    new_annotations['annotations'] = new_annotation_list
    print(len(blur_image_id_list))
    print(len(new_image_list))
    if unreadable_files:
        print("Skipped unreadable images:", len(unreadable_files))
    with open(json_path, 'w') as f:
        json.dump(new_annotations, f)

In [11]:
# Write a blur-filtered copy of the validation annotations.
filter_blur(
    val_annotations_data,
    image_path="data/val/images/",
    json_path="data/val/new_top50cat_val_filter_no_blur.json",
)

100%|██████████| 583/583 [00:04<00:00, 129.76it/s]
100%|██████████| 507/507 [00:00<00:00, 10349.60it/s]


76
507


In [None]:
# Write a blur-filtered copy of the training annotations.
filter_blur(
    train_annotations_data,
    image_path="data/train/images/",
    json_path="data/train/new_top50cat_train_filter_no_blur.json",
)

## Contrast increasing

### Detect low contrast

In [7]:

def get_low_contrast_list(data_path, image_infos=None, fraction_threshold=0.6):
    """Return the file names of the images that are flagged as low contrast.

    Each image is read from ``data_path``, resized to a width of 450 pixels,
    converted to grayscale and tested with ``skimage.exposure.is_low_contrast``.

    Args:
        data_path: directory that contains the image files.
        image_infos: iterable of image records, each with a 'file_name' key.
            Defaults to ``train_annotations_data['images']``; pass the image
            list of another split (for example the validation one) to scan
            that split instead.
        fraction_threshold: value forwarded to ``is_low_contrast``.

    Returns:
        list of file names (not full paths) of the low-contrast images.
    """
    if image_infos is None:
        image_infos = train_annotations_data['images']

    low_contrast_list = []
    unreadable_count = 0
    for image_info in tqdm(image_infos):
        # os.path.join works whether or not data_path ends with a separator.
        file_path = os.path.join(data_path, image_info['file_name'])
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None for a missing or corrupt file.
            unreadable_count += 1
            continue
        image = imutils.resize(image, width=450)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if is_low_contrast(gray, fraction_threshold=fraction_threshold):
            low_contrast_list.append(image_info['file_name'])

    if unreadable_count:
        print("Skipped unreadable images:", unreadable_count)
    return low_contrast_list

100%|██████████| 39962/39962 [22:54<00:00, 29.08it/s] 


In [None]:
# Directories end with a separator and point at the image folder of each split
# (the validation images live in data/val/images/, not data/train/val).
low_contrast_list_train = get_low_contrast_list('data/train/images/')
# NOTE(review): get_low_contrast_list takes its file names from
# train_annotations_data['images'] unless told otherwise, so confirm that the
# validation run is really scanning the validation image list.
low_contrast_list_val = get_low_contrast_list('data/val/images/')

In [8]:
# Number of images flagged as low contrast in each split.
print(len(low_contrast_list_train))
print(len(low_contrast_list_val))


2984

### Increase contrast if low contrast

In [None]:
def improve_contrast(low_contrast_list, data_path, alpha=1.2, beta=30):
    """Raise contrast and brightness of the listed images, overwriting them.

    Every image is transformed with ``cv2.convertScaleAbs`` using ``alpha``
    and ``beta`` and written back to the path it was read from.

    WARNING: the original files are replaced, so running this twice applies
    the adjustment twice. Keep a backup of the images if that matters.

    Args:
        low_contrast_list: iterable of image file names to adjust.
        data_path: directory that contains the image files.
        alpha: simple contrast control (scale factor).
        beta: simple brightness control (added offset).

    Returns:
        None.
    """
    for image_name in tqdm(low_contrast_list):
        file_path = os.path.join(data_path, image_name)
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None for a missing or corrupt file.
            print("Skipping unreadable image:", file_path)
            continue
        new_image = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
        # Save the image (this will change the original image).
        if not cv2.imwrite(file_path, new_image):
            print("Could not write image:", file_path)
    

In [None]:
# Adjust the flagged images of each split in place.
improve_contrast(low_contrast_list_train, data_path="data/train/images/")
improve_contrast(low_contrast_list_val, data_path="data/val/images/")