In [None]:
import sys
from torch.utils.data import random_split, DataLoader 
sys.path.append("../")

In [None]:
from datasets import datasets
import constants
import pickle
import os
import collections
import pandas
from PIL import Image
import matplotlib.pyplot as plt
import numpy
import cv2
from feature_extraction import texture_analysis

Loading images from folders

In [None]:
# Collect, for every class folder under the raw data directory, the paths of
# the files it contains.  Keys are the lower-cased folder names.
RAW_DATA_DIR = "../data/raw_data"

image_paths = collections.defaultdict(list)

# sorted() keeps the traversal order independent of the filesystem's listing
# order, so later cells see the files in the same order on every run.
for folder_name in sorted(os.listdir(RAW_DATA_DIR)):

    # Join with the folder name exactly as it exists on disk: lower-casing it
    # first points at a missing directory on case-sensitive filesystems.
    full_path = os.path.join(RAW_DATA_DIR, folder_name)
    if not os.path.isdir(full_path):
        # Stray files lying next to the class folders are not classes.
        continue

    class_name = folder_name.lower()

    for fil in sorted(os.listdir(full_path)):
        image_paths[class_name].append(os.path.join(full_path, fil))

Counting the number of images in each class

In [None]:
# Print one line per class with the number of files collected for it.
for class_name in image_paths:
    n_images = len(image_paths[class_name])
    print(class_name, 'number of images: ', n_images)

Creating dataframe of images

In [None]:
# Long-format table: one row per image file, holding its class and its path.
# Handing the {class: [paths]} mapping to DataFrame together with
# columns=['class', 'path'] makes pandas look for keys named 'class' and
# 'path' (which do not exist) and yields an empty frame, so the rows are
# spelled out explicitly instead.
image_dataset = pandas.DataFrame(
    [
        (class_name, image_path)
        for class_name, class_paths in image_paths.items()
        for image_path in class_paths
    ],
    columns=['class', 'path']
)


def _load_image(image_path):
    """
    Function reads the image stored at the given path

    Args:
        image_path - path of the image file

    Returns:
        in-memory copy of the image; the underlying file is closed
        before returning
    """
    # Image.open is lazy and keeps the file open; copying inside the `with`
    # block forces the pixel data to be read and lets the handle be released.
    with Image.open(image_path) as image_file:
        return image_file.copy()


# Inserting actual image objects inside the dataframe
image_dataset['image'] = image_dataset['path'].apply(_load_image)

Forming Data Augmentation Instructions

In [None]:
# Target image size in pixels, as (HEIGHT, WIDTH).
HEIGHT, WIDTH = 512, 512

In [None]:
from torchvision import transforms
from PIL import Image


# torchvision documents `interpolation` as an `InterpolationMode` member;
# depending on the installed release, passing the integer Pillow constant
# (`Image.NEAREST`) triggers a warning.  The enum works across releases.
RESIZE_INTERPOLATION = transforms.InterpolationMode.NEAREST

# Steps for training images: tensor conversion, random horizontal flip,
# resize to (HEIGHT, WIDTH), random sharpness adjustment.
train_transformations = [
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Resize((HEIGHT, WIDTH), interpolation=RESIZE_INTERPOLATION),
    transforms.RandomAdjustSharpness(sharpness_factor=1.3, p=0.5),
]

# Steps for evaluation images: only the deterministic ones (no random
# flip / sharpness), so evaluation inputs are the same on every pass.
eval_transformations = [
    transforms.ToTensor(),
    transforms.Resize((HEIGHT, WIDTH), interpolation=RESIZE_INTERPOLATION),
]

Splitting dataset

In [None]:
import torch
from torch.utils.data import random_split

# Fixed seed so that the same rows land in the training / evaluation parts
# every time the notebook is re-run from a fresh kernel.
SPLIT_SEED = 42

train_size = int(len(image_dataset) * 0.7)  # 70% of the data will be in training set
evaluation_size = len(image_dataset) - train_size  # rest of 30% will be in evaluation set

# The row positions drawn for each part are read back later through
# `train_d.indices` / `test_d.indices`.
train_d, test_d = random_split(
    image_dataset,
    [train_size, evaluation_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED)
)

Building the training and evaluation dataset objects

In [None]:
# The dataframe stores the images in column 'image' and the class names in
# column 'class'; it has no `images` / `labels` attributes.
all_images = image_dataset['image'].to_numpy()
all_labels = image_dataset['class'].to_numpy()

# Row positions chosen for each part by random_split.
train_positions = list(train_d.indices)
test_positions = list(test_d.indices)

# training set
train_dataset = datasets.FaceRecognitionDataset(
    images=all_images[train_positions],
    labels=all_labels[train_positions]
)

# evaluation dataset
test_dataset = datasets.FaceRecognitionDataset(
    images=all_images[test_positions],
    labels=all_labels[test_positions]
)

Image Visualization

In [None]:
def visualize_k_random_images(images, k=8):
    """
    Function visualizes k randomly chosen images, extracted from given source

    Args:
        images - sequence of images (anything `imshow` can draw)
        k - number of images to show; capped at len(images), defaults to 8

    Raises:
        ValueError - if `images` is empty or `k` is not positive
    """
    n_available = len(images)
    if n_available == 0:
        raise ValueError("images must contain at least one image")
    if k <= 0:
        raise ValueError("k must be a positive integer")

    n_shown = min(k, n_available)
    # Draw positions instead of the images themselves, so numpy never has to
    # turn the image objects into an array; replace=False avoids duplicates.
    chosen = numpy.random.choice(n_available, size=n_shown, replace=False)

    # Two rows (one when a single image is shown), as many columns as needed.
    n_rows = 2 if n_shown > 1 else 1
    n_cols = -(-n_shown // n_rows)  # ceiling division

    # squeeze=False keeps the axes grid two-dimensional for every layout.
    _, axes = plt.subplots(n_rows, n_cols, squeeze=False)
    flat_axes = axes.ravel()

    for ax, position in zip(flat_axes, chosen):
        ax.imshow(images[position])
    for ax in flat_axes:
        # Also blanks the spare axis left over when n_shown is odd.
        ax.axis("off")
    plt.show()
        
# Preview a handful of randomly chosen training images; the number of images
# has to be passed along with the image source.
visualize_k_random_images(train_dataset.images, k=8)

Extracting noisy images

In [None]:
def is_noisy(image, threshold: int = 10):
    """
    Function flags an image whose Laplacian response has a low variance

    Args:
        image - PIL image or numpy array; either single-channel (H, W)
                or colour with 3 / 4 channels in RGB(A) order
        threshold - variance below which the image is flagged

    Returns:
        True if the variance of the image's Laplacian is below `threshold`,
        False otherwise

    NOTE(review): a low Laplacian variance is usually read as a sign of a
    blurred / flat image rather than a noisy one; the comparison direction
    is kept as it was - confirm that this is the intended criterion.
    """
    # OpenCV operates on arrays; this is a no-op for ndarray input.
    pixels = numpy.asarray(image)

    if pixels.ndim == 3 and pixels.shape[2] == 4:
        gray = cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
    elif pixels.ndim == 3 and pixels.shape[2] == 3:
        # cvtColor needs a colour-conversion code; IMREAD_GRAYSCALE is an
        # imread flag and does not request a grayscale conversion.
        gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
    else:
        # Already single-channel.
        gray = pixels

    # Laplacian requires the output depth; a float depth keeps the negative
    # responses that an unsigned output would clip.
    variance = cv2.Laplacian(gray, cv2.CV_64F).var()
    return bool(variance < threshold)

In [None]:
# is_noisy judges one image at a time, so the flag is computed per image and
# the resulting boolean mask is used to pick the flagged training images.
noisy_mask = numpy.array(
    [is_noisy(image, 10) for image in train_dataset.images],
    dtype=bool
)

noisy_images = numpy.array(
    train_dataset.images
)[noisy_mask]

Visualizing noisy images

In [None]:
# Draw every noisy image on its own axis, on a grid of at most four columns.
n_noisy = len(noisy_images)

if n_noisy == 0:
    print("No noisy images to display")
else:
    n_cols = min(n_noisy, 4)
    n_rows = -(-n_noisy // n_cols)  # ceiling division

    # squeeze=False keeps `ax` two-dimensional even for a single row/column.
    plot, ax = plt.subplots(n_rows, n_cols, squeeze=False)

    for axis, noisy_image in zip(ax.ravel(), noisy_images):
        axis.imshow(noisy_image)
    for axis in ax.ravel():
        # Also blanks the spare axes of an incomplete last row.
        axis.axis("off")
    plt.show()

Smoothing noisy images

In [None]:
def apply_median_filter(image: numpy.ndarray, kernel_size: int):
    """
    Function runs OpenCV's non-linear median filter over the image,
    which is the usual remedy for salt-and-pepper noise

    Args:
        image - image to filter
        kernel_size - forwarded to cv2.medianBlur as `ksize`

    Returns:
        filtered image, or None when `image` has length 0
    """
    # Nothing to filter: hand back None instead of calling OpenCV.
    if not len(image):
        return None
    return cv2.medianBlur(src=image, ksize=kernel_size)

def apply_bilateral_filtering(
    image: numpy.ndarray,
    kernel_size: int,
    sigma_space: int,
    sigma_color: int
):
    """
    Function smooths the given image with OpenCV's bilateral filter

    Args:
        image - image to smooth
        kernel_size - forwarded to cv2.bilateralFilter as `d`
        sigma_space - forwarded as `sigmaSpace`
        sigma_color - forwarded as `sigmaColor`

    Returns:
        whatever cv2.bilateralFilter returns for these arguments
    """
    filter_arguments = dict(
        src=image,
        d=kernel_size,
        sigmaColor=sigma_color,
        sigmaSpace=sigma_space,
    )
    return cv2.bilateralFilter(**filter_arguments)

In [None]:
# Walk over the training images themselves so that `idx` is each image's own
# position in `train_dataset.images`.  Enumerating only the noisy subset
# numbers the images 0..n-1 within that subset, and writing back with those
# numbers would overwrite unrelated training images.
for idx, image in enumerate(train_dataset.images):
    if not is_noisy(image, 10):
        continue

    # Applying filter to noisy image
    blurred_img = apply_bilateral_filtering(
        image=numpy.asarray(image),  # OpenCV works on arrays, not PIL images
        sigma_color=30,
        sigma_space=30,
        kernel_size=5
    )
    train_dataset.images[idx] = blurred_img

Texture Analysis

In [None]:
# Store, for every image, the result of texture_analysis.get_texture_features
# in a new 'texture_features' column.
image_dataset['texture_features'] = image_dataset['image'].apply(
    lambda img: texture_analysis.get_texture_features(image=img)
)

Evaluating Transformed Images

In [None]:
def ssim_score(orig_img, blur_img):
    """
    Function computes the structural similarity index between two images

    Args:
        orig_img - reference image (PIL image or numpy array)
        blur_img - image to compare, same shape as `orig_img`

    Returns:
        mean structural similarity of the two images
    """
    # skimage.metrics exposes this metric as `structural_similarity`; it has
    # no `ssim` / `ncc` names.  Imported here so that the numpy-only helper
    # below stays usable where scikit-image is not installed.
    from skimage.metrics import structural_similarity

    orig_pixels = numpy.asarray(orig_img)
    blur_pixels = numpy.asarray(blur_img)

    extra_arguments = {}
    if orig_pixels.ndim == 3:
        # Colour image: the last axis holds channels, not a spatial dimension.
        extra_arguments["channel_axis"] = -1
    if numpy.issubdtype(orig_pixels.dtype, numpy.floating):
        # The value range cannot be inferred from a float dtype.
        low = min(orig_pixels.min(), blur_pixels.min())
        high = max(orig_pixels.max(), blur_pixels.max())
        extra_arguments["data_range"] = float(high - low) or 1.0

    return structural_similarity(orig_pixels, blur_pixels, **extra_arguments)


def normalized_cross_correlation(orig_img, blur_img):
    """
    Function computes the zero-mean normalized cross-correlation
    of two images

    Args:
        orig_img - reference image (PIL image or numpy array)
        blur_img - image to compare, same shape as `orig_img`

    Returns:
        value in [-1, 1]; 1 means the images differ only by brightness
        offset / positive contrast scaling.  0.0 is returned when either
        image is constant, because the correlation is undefined then.

    Raises:
        ValueError - if the images are empty or their shapes differ
    """
    orig_pixels = numpy.asarray(orig_img, dtype=numpy.float64)
    blur_pixels = numpy.asarray(blur_img, dtype=numpy.float64)

    if orig_pixels.shape != blur_pixels.shape:
        raise ValueError(
            "images must have the same shape, got %s and %s"
            % (orig_pixels.shape, blur_pixels.shape)
        )
    if orig_pixels.size == 0:
        raise ValueError("images must not be empty")

    orig_centered = orig_pixels - orig_pixels.mean()
    blur_centered = blur_pixels - blur_pixels.mean()

    denominator = numpy.sqrt(
        (orig_centered ** 2).sum() * (blur_centered ** 2).sum()
    )
    if denominator == 0:
        return 0.0
    return float((orig_centered * blur_centered).sum() / denominator)

In [None]:
imgs = image_dataset['image'].to_numpy()

# NOTE(review): `dataset_idxs` is assumed to give, for every training image,
# its row position in `image_dataset` - confirm against FaceRecognitionDataset.
for idx, image in zip(train_dataset.dataset_idxs, train_dataset.images):
    # A separate name for the value: assigning it to `ssim` would shadow the
    # metric of the same name for every following iteration.
    ssim_value = ssim_score(imgs[idx], image)
    cc = normalized_cross_correlation(imgs[idx], image)
    # Both values go into one tuple: `%` binds tighter than the comma.
    print('ssim - %s; normalized cross correlation - %s;' % (ssim_value, cc))

Saving datasets to pickle format

In [None]:
PROCESSED_DATA_DIR = "../data/processed_data"

# Create the target folder on a fresh checkout instead of failing on open().
os.makedirs(PROCESSED_DATA_DIR, exist_ok=True)

# pickle.dumps only returns bytes (its second argument is the protocol, not
# a path); pickle.dump writes the object to an open binary file.
for file_name, dataset in (
    ("training_set.pkl", train_dataset),
    ("testing_set.pkl", test_dataset),
):
    with open(os.path.join(PROCESSED_DATA_DIR, file_name), "wb") as output_file:
        pickle.dump(dataset, output_file)