In [None]:
import sys
from torch.utils.data import random_split, DataLoader 
sys.path.append("../")

In [None]:
from datasets import datasets
import constants
import pickle
import os
import collections
import pandas
from PIL import Image
import matplotlib.pyplot as plt
import numpy
import cv2
from augmentation import augmentation

Loading images from folders

In [None]:
# Maps a lower-cased class name to the list of image file paths found in
# that class's folder under the raw data directory.
raw_data_dir = "../data/raw_data"
image_paths = collections.defaultdict(list)

# sorted() makes the listing order (and therefore everything derived from
# it) deterministic; os.listdir returns entries in arbitrary order.
for folder_name in sorted(os.listdir(raw_data_dir)):

    # Join with the folder name exactly as it exists on disk. Only the
    # class label is lower-cased, otherwise "Alice/" could not be opened
    # on a case-sensitive filesystem.
    class_dir = os.path.join(raw_data_dir, folder_name)
    if not os.path.isdir(class_dir):
        # skip stray files living next to the class folders
        continue

    class_name = folder_name.lower()

    for file_name in sorted(os.listdir(class_dir)):
        file_path = os.path.join(class_dir, file_name)
        if os.path.isfile(file_path):
            image_paths[class_name].append(file_path)

Number of images available for each class

In [None]:
# Report how many image paths were collected under every class label.
for label in image_paths:
    print(label, 'number of images: ', len(image_paths[label]))

Image Augmentation (creating additional image variants, e.g. blurred or noisy copies)

In [None]:
# A dict iterates its keys and its values in the same order, so labels[i]
# is the class whose path list sits at images[i].
# NOTE(review): the expected input format of apply_augmentations is not
# visible here - confirm it accepts per-class lists of file paths.
augmented_images = augmentation.apply_augmentations(
    labels=list(image_paths),
    images=[*image_paths.values()],
)

Creating dataframe of images

In [None]:
# Flatten the {class_name: [path, ...]} mapping into one (class, path) row
# per image. Handing the mapping itself to DataFrame(..., columns=[...])
# would look for keys literally named 'class' and 'path' and produce an
# empty frame.
image_dataset = pandas.DataFrame(
    [
        (class_name, image_path)
        for class_name, class_paths in image_paths.items()
        for image_path in class_paths
    ],
    columns=['class', 'path'],
)


def _load_image(image_path):
    """Read the image at image_path into memory and close its file."""
    with Image.open(image_path) as image_file:
        # copy() forces the pixel data to be read before the file is
        # closed, so the returned image does not depend on an open handle
        return image_file.copy()


# Inserting actual image objects inside the dataframe
image_dataset['image'] = image_dataset['path'].apply(_load_image)

Image Visualization

In [None]:
def visualize_k_random_images(images, k):
    """
    Show k randomly chosen images in a grid with at most two rows.

    Args:
        images - sequence of images that matplotlib's imshow can render
        k - number of images to show; capped at the number of images given

    Returns:
        None. Nothing is drawn when there is nothing to show
        (empty input or k <= 0).
    """
    images = list(images)
    k = min(k, len(images))
    if k <= 0:
        return

    n_rows = 2 if k > 1 else 1
    n_cols = -(-k // n_rows)  # ceiling division

    # Sample positions instead of the images themselves, so numpy never has
    # to coerce the image objects into an array. replace=False avoids
    # showing the same image twice.
    chosen_positions = numpy.random.choice(len(images), size=k, replace=False)

    # squeeze=False keeps `axes` two-dimensional even for a single row/column
    _, axes = plt.subplots(n_rows, n_cols, squeeze=False)

    # hide ticks everywhere, including the unused cell when k is odd
    for ax in axes.flat:
        ax.axis('off')

    for ax, position in zip(axes.flat, chosen_positions):
        ax.imshow(images[position])
        
# The loaded images live in the lower-case 'image' column; k is required.
visualize_k_random_images(image_dataset['image'], k=8)

Removing Noise from images using smoothing filters

In [None]:
def apply_gaussian_filter(image: numpy.ndarray, kernel_size: int):
    """
    Function applies standard linear 2d Gaussian Filter
    to image

    Args:
        image - image array to smooth
        kernel_size - side length of the square kernel_size x kernel_size
            kernel; must be a positive odd number

    Returns:
        the blurred image, or None when the image is empty

    Raises:
        ValueError - if kernel_size is not a positive odd number
    """
    if not len(image):
        return None

    if kernel_size <= 0 or kernel_size % 2 == 0:
        raise ValueError(
            "kernel_size must be a positive odd number, got %r" % (kernel_size,)
        )

    # GaussianBlur expects the kernel as a (width, height) pair plus sigmaX;
    # sigmaX=0 lets OpenCV derive the standard deviation from the kernel size.
    filtered_img = cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)
    return filtered_img

def apply_median_filter(image: numpy.ndarray, kernel_size: int):
    """
    Run OpenCV's non-linear median blur over the image, typically used
    to suppress salt-and-pepper noise.

    Args:
        image - image array to filter
        kernel_size - aperture size forwarded to cv2.medianBlur as ksize

    Returns:
        the filtered image, or None when the image is empty
    """
    if not len(image):
        return
    return cv2.medianBlur(src=image, ksize=kernel_size)
    

Image Enhancement

In [None]:
def gamma_convertion():
    """
    Placeholder for the image enhancement step.

    TODO: not implemented yet - the function takes no arguments,
    does nothing and returns None.
    """

Grayscale Conversion

In [None]:
def gray_convert(image, code=None):
    """
    Convert a colour image to a single-channel grayscale image.

    Args:
        image - numpy image array
        code - OpenCV colour conversion code; defaults to
            cv2.COLOR_BGR2GRAY (OpenCV's own channel order).
            Pass cv2.COLOR_RGB2GRAY for arrays in RGB order.

    Returns:
        the grayscale image; a 2-d input is assumed to be grayscale
        already and is returned unchanged
    """
    # nothing to convert when there is no channel axis
    if numpy.ndim(image) == 2:
        return image

    # resolved lazily so the default does not require cv2 at definition time
    if code is None:
        code = cv2.COLOR_BGR2GRAY

    gray_img = cv2.cvtColor(image, code)
    return gray_img

Image thresholding

In [None]:
def apply_thresholding(image, desired_threshold: float, max_value: float = 255):
    """
    Function applies binary image thresholding to the data

    Args:
        image - single-channel image array
        desired_threshold - pixels strictly above this value become
            max_value, all others become 0
        max_value - value assigned to pixels above the threshold

    Returns:
        the thresholded image, or None when image is None or empty
    """
    # `not image` is ambiguous for multi-element arrays, so test explicitly
    if image is None or len(image) == 0:
        return None

    # cv2.threshold returns (threshold actually used, result image);
    # only the image is of interest to callers.
    _, binary_img = cv2.threshold(
        image, desired_threshold, max_value, cv2.THRESH_BINARY
    )
    return binary_img

Edge Detection

In [None]:
def detect_canny_edges():
    """
    Placeholder for the edge detection step.

    TODO: not implemented yet - takes no arguments and returns None.
    """
    pass

Feature Extraction

In [None]:
def extract_features():
    """
    Placeholder for the feature extraction step.

    TODO: not implemented yet - takes no arguments and returns None.
    """
    pass

Texture Analysis

In [None]:
def analyze_image_face_textures():
    """
    Placeholder for the texture analysis step.

    TODO: not implemented yet - takes no arguments and returns None.
    """
    pass

Splitting data into training and testing sets

In [None]:
# 70 / 30 split. The test size is the remainder, so both sizes always add
# up to the dataset length (two independent int() truncations may not,
# which random_split rejects).
train_size = int(len(image_dataset) * 0.7)
test_size = len(image_dataset) - train_size

# random_split hands back Subset wrappers, which cannot be indexed by
# column name. Split the row positions instead and select the rows from
# the dataframe.
train_subset, test_subset = random_split(
    range(len(image_dataset)),
    [train_size, test_size]
)

# reset_index gives each split a contiguous 0..n-1 index.
# NOTE(review): presumably FaceRecognitionDataset indexes by position - confirm.
train_data = image_dataset.iloc[train_subset.indices].reset_index(drop=True)
test_data = image_dataset.iloc[test_subset.indices].reset_index(drop=True)

Forming datasets

In [None]:
# Build one FaceRecognitionDataset per split, training first and testing
# second, each fed with the split's image paths and class labels.
train_dataset, test_dataset = (
    datasets.FaceRecognitionDataset(
        images=split['path'],
        labels=split['class'],
    )
    for split in (train_data, test_data)
)

In [None]:
# Training batches are reshuffled every epoch.
training_loader = DataLoader(
    dataset=train_dataset,
    batch_size=constants.BATCH_SIZE,
    shuffle=True
)

# Evaluation gains nothing from shuffling; a fixed order keeps per-batch
# results comparable between runs.
testing_loader = DataLoader(
    dataset=test_dataset,
    batch_size=constants.BATCH_SIZE,
    shuffle=False
)

Saving datasets to pickle format

In [None]:
# pickle.dumps only returns bytes (its second argument is the protocol, not
# a path); pickle.dump writes to an open binary file instead.
augmented_data_dir = "../data/augmented_data"
os.makedirs(augmented_data_dir, exist_ok=True)

with open(os.path.join(augmented_data_dir, "training_loader.pkl"), "wb") as training_file:
    pickle.dump(training_loader, training_file)

with open(os.path.join(augmented_data_dir, "testing_loader.pkl"), "wb") as testing_file:
    pickle.dump(testing_loader, testing_file)