# Initialization

## Imports and directory paths

In [36]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import glob
import cv2
import os,shutil
import random
import time

def charge_msg(msg):
    """Show a single-line progress message that the next message overwrites.

    The text is terminated with a carriage return instead of a newline, so
    consecutive calls rewrite the same output line.

    Args:
        msg: Text to display.
    """
    # flush=True pushes the text out immediately; without it the line can sit
    # in the stdout buffer and the progress indicator appears frozen.
    print(msg, end="\r", flush=True)
    # Zero-length sleep kept from the original; presumably it yields so the
    # front end can refresh -- TODO confirm it is still needed.
    time.sleep(0)

# Folder that is globbed for *.jpg by the size statistics and by create_small_dataset.
pure_dataset_path = "pure_dataset"
# Folder whose *.jpg files are fed to normalize_image.
small_dataset_path = "small_dataset"
# Folder where normalize_image writes the padded images; read again by the final split loop.
normalized_dataset_path = "same_size_dataset"
# Root of the final dataset; it receives train/validation/test sub-folders, each with color/ and gray/.
dataset_path = "dataset"

# Calculate max image size

In [37]:
def calculate_dataset_max_size():
    """Return the largest height and width found among the pure dataset images.

    Every ``*.jpg`` file directly inside ``pure_dataset_path`` is loaded with
    OpenCV (flag ``1``, i.e. 3-channel color) and its dimensions are compared
    with the running maxima. Files OpenCV cannot decode are skipped.

    Returns:
        tuple: ``(max_height, max_width)`` in pixels, or ``(-1, -1)`` when no
        readable image was found.
    """
    max_height = -1
    max_width = -1
    for i, path_image in enumerate(glob.glob(pure_dataset_path + "/*.jpg"), start=1):
        charge_msg("Number of images processed: " + str(i))
        image = cv2.imread(path_image, 1)
        if image is None:
            # cv2.imread reports an unreadable file by returning None, not by raising.
            continue

        height, width = image.shape[:2]
        max_height = max(max_height, height)
        max_width = max(max_width, width)

    return max_height, max_width

#max_height, max_width = calculate_dataset_max_size()

# Average image size

In [38]:
def average_image_size():
    """Compute the mean width and height of the images in the pure dataset.

    Every ``*.jpg`` file directly inside ``pure_dataset_path`` is loaded with
    OpenCV; files that cannot be decoded are skipped and do not count towards
    the average. A running average is shown while processing and the final
    values are printed.

    Returns:
        tuple: ``(average_width, average_height)`` in pixels, as floats.

    Raises:
        ValueError: If no readable ``.jpg`` image is found (the original code
            failed with a NameError in that case).
    """
    total_width = 0
    total_height = 0
    images_read = 0
    for image_path in glob.glob(pure_dataset_path + "/*.jpg"):
        image = cv2.imread(image_path, 1)
        if image is None:
            # cv2.imread reports an unreadable file by returning None, not by raising.
            continue

        image_height, image_width = image.shape[:2]

        total_width += image_width
        total_height += image_height
        images_read += 1

        charge_msg("Processed images:" + str(images_read) + " - Average width: " + str(total_width / images_read) + " - Average height: " + str(total_height / images_read))

    if images_read == 0:
        raise ValueError("No readable .jpg images found in " + pure_dataset_path)

    # Empty print moves past the carriage-return progress line.
    print("")
    average_width = total_width / images_read
    average_height = total_height / images_read
    print("Average width: " + str(average_width))
    print("Average height: " + str(average_height))
    return average_width, average_height

In [39]:
# Size cut-off used by the later cells: the dataset's mean dimensions plus a
# 200-pixel margin on each axis.
mean_width, mean_height = average_image_size()
width_limit = mean_width + 200
height_limit = mean_height + 200

print(f"Width limit: {width_limit}")
print(f"Height limit: {height_limit}")

Processed images:12750 - Average width: 977.1720784313726 - Average height: 795.5912156862745
Average width: 977.1720784313726
Average height: 795.5912156862745
Width limit: 1177.1720784313725
Height limit: 995.5912156862745


In [41]:
def create_small_dataset():
    """Copy the images that fit within the size limits into the small dataset.

    Each ``*.jpg`` file directly inside ``pure_dataset_path`` is loaded with
    OpenCV; when both its height and width are within ``height_limit`` and
    ``width_limit`` it is written under the same file name into
    ``small_dataset_path``. Unreadable files are skipped.

    Returns:
        int: Number of images written to the small dataset.
    """
    # cv2.imwrite does not create missing folders, so make sure the target exists.
    os.makedirs(small_dataset_path, exist_ok=True)

    number_small_images = 0
    for path_image in glob.glob(pure_dataset_path + "/*.jpg"):
        # basename is separator-agnostic; splitting on '\\' only worked on Windows.
        image_name = os.path.basename(path_image)  # with .jpg at the end!
        image = cv2.imread(path_image, 1)
        if image is None:
            # cv2.imread reports an unreadable file by returning None, not by raising.
            continue

        image_height, image_width = image.shape[:2]

        if image_height <= height_limit and image_width <= width_limit:
            # The original referenced the undefined name `small_dataset` here.
            cv2.imwrite(os.path.join(small_dataset_path, image_name), image)
            number_small_images += 1

    return number_small_images

In [50]:
# Pad every image of the small dataset to the common size.
# NOTE: normalize_image is defined in a later cell of this notebook, so that
# cell must have been executed before this one.
for path_image in glob.glob(small_dataset_path + "/*.jpg"):
    # basename is separator-agnostic; splitting on '\\' only worked on Windows.
    image_name = os.path.basename(path_image)  # with .jpg at the end!
    normalize_image(path_image, image_name)

In [49]:
"""
print(max_height, max_width)
# Crear matriz X,Y
new_image = np.full([max_height, max_width, 3],255)
print(new_image)
# Copiar imagen en matriz (con canales)
"""
#for c in range(0, 3):
#    [y_offset:y_offset+s_img.shape[0], x_offset:x_offset+s_img.shape[1]] = s_img
"""
old_image = cv2.imread("pure_dataset/000a045a0715d64d.jpg",1)
for c in range(0, 3):
    new_image[0:old_image.shape[0], 0:old_image.shape[1],c] = old_image[0:old_image.shape[0], 0:old_image.shape[1],c]
# Pasar matriz a imagen
img = Image.fromarray(new_image, 'RGB')
img.save("111111aaaaaa.jpg", "JPEG")1
# https://stackoverflow.com/questions/14063070/overlay-a-smaller-image-on-a-larger-image-python-opencv
"""
def normalize_image(image_path, image_name):
    """Pad an image with black pixels to the common size and save the result.

    The image keeps its position at the top-left corner; black borders are
    added at the bottom and on the right, sized from ``height_limit`` and
    ``width_limit`` (the difference is truncated to an integer). The padded
    image is written to ``normalized_dataset_path`` under ``image_name``.

    Args:
        image_path: Path of the image to read.
        image_name: File name (with extension) used for the output image.

    Raises:
        ValueError: If the image cannot be read, or if it is larger than the
            size limits (padding would have to be negative).
        OSError: If the padded image cannot be written.
    """
    image = cv2.imread(image_path, 1)
    if image is None:
        # cv2.imread reports an unreadable file by returning None, not by raising.
        raise ValueError("Could not read image: " + str(image_path))

    image_height, image_width = image.shape[:2]

    pad_bottom = int(height_limit - image_height)
    pad_right = int(width_limit - image_width)
    if pad_bottom < 0 or pad_right < 0:
        # Fail with a clear message instead of an opaque OpenCV error.
        raise ValueError(
            "Image " + str(image_path) + " (" + str(image_width) + "x" + str(image_height)
            + ") exceeds the size limits and cannot be padded"
        )

    normalized_image = cv2.copyMakeBorder(
        image,
        top=0,
        bottom=pad_bottom,
        left=0,
        right=pad_right,
        borderType=cv2.BORDER_CONSTANT,
        value=[0, 0, 0],
    )

    # cv2.imwrite does not create missing folders and signals failure only
    # through its return value, so both cases are handled explicitly.
    os.makedirs(normalized_dataset_path, exist_ok=True)
    output_path = os.path.join(normalized_dataset_path, image_name)
    if not cv2.imwrite(output_path, normalized_image):
        raise OSError("Could not write image: " + output_path)

# Clear folders

In [51]:
print("Our pure dataset is on " + pure_dataset_path + " (Oh! Is so pure!).")
print("Our dataset will be on " + dataset_path + ". ")

# Start from an empty dataset folder: anything left over from a previous run
# is deleted before the folder tree is recreated.
if os.path.isdir(dataset_path):
    print("The dataset folder already exist. We will delete it and the images...")
    shutil.rmtree(dataset_path)
    print("Images and folder deleted")
os.makedirs(dataset_path)
print("Folder: " + dataset_path + " has been created")

# One folder per split, each with a color/ and a gray/ sub-folder.
# The original logged the gray folders as ".../color"; the message now names
# the folder that was actually created.
for split_name in ("train", "validation", "test"):
    split_folder = dataset_path + "/" + split_name
    os.makedirs(split_folder)
    print("Folder: " + split_folder + " has been created")
    for variant_name in ("color", "gray"):
        variant_folder = split_folder + "/" + variant_name
        os.makedirs(variant_folder)
        print("Folder: " + variant_folder + " has been created")

Our pure dataset is on pure_dataset (Oh! Is so pure!).
Our dataset will be on dataset. 
The dataset folder already exist. We will delete it and the images...
Images and folder deleted
Folder: dataset has been created
Folder: dataset/train has been created
Folder: dataset/train/color has been created
Folder: dataset/train/color has been created
Folder: dataset/validation has been created
Folder: dataset/validation/color has been created
Folder: dataset/validation/color has been created
Folder: dataset/test has been created
Folder: dataset/test/color has been created
Folder: dataset/test/color has been created


In [52]:
# Distribute the normalized images over the train / validation / test folders,
# storing each one both in color and in grayscale.
number_images_procesed = 0
MAX_NUMBER_TRAIN = 7000
MAX_NUMBER_VALIDATION = 5000
# NOTE(review): MAX_NUMBER_TEST is not enforced; every image after the train
# and validation quotas goes to the test folder.
MAX_NUMBER_TEST = 750
folder = "/train"
# glob returns files in arbitrary order; sorting keeps the split reproducible.
for path_image in sorted(glob.glob(normalized_dataset_path + "/*.jpg")):
    # basename is separator-agnostic; splitting on '\\' only worked on Windows.
    image_name = os.path.basename(path_image)  # with .jpg at the end!
    image = cv2.imread(path_image, 1)
    if image is None:
        # cv2.imread reports an unreadable file by returning None; skip it
        # without counting it towards any split.
        continue
    # Convert the image already in memory instead of reading the file again.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # The counter is zero-based, so ">=" gives exactly MAX_NUMBER_TRAIN train
    # images (the original ">" placed one extra image in train).
    # NOTE(review): with fewer than MAX_NUMBER_TRAIN + MAX_NUMBER_VALIDATION
    # images no image reaches the test folder (the recorded run processed 9715).
    if number_images_procesed >= MAX_NUMBER_TRAIN + MAX_NUMBER_VALIDATION:
        folder = "/test"
    elif number_images_procesed >= MAX_NUMBER_TRAIN:
        folder = "/validation"
    else:
        folder = "/train"

    cv2.imwrite(dataset_path + folder + "/color/" + str(image_name), image)
    cv2.imwrite(dataset_path + folder + "/gray/" + str(image_name), gray)

    number_images_procesed = number_images_procesed + 1
    charge_msg("Processing images... Image: " + str(number_images_procesed))
print("")
print("\n")
print("All done")

Processing images... Image: 9715


All done
