*Author: Jerome Wynne (Jerome.Wynne.2014@bristol.ac.uk)*

*Last updated: 22/06/2017*

# Panel Resizer
---
This script resizes the PV panel images to a fixed square size (the median image height). The three smallest images are dropped in the process.

In [1]:
# This notebook was written for Python 3.5; echo the running interpreter's
# version string so that a mismatch is visible in the cell output.
import sys
print('{}'.format(sys.version))

3.5.2 |Anaconda 4.1.1 (64-bit)| (default, Jul  5 2016, 11:41:13) [MSC v.1900 64 bit (AMD64)]


In [2]:
# Read in images
from sklearn.model_selection import train_test_split
from skimage.transform import AffineTransform, warp
from scipy.misc import imread, imresize, imsave
from skimage.util import random_noise
from scipy.misc import imread, imsave
from skimage.morphology import disk
from skimage.filters import rank
import pandas as pd
import numpy as np
import shutil
import pickle
import glob
import os

np.random.seed(42)

force = True # Controls whether to overwrite the output of previous runs

# NOTE: an image's id is its position in the glob listing, and glob returns files in an
# OS-dependent order, so the ids are only reproducible if that listing order is.
raw_image_fps    = glob.glob('.\\data\\raw-images\\*.png')
raw_images       = [imread(fp) for fp in raw_image_fps]
image_ids        = range(len(raw_images))

# Sort the ids by image shape and drop the first three, which are much smaller.
# The images are then reordered through the surviving ids, so ids and images
# cannot fall out of step with each other.
image_ids  = sorted(image_ids, key = lambda j: raw_images[j].shape)[3:]
raw_images = [raw_images[j] for j in image_ids]

# Create the output folder, wiping the output of a previous run if force is set
if not os.path.exists('.\\data\\resized-images'):
    os.makedirs('.\\data\\resized-images')
elif force:
    shutil.rmtree('.\\data\\resized-images')
    os.makedirs('.\\data\\resized-images')

# Resize every image to a square whose side is the median image height
dst_dim        = np.median([img.shape[0] for img in raw_images]).astype(int)
resized_images = [imresize(img, [dst_dim, dst_dim]) for img in raw_images]

# Write images to local directory, named after their original id
for image_id, resized_image in zip(image_ids, resized_images):
    imsave('.\\data\\resized-images\\resized_' + str(image_id) + '.png', resized_image)

print('Images resized sucessfully.')

Images resized sucessfully.


# Panel Subsetter
---
The script below splits each resized image into a 6x6 grid of sub-images and writes each sub-image to disk.

In [3]:
# Make sure the output folder exists; when force is set, start from an empty one
subset_dir = '.\\data\\subset-images'
if os.path.exists(subset_dir):
    if force:
        shutil.rmtree(subset_dir)
        os.makedirs(subset_dir)
else:
    os.makedirs(subset_dir)

# Side length of one patch when the resized image is cut into a 6x6 grid
patch_dim = np.floor(dst_dim/6).astype(int)

# Cut every resized image into patches; each file name carries the image id
# followed by the patch's row and column index
for panel_id, panel in zip(image_ids, resized_images):
    for col in range(6):
        for row in range(6):
            top  = row*patch_dim
            left = col*patch_dim
            sub_image = panel[top:top + patch_dim, left:left + patch_dim]
            imsave('.\\data\\subset-images\\subset_' + str(panel_id) + '_' + str(row) + str(col) + '.png', sub_image)

print('Images subset successfully.')

Images subset successfully.


# Panel Train-Test-Splitter
---
This script performs a 50:50 stratified split of the subset data into training and testing images, then splits the training half again (also stratified) into training and validation images. 105 crack images, 1695 no-crack images.

In [4]:
# Read in image labels
image_info = pd.read_csv('.\\data\\labels\\subset-cracked.csv')

# First split: half of the labelled files for training, stratified on the crack label
train_img_fp, test_img_fp, train_labels, test_labels = train_test_split(
    image_info['filename'], image_info['cracked'],
    train_size = 0.5, stratify = image_info['cracked'], random_state = 1)

# Second split: carve a validation set out of the training half, again stratified
# (train_test_split's default proportions are used here)
train_img_fp, valid_img_fp, train_labels, valid_labels = train_test_split(
    train_img_fp, train_labels, random_state = 1, stratify = train_labels)

# Make a directory for each image set. Every directory is checked on its own, so a
# missing one is always created and, when force is set, an existing one is emptied.
split_files = [('training',   train_img_fp),
               ('testing',    test_img_fp),
               ('validation', valid_img_fp)]
for split_name, _ in split_files:
    split_dir = '.\\data\\subset-images\\' + split_name
    if not os.path.exists(split_dir):
        os.makedirs(split_dir)
    elif force:
        shutil.rmtree(split_dir)
        os.makedirs(split_dir)

# Move the training, testing and validation images to their respective folders
for split_name, split_fps in split_files:
    for fp in split_fps:
        os.rename('.\\data\\subset-images\\' + fp,
                  '.\\data\\subset-images\\' + split_name + '\\' + fp)

print('Images split into training, testing, and validation sets successfully.')

Images split into training, testing, and validation sets successfully.


# Training Set Augmenter

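The following script balances the training set by oversampling the cracked images (with replacement) until they match the number of uncracked images, and augments the training images. It then pickles the training and validation image arrays and their associated labels to the hard disk.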

In [5]:
def augment_image(img):
    """Return a randomly perturbed, locally equalised version of ``img``.

    The image is rescaled by a random factor in [0.98, 1.02] through an affine
    warp, then (each with probability one half) flipped vertically, flipped
    horizontally and given Gaussian noise of variance 0.001. The result is
    passed through a rank-based local histogram equalisation with a disk of
    radius 100, and 0.5 is subtracted from every value.

    The random draws come from the global NumPy random state.
    """
    # Draw all random decisions up front, in a fixed order
    flip_v     = np.random.randint(0, 2)
    flip_h     = np.random.randint(0, 2)
    noise      = np.random.randint(0, 2)
    scale_f    = np.random.uniform(0.98, 1.02) # Some of the cracks are at the extremes of the frame
    tform      = AffineTransform(scale = (scale_f, scale_f))
    aug_img    = warp(img, tform)
    if flip_v: aug_img = np.flip(aug_img, axis = 0)
    if flip_h: aug_img = np.flip(aug_img, axis = 1)
    if noise:  aug_img = random_noise(aug_img, 'gaussian', var = 0.001)
    # Equalise the augmented image; equalising the untouched input here would
    # throw away the warp, flips and noise applied above
    aug_img    = rank.equalize(aug_img, selem=disk(100))
    # NOTE(review): this shift assumes values in [0, 1]; rank.equalize may return
    # values on an integer scale instead - confirm the intended centring
    aug_img    = aug_img - 0.5
    return aug_img

def pickle_dataset(list_of_img_fps, list_of_labels, name, augment = False):
    """Load images from disk and pickle them, with their labels, under ``.\\data``.

    Parameters:
        list_of_img_fps: iterable of image file paths to read with ``imread``.
        list_of_labels:  labels to pickle as-is next to the image array.
        name:            prefix of the output files, ``<name>_dataset.p`` and
                         ``<name>_labels.p``.
        augment:         when true, each image is passed through
                         ``augment_image`` before being stored.

    The images are stacked into a single NumPy array before pickling.
    """
    if augment:
        img_array = np.array([augment_image(imread(fp)) for fp in list_of_img_fps])
    else:
        img_array = np.array([imread(fp) for fp in list_of_img_fps])
    # Context managers make sure both files are flushed and closed, even on error
    with open('.\\data\\' + name + '_dataset.p', 'wb') as dataset_file:
        pickle.dump(img_array, dataset_file)
    with open('.\\data\\' + name + '_labels.p', 'wb') as labels_file:
        pickle.dump(list_of_labels, labels_file)

def shuffle_data(dataset, labels):
    """Shuffle ``dataset`` and ``labels`` with one shared random permutation.

    Both inputs are turned into lists if they are not lists already. A single
    permutation of ``range(len(labels))`` is drawn from the global NumPy random
    state and applied to both, so element/label pairs stay together.

    Returns the shuffled dataset and the shuffled labels as two new lists.
    """
    items = dataset if type(dataset) == list else list(dataset)
    tags  = labels  if type(labels)  == list else list(labels)
    n_items = len(tags)
    order = np.random.choice(range(n_items), n_items, replace = False)
    return [items[pos] for pos in order], [tags[pos] for pos in order]
    
# Balance training data: take every uncracked training image once and oversample the
# cracked ones (with replacement) until both classes have the same number of images
uncracked_mask     = train_labels == 0
n_images_per_class = sum(uncracked_mask)
uncracked_img_fp   = train_img_fp[uncracked_mask].sample(n_images_per_class, replace = False)
cracked_img_fp     = train_img_fp[train_labels == 1].sample(n_images_per_class, replace = True)
aug_train_labels   = [0]*n_images_per_class + [1]*n_images_per_class

# Turn the bare file names into paths inside the training and validation folders;
# the training paths are listed uncracked first so they line up with the labels above
training_dir       = '.\\data\\subset-images\\training\\'
validation_dir     = '.\\data\\subset-images\\validation\\'
aug_train_fp       = [training_dir + fp for fp in pd.concat([uncracked_img_fp, cracked_img_fp])]
valid_img_fp       = [validation_dir + fp for fp in valid_img_fp]

# Shuffle each set, keeping every path paired with its label
aug_train_fp, aug_train_labels = shuffle_data(aug_train_fp, aug_train_labels)
valid_img_fp, valid_labels     = shuffle_data(valid_img_fp, valid_labels)

# Pickle each dataset (as an array) and its labels (as a list) to disk;
# only the training images are augmented
pickle_dataset(aug_train_fp, aug_train_labels, 'training', augment = True)
pickle_dataset(valid_img_fp, valid_labels, 'validation', augment = False)

print('Validation and training images pickled successfully.')

Validation and training images pickled successfully.
