In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import skimage
import skimage.transform

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from keras.applications.vgg16 import preprocess_input

Using TensorFlow backend.


In [2]:
# Seed passed to train_test_split so the train/validation split is reproducible.
RANDOM_STATE = 7532
# Directory holding the image files. File names are appended by plain string
# concatenation, so the trailing slash is required.
PHOTO_DIR = 'data/images/'

In [3]:
# Image file names with their category labels (train) and without labels (test).
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')

# Only inspected for its shape below; not used in the preprocessing itself.
sample_submission = pd.read_csv('data/sample_submission.csv')

First, let's have a look at our data frames

In [4]:
# (rows, columns) of the train, test and sample-submission frames
train_data.shape, test_data.shape, sample_submission.shape

((6252, 2), (2680, 1), (2680, 2))

In [5]:
# Column dtypes and non-null counts of the training frame
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6252 entries, 0 to 6251
Data columns (total 2 columns):
image       6252 non-null object
category    6252 non-null int64
dtypes: int64(1), object(1)
memory usage: 97.8+ KB


In [6]:
# First rows of the training frame: image file name and category label
train_data.head()

Unnamed: 0,image,category
0,2823080.jpg,1
1,2870024.jpg,1
2,2662125.jpg,2
3,2900420.jpg,3
4,2804883.jpg,2


Below, we can see that the data distribution is not balanced across the 5 classes. This issue will be addressed at a later stage by assigning appropriate class weights.

In [7]:
# Number of training images per category
train_data['category'].value_counts()

1    2120
5    1217
2    1167
3     916
4     832
Name: category, dtype: int64

In [8]:
# Class labels; used to stratify the train/validation split
y = train_data['category']

Now, let's split the train data into the train set (75%) and the validation set (25%).

In [9]:
# Split only the data frame: the labels travel with it in the 'category' column,
# so splitting `y` as well just produced two throw-away results bound to `_`
# (which also rebinds IPython's last-output variable). The train/validation
# indices depend only on test_size, random_state and stratify, so the
# partition is the same as before.
train_set, valid_set = train_test_split(train_data, test_size=0.25, random_state=RANDOM_STATE,
                                        shuffle=True, stratify=y)

# The test data is used as-is; the alias only keeps the naming parallel to the other splits.
test_set = test_data

In [10]:
# Per-category counts in the train set, to check the stratified split
train_set['category'].value_counts()

1    1590
5     913
2     875
3     687
4     624
Name: category, dtype: int64

In [11]:
# Per-category counts in the validation set, to check the stratified split
valid_set['category'].value_counts()

1    530
5    304
2    292
3    229
4    208
Name: category, dtype: int64

Adjust the pictures so that they all have the same size (256 × 384, rows × columns) and the same number of dimensions (2D grayscale images are expanded to three channels).

In [12]:
def convert_to_3D(img):
    """Expand a 2D image into a 3-channel float32 image.

    The input of shape (rows, cols) is copied unchanged into each of the three
    channels of a new (rows, cols, 3) float32 array, which is returned.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]
    stacked = np.empty((n_rows, n_cols, 3), dtype=np.float32)

    # Broadcasting over the added trailing axis writes the same plane into every channel.
    stacked[...] = img[:, :, np.newaxis]

    return stacked


# Target image size used by homogenize_images.
# NOTE: despite the names, NEW_WIDTH is matched against img.shape[0] (rows) and
# NEW_HEIGHT against img.shape[1] (columns), so the homogenized arrays are
# 256 rows x 384 columns.
NEW_WIDTH = 256
NEW_HEIGHT = 384

def homogenize_images(img_path_series, img_dir=PHOTO_DIR, new_width=NEW_WIDTH, new_height=NEW_HEIGHT):
    """Load images and return them as one float32 array of identically shaped 3-channel images.

    Each image is read from ``img_dir + img_name``, brought to the shape
    ``(new_width, new_height)`` and, if it is 2D (grayscale), expanded to three
    channels with ``convert_to_3D``.

    Parameters
    ----------
    img_path_series : iterable of str
        Image file names, relative to ``img_dir``.
    img_dir : str
        Directory prefix; it is concatenated with the file name as-is, so it
        must end with a path separator.
    new_width : int
        Target size along axis 0 of the image array (number of rows).
    new_height : int
        Target size along axis 1 of the image array (number of columns).

    Returns
    -------
    homogenized_image_array : np.ndarray
        float32 array of shape ``(n_images, new_width, new_height, 3)``, in the
        order of ``img_path_series``.
    count_2D : int
        Number of 2D images that were expanded to three channels.
    """

    img_names = list(img_path_series)
    target_shape = (new_width, new_height)
    count_2D = 0

    # Write straight into the final array instead of collecting a list and
    # copying it with np.array, which would hold every image in memory twice.
    homogenized_image_array = np.empty((len(img_names), new_width, new_height, 3),
                                       dtype=np.float32)

    for index, img_name in enumerate(img_names):
        img = plt.imread(img_dir + img_name)

        if img.shape[:2] == target_shape:
            # Integer images are assumed to be 8-bit and are scaled to [0, 1].
            # imread returns floats already in [0, 1] for some formats (e.g. PNG);
            # dividing those by 255 again would wrongly darken them.
            if np.issubdtype(img.dtype, np.integer):
                img = img / 255.0
            img = img.astype(np.float32)
        else:
            # The resize function returns a float64 representation of an image
            img = skimage.transform.resize(img, target_shape,
                                           order=3, mode='reflect',
                                           anti_aliasing=True).astype(np.float32)

        # Turn 2D images into 3D images
        if img.ndim == 2:
            img = convert_to_3D(img)
            count_2D += 1

        homogenized_image_array[index] = img

    return homogenized_image_array, count_2D

In [13]:
%%time
# Homogenize each split in turn and cache the result on disk. The splits are
# processed one at a time so that only one image array is held in memory.
for split_label, file_prefix, split_frame in (
    ('Train', 'train', train_set),
    ('Validation', 'valid', valid_set),
    ('Test', 'test', test_set),
):
    split_hmgd, count_2D = homogenize_images(split_frame['image'])
    print(f'{split_label} set:')
    print(f'Number of 2D photos converted to 3D: {count_2D}')
    np.save(f'{file_prefix}_set_hmgd_arr_256_384.npy', split_hmgd)

    # free up memory before the next split is processed
    del split_hmgd
    print()

Train set:
Number of 2D photos converted to 3D: 77

Validation set:
Number of 2D photos converted to 3D: 27

Test set:
Number of 2D photos converted to 3D: 50

Wall time: 13min 58s


Apply input preprocessing suitable for VGG16.

In [14]:
def _save_vgg16_input(split_name):
    """Load one homogenized split, apply VGG16 preprocessing and save the result.

    Reads ``<split_name>_set_hmgd_arr_256_384.npy`` and writes
    ``<split_name>_set_hmgd_arr_256_384_VGG16.npy``. The arrays are locals, so
    they are released when the function returns.
    """
    images = np.load(f'{split_name}_set_hmgd_arr_256_384.npy')

    # The images are multiplied by 255 before being handed to preprocess_input
    # (presumably because the stored arrays are scaled to [0, 1] while
    # preprocess_input works on the 0-255 range - confirm).
    images_vgg16 = preprocess_input(images * 255)

    # Reverse the order of the channel axis.
    # NOTE(review): per the Keras documentation the VGG16 preprocess_input
    # already converts RGB to BGR, so this puts the channels back in RGB
    # order - confirm that this is what the downstream model expects.
    images_vgg16 = images_vgg16[:, :, :, ::-1]

    np.save(f'{split_name}_set_hmgd_arr_256_384_VGG16.npy', images_vgg16)


# Process the splits one at a time to keep peak memory low.
for split_name in ('train', 'valid', 'test'):
    _save_vgg16_input(split_name)

Save the metadata.

In [15]:
# Write the split frames without their index column. Row order is unchanged,
# so row i of each CSV corresponds to image i of the matching .npy array
# produced from the same frame's 'image' column.
train_set.to_csv('train_set_metadata.csv', index=False)
valid_set.to_csv('valid_set_metadata.csv', index=False)
test_set.to_csv('test_set_metadata.csv', index=False)