# Image Orientation Recognition using CNN

In [1]:
import pandas as pd
import numpy as np
import keras
from keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

## Setting paths
---
This variables will be used throughout our code to improve readability

In [94]:
project_root = os.getcwd()
train_path = os.path.join(project_root, 'data_set', 'train', 'train')
test_path = os.path.join(project_root, 'data_set', 'test')
target_path = os.path.join(project_root, 'target')
checkpoint_path = os.path.join(project_root, 'model_checkpoint')

## Importing Test Labels DataFrame
---

This is a DataFrame that contains the name and the label of each train image as following:
``` 
{
    'fn': filename,
    'label': label
}
```

In [238]:
train_set = pd.read_csv(f'{train_path}/../train.truth.csv')
train_set.head()

Unnamed: 0,fn,label
0,0-10049200_1891-09-16_1958.jpg,rotated_left
1,0-10110600_1985-09-17_2012.jpg,rotated_left
2,0-10126400_1964-07-07_2010.jpg,upright
3,0-1013900_1917-10-15_1960.jpg,rotated_right
4,0-10166400_1960-03-12_2008.jpg,upside_down


## CNN Implementation
---

### Setting frequent parameters

In [239]:
batch_size = 32 #This was arbitrarily defined
target_size = (64, 64) #This is the image resolution's 64x64 pixels
num_classes = train_set.label.nunique()  #Number of unique labels

### Training image set

The cell bellow loads the training set, normalize it by dividing each value by 255 - that is the maximum possible value - and splits it into 'training' and 'validation' - 3:1 ratio.

In [97]:
datagen=ImageDataGenerator(rescale=1./255.,validation_split=0.25)
train_generator=datagen.flow_from_dataframe(
    dataframe=train_set, 
    directory=train_path, 
    x_col="fn",
    y_col="label",
    subset="training",
    batch_size=batch_size,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=target_size)

valid_generator=datagen.flow_from_dataframe(
    dataframe=train_set, 
    directory=train_path, 
    x_col="fn",
    y_col="label",
    subset="validation",
    batch_size=batch_size,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=target_size)

Found 36672 validated image filenames belonging to 4 classes.
Found 12224 validated image filenames belonging to 4 classes.


### Test image set
---
As it doesn't has labels we used the `flow_from_directory` method. pointing only the directory of images.

**Important:** It is important to Normalize this data too.

In [98]:
test_datagen=ImageDataGenerator(rescale=1./255.)
test_generator=test_datagen.flow_from_directory(
    directory=test_path,
    target_size=target_size,
    color_mode="rgb",
    batch_size=1,
    class_mode=None,
    shuffle=False,
    seed=42
)

Found 5361 images belonging to 1 classes.


### CNN Structure

Plans all the CNN layer structure and its 'functionalities'.

Basically implements an sequential Neural Network that will compares pixels in a positional way and apply some calculation on it to try to compute the probability on each pixel set to belong to any of our classes.

For a complete understanding of each function and method used here I recomend watching the [Tensorflow tutorial](https://www.youtube.com/playlist?list=PL9Hr9sNUjfsmEu1ZniY0XpHSzl5uihcXZ) provided by Hvass Laboratories and available on YouTube. It consists on a playlist teaching all the practical aspects of creating this CNN Network and it is applied on CIFAR-10  and MNIST dataset.

In [8]:
model = Sequential()
model.add(Conv2D(batch_size, (3, 3), padding='same',
                 input_shape=train_generator[0][0].shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(batch_size, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(batch_size*2, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(batch_size*2, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

#### If you want to see a draft of the layers execute the cell bellow

In [241]:
if False:
    model.summary()

### Setting proper step size for each dataset
---
This variables contains the size of each executable fraction that will be feeded to our model at the time

In [50]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

### Training CNN

The number of epochs was also arbitrarily defined. As you can see in the outputed cell, each epoch took at least900 seconds to be completed and the entropy lost decay rate - that is our evaluation metric to be minimized - began very low from the 8th epoch.

In [10]:
model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=10 
)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ff576747290>

### Saving trained model
---
This was implemented to avoid having to train our model every time we need to use it.
It also can be used as an checkpoint to improve our model and run more epochs.

In [92]:
# Save model and weights into checkpoint directory
model_name = 'CNN-Photo_orientation'
model_path = os.path.join(checkpoint_path, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

INFO:tensorflow:Assets written to: /Users/diogotelheirodonascimento/Desktop/DeeperSystem/test2/model_checkpoint/CNN-Photo_orientation/assets
Saved trained model at /Users/diogotelheirodonascimento/Desktop/DeeperSystem/test2/model_checkpoint/CNN-Photo_orientation 


### Evaluating model using validation set
---
Now we evaluate our accuracy on our validation set. We would like to run on our entire set, so we need to redifine our `batch_size` and `our STEP_SIZE`

In [243]:
# Score trained model.
valid_generator.batch_size = 1
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
model.evaluate(valid_generator, steps=STEP_SIZE_VALID)



[0.06644802540540695, 0.9761943817138672]

An accuracy of 97% in our first attempt to create this kind of model works very fine by me at this point, as it was not provided an minimum accepted accuracy.

### Getting the orientation on each test file
---
Now we want to use our model to predict the class of the test set

In [51]:
test_generator.reset()
pred=model.predict_generator(test_generator,
steps=STEP_SIZE_TEST,
verbose=1)



In [244]:
pred.shape

(5361, 4)

Important to notice that the output of or CNN is an probability array of shape (n, y) in which:
- n is the number of images
- y consists on the probability of belonging to each of the 4 classes

Having said so we need to compute the 'real class' for each image.
To do so we just select the highest probability on each row.

In [209]:
predicted_class_indices=np.argmax(pred,axis=1)
predicted_class_indices[:5]

array([0, 2, 1, 1, 1])

In [248]:
'''Here we just are creating an `labels` dictionary to convert 
one-hot-encoded array to our class label'''
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())

#Here we create an list with the real class label
predictions = [labels[k] for k in predicted_class_indices] 

### Generating predictions DataFrame
---
To submit our results we will generate an DataFrame in the same format that was provided to us.

In [249]:
filenames=test_generator.filenames
results=pd.DataFrame({"fn":filenames,
                      "label":predictions})
print(results.shape)
results.head()

(5361, 2)


Unnamed: 0,fn,label
0,test_set/90-10184590_1979-06-16_2006.jpg,rotated_left
1,test_set/90-1019890_1931-08-10_1978.jpg,upright
2,test_set/90-10241990_1984-11-28_2007.jpg,rotated_right
3,test_set/90-102690_1966-09-09_2011.jpg,rotated_right
4,test_set/90-10303590_1983-01-26_2010.jpg,rotated_right


In [112]:
results.to_csv(os.path.join(target_path,"predictions.csv"),index=False)

## Correcting Image Orientation
---
For this second part we will correct the orientation of each image no the test set.

First we will create a copy of the Test set within the target directory

In [266]:
from distutils.dir_util import copy_tree
import shutil

corrected_test_dir = os.path.join(target_path, 'corrected_test')
if not os.path.isdir(corrected_test_dir):
    print('creating directory')
    os.mkdir(corrected_test_dir)
    print('finished')
else:
    print('The directory already exists')

print('Rebooting directory...')
for filename in os.listdir(corrected_test_dir):
    file_path = os.path.join(corrected_test_dir, filename)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print('Failed to delete %s. Reason: %s' % (file_path, e))

# copy_tree(os.path.join(test_path, 'test_set'), corrected_test_dir)
print('Clean Directory Finished')

The directory already exists
Rebooting directory...
Finished
0 files created!


Then we will iterate over the directory, open each image and rotate it as in our rotation_guide suggests

In [267]:
rotation_guide = {
    'upright': 0, 
    'rotated_right': 90, 
    'upside_down': 180,
    'rotated_left': 270
}

In [268]:
from PIL import Image
from time import sleep

verbose = False
test_image_path = os.path.join(test_path, 'test_set')

print('Begining correct orientation image routine')
for n,image_name in enumerate(os.listdir(test_image_path)):
    if verbose:
        print(f'{n}/{len(os.listdir(test_image_path))}', end='\r')
        sleep(0.1)
    image_path = os.path.join(test_image_path, image_name)
    image_orientation = results.loc[results.fn.str.contains(image_name), 'label'].values[0]
    assert len(image_orientation) > 0, 'No image found!'
    assert len(image_orientation) > 1, f'Multiple images with same name! on name {image_name}'
    image = Image.open(image_path)
    rotated = image.rotate(rotation_guide[image_orientation])
    rotated.save(os.path.join(corrected_test_dir, image_name.replace('.jpg', '.png')), format='png')
print('Finished!')

Begining correct orientation image routine
Finished!


In [269]:
len(os.listdir(corrected_test_dir))

5361

#### Zip Output
Then we will Zip our corrected folder to submit it.

In [270]:
import zipfile

zf = zipfile.ZipFile(f'{corrected_test_dir}.zip', "w")
for dirname, subdirs, files in os.walk(corrected_test_dir):
    zf.write(dirname)
    for filename in files:
        zf.write(os.path.join(dirname, filename))
zf.close()

## Creating Numpy Output
---
For the last task we will save our corrected image in an numpy.array as it is required to work with other ML models.

In [271]:
correct_datagen=ImageDataGenerator(rescale=1./255.)
correct_generator=correct_datagen.flow_from_directory(
    directory=test_path,
    target_size=target_size,
    color_mode="rgb",
    batch_size=len(os.listdir(corrected_test_dir)),
    class_mode=None,
    shuffle=False,
    seed=42
)

Found 5361 images belonging to 1 classes.


In [272]:
numpy_output = correct_generator.next()

In [273]:
np.save(os.path.join(target_path, 'numpy_output'), numpy_output)