# Sort Images with Artificial Intelligence
In this project we are going to train an AI Model which will be able to sort and understand our images. In the end we will use the AI to recognize what is shown on the picture and move the image to a proper folder.
If you use this project yourself, you can save a lot of time by letting the AI sort your pictures instead of going through them manually.
We are going to use so-called Transfer Learning for this project. We take a pretrained CNN (Convolutional Neural Network), the VGG 16, and cut off the last layers, where the decision takes place. The CNN is already able to recognize all the features in an image; only the decision-making part (a Feedforward Neural Network) has to be retrained.



## First Step: Read the Training-Images
First we store similar pictures in the same folder (one folder per class), because the model has to be trained on examples whose class is known.

In [40]:
import os
import numpy as np
from tqdm import tqdm_notebook as tqdm
from PIL import Image

def read_images(path):
    """Load every JPG/PNG image found in *path* as a 224x224 RGB array.

    Files are selected by their extension (".jpg", ".JPG", ".png", ".PNG").
    Each image is resized to 224x224 with the Lanczos filter and then
    converted to RGB, so every array has the shape (224, 224, 3).

    Args:
        path: Directory that contains the image files.

    Returns:
        A list with one NumPy array per image that could be read. Files that
        Pillow cannot open or decode are skipped and reported as a warning.
    """
    import warnings

    extensions = (".jpg", ".JPG", ".PNG", ".png")
    files = [file for file in os.listdir(path) if file.endswith(extensions)]

    images = []
    for file in files:
        try:
            # The context manager releases the file handle even when decoding
            # fails part-way through.
            with Image.open(os.path.join(path, file)) as image:
                # https://pillow.readthedocs.io/en/3.1.x/reference/Image.html#PIL.Image.Image.resize
                resized = image.resize((224, 224), Image.LANCZOS)

                # https://pillow.readthedocs.io/en/3.1.x/reference/Image.html#PIL.Image.Image.convert
                rgb = resized.convert("RGB")

            images.append(np.asarray(rgb))
        except OSError as error:
            # Best effort: one broken file must not abort the whole folder,
            # but the user should learn that it was left out.
            warnings.warn(f"Skipping unreadable image {file!r}: {error}")

    return images
    
# Load the two training classes - one folder per class
selfie = np.asarray(read_images("Me"))
mountains = np.asarray(read_images("Mountains"))

# Stack all images into one array: selfies first, then mountains
X = np.concatenate([selfie, mountains])

# Generate labels in the same order as X - we associate one with selfies and zero with mountains
y_selfie = np.ones(selfie.shape[0])
y_mountains = np.zeros(mountains.shape[0])

y = np.concatenate([y_selfie, y_mountains])



## Transfer Learning with VGG 16

In [41]:
from keras.applications.vgg16 import VGG16, preprocess_input

# Preprocess data with the VGG 16 helper imported above, so the images are
# prepared the way the pretrained network expects.
# NOTE(review): the exact transformation is defined by Keras' preprocess_input - see its documentation.
X = preprocess_input(X)

In [43]:
# Load VGG 16 without its top (decision) layers; inputs are 224x224 images with 3 channels
vgg16_model = VGG16(include_top = False, input_shape = (224,224,3))
# Freeze the convolutional base - only the classifier defined further below is trained
vgg16_model.trainable = False

# Run all training images through the frozen base to obtain their features
X_after_vgg = vgg16_model.predict(X, verbose = 0)

In [44]:
# Show the shape of the features extracted by VGG 16
X_after_vgg.shape

(15, 7, 7, 512)

In [45]:
from sklearn.utils import shuffle

# Shuffle features and labels in one call so that they stay aligned.
# X was concatenated class by class, so without this the samples would be ordered by label.
X_after_vgg, y = shuffle(X_after_vgg, y)

In [46]:
from keras.models import Sequential
from keras.layers import Dense, Flatten
from tensorflow import keras

# Define Optimizer
# NOTE(review): a learning rate of 0.1 is far above Adam's default (0.001) - confirm this is intended.
opt = keras.optimizers.Adam(learning_rate=0.1)


# Decision-making part that replaces the removed top of VGG 16:
# flatten the 7x7x512 features, two hidden ReLU layers, one sigmoid output
# (binary decision: selfie vs. mountains)
model = Sequential([
    Flatten(input_shape=(7, 7, 512)),
    Dense(1024, activation="relu"),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid"),
])

model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["acc"])

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_3 (Flatten)         (None, 25088)             0         
                                                                 
 dense_9 (Dense)             (None, 1024)              25691136  
                                                                 
 dense_10 (Dense)            (None, 16)                16400     
                                                                 
 dense_11 (Dense)            (None, 1)                 17        
                                                                 
Total params: 25,707,553
Trainable params: 25,707,553
Non-trainable params: 0
_________________________________________________________________


In [47]:
# Train the classifier on the VGG 16 features for 10 epochs; 20 % of the samples are held back for validation
model.fit(X_after_vgg, y, epochs=10, batch_size=64, validation_split=0.2, shuffle=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23c614e2b20>

In [48]:
# Write the trained model to disk.
# Only the classifier is stored here - the VGG 16 base lives in the separate vgg16_model object.
model.save("Mountain_Selfie.h5")

## Model Testing

In [49]:
# Open the test data - read_images loads the pictures stored in the "Testing" folder
testing = read_images(os.path.join("Testing"))

In [50]:
# Pre Process Input - same steps as for the training images:
# stack the list of pictures into one array, then apply the VGG 16 preprocessing
testing = np.asarray(testing)
testing = preprocess_input(testing)


In [51]:
# Show the shape of the preprocessed test batch
testing.shape

(8, 224, 224, 3)

In [52]:
# Extract the VGG 16 features for the test images
X_after_vgg_testing = vgg16_model.predict(testing)

In [54]:
# Classifier output for the test images (sigmoid layer; training labels were 1 = selfie, 0 = mountains)
model.predict(X_after_vgg_testing)

array([[1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.0000000e+00],
       [1.6332612e-28],
       [6.6269170e-38],
       [0.0000000e+00],
       [1.0000000e+00]], dtype=float32)

## Move Files

In [55]:
# Import Library os - Operating System interface
from os import listdir
from os.path import isfile, join


# Folder that holds the pictures to be sorted

path_file = "Testing"

# List comprehension - collect the names of all JPG/PNG files in that folder
filenames = [
    f
    for f in listdir(path_file)
    if isfile(join(path_file, f)) and f.endswith((".jpg", ".JPG", ".PNG", ".png"))
]

In [56]:
def read_single(path):
    """Load one image file as a 224x224 RGB array.

    The image is resized to 224x224 with the Lanczos filter and then
    converted to RGB, exactly like the training images.

    Args:
        path: Path of the image file to open.

    Returns:
        A NumPy array of shape (224, 224, 3), or ``None`` when Pillow cannot
        open or decode the file.
    """
    try:
        # The context manager releases the file handle in every case.
        with Image.open(path) as image:
            # https://pillow.readthedocs.io/en/3.1.x/reference/Image.html#PIL.Image.Image.resize
            resized = image.resize((224, 224), Image.LANCZOS)

            # https://pillow.readthedocs.io/en/3.1.x/reference/Image.html#PIL.Image.Image.convert
            rgb = resized.convert("RGB")
    except OSError:
        # Return an explicit "no image" value; falling through to a return of
        # the local variable would fail because it may never have been bound.
        return None

    return np.asarray(rgb)

In [57]:
def predict(image):
    """Return the classifier output for *image*.

    The input is reshaped into a batch of 224x224x3 pictures, preprocessed
    for VGG 16, passed through the VGG 16 base and finally through the
    trained classifier.

    Args:
        image: Image data that can be reshaped to (-1, 224, 224, 3).

    Returns:
        The result of ``model.predict`` for the batch.
    """
    batch = np.reshape(np.asarray(image), (-1, 224, 224, 3))
    features = vgg16_model.predict(preprocess_input(batch))
    return model.predict(features)

In [58]:
# Move every test picture into the folder of its predicted class.
# The classifier ends in a sigmoid layer, so its scores are floats between
# 0 and 1 that are rarely exactly 0 or 1 - comparing them with == would leave
# such pictures unsorted. The score is therefore thresholded at 0.5.
# Training labels: 1 = selfie (folder "Me"), 0 = mountains (folder "Mountains").
for file in filenames:
    source = join(path_file, file)
    image = read_single(source)
    if image is None:
        # Nothing to classify - leave the file where it is.
        continue

    # Run the expensive VGG 16 + classifier pass only once per file.
    score = float(np.ravel(predict(image))[0])
    target_folder = "Me" if score >= 0.5 else "Mountains"
    os.rename(source, join(target_folder, file))
