# Final Project: Neural Networks & Art

In [13]:
from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import os
import zipfile
from zipfile import ZipFile
import pandas as pd
from PIL import Image
import io
import numpy as np
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


## Part 0: Data

### Unzip Data Files

In [3]:
# Unpack the top-level competition archive into the working data folder.
path = "/Users/jillianbrady/Desktop/Core/Final/painter-by-numbers.zip"
directory_to_extract_to = "/Users/jillianbrady/Desktop/Core/Final/FData"

with ZipFile(path, mode='r') as main_archive:
    main_archive.extractall(path=directory_to_extract_to)

In [6]:
# The main archive contains one nested .zip per dataset part; unpack each of
# them next to the others in the same data folder.
training_files = [
    "train_1", "train_2", "train_3",
    "train_4", "train_5", "train_6",
    "train_7", "train_8", "train_9",
    "train", "test",
]

# The extraction target is the same for every part.
save_to = "/Users/jillianbrady/Desktop/Core/Final/FData"

for zfile in training_files:
    f_path = "/Users/jillianbrady/Desktop/Core/Final/FData/" + zfile + ".zip"
    with ZipFile(f_path, mode='r') as nested_archive:
        nested_archive.extractall(path=save_to)

### Prepare Data

In [142]:
# Resize images to 200 x 200 x 3
fpath = "/Users/jillianbrady/Desktop/Core/Final/FData/Picasso/"
yes_path = fpath + "YES_original/"
no_path = fpath + "NO_original"

# Image.ANTIALIAS was removed in Pillow 10 (AttributeError on current
# installs). Image.LANCZOS is the same filter under its current name.
RESIZE_FILTER = Image.LANCZOS

# Files in NO_original that are converted to RGB before resizing; without the
# conversion they would fail the (200, 200, 3) shape check and be skipped.
NO_FORCE_RGB = ("121.jpg", "195.jpg", "196.jpg")


def resize_folder(src_dir, dst_dir, force_rgb=()):
    """Write a 200x200 JPEG copy of every image in ``src_dir`` to ``dst_dir``.

    Parameters:
        src_dir: folder holding the original images.
        dst_dir: output folder prefix (must end with a path separator);
            created if it does not exist yet.
        force_rgb: file names that are converted to RGB before resizing.

    Images whose resized array is not exactly (200, 200, 3) are skipped, so
    only three-channel images reach the output folder.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for item in os.listdir(src_dir):
        if item == ".DS_Store":
            continue
        # Read the whole file into memory so the handle can be closed before
        # PIL decodes the pixels.
        with open(src_dir + '/' + item, 'rb') as file:
            im = Image.open(io.BytesIO(file.read()))
        if item in force_rgb:
            im = im.convert('RGB')
        imResize = im.resize((200, 200), RESIZE_FILTER)
        if np.array(imResize).shape == (200, 200, 3):
            # item[:-4] strips the extension; assumes a 3-character extension
            # such as ".jpg".
            imResize.save(dst_dir + item[:-4] + ' resized.jpg', 'JPEG', quality=90)


resize_folder(yes_path, fpath + "YES/")
resize_folder(no_path, fpath + "NO/", force_rgb=NO_FORCE_RGB)

In [143]:
# Data Augmentation
# Two extra variants are written for every Picasso image:
#   "<name> resized1.jpg" - the top-left square of the image
#   "<name> resized2.jpg" - the bottom-right square, rotated by 90 degrees
# NOTE(review): these variants land in the same YES folder as the plain
# resized copies, so a later random train/test split can place variants of
# one painting on both sides of the split - confirm this is intended.

# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
AUGMENT_FILTER = Image.LANCZOS


def load_original(path):
    """Open the image at ``path`` from an in-memory copy of the file."""
    with open(path, 'rb') as file:
        return Image.open(io.BytesIO(file.read()))


def save_resized_if_rgb(im, out_path):
    """Resize ``im`` to 200x200 and save it as JPEG if it has three channels."""
    imResize = im.resize((200, 200), AUGMENT_FILTER)
    if np.array(imResize).shape == (200, 200, 3):
        imResize.save(out_path, 'JPEG', quality=90)


# Cropped images
for item in os.listdir(yes_path):
    if item == ".DS_Store":
        continue
    im = load_original(yes_path + '/' + item)
    short = min(im.size)
    imCrop = im.crop((0, 0, short, short))
    save_resized_if_rgb(imCrop, fpath + "YES/" + item[:-4] + ' resized1.jpg')

# Cropped & rotated images
for item in os.listdir(yes_path):
    if item == ".DS_Store":
        continue
    im = load_original(yes_path + '/' + item)
    width, height = im.size
    short = min(width, height)
    # PIL crop boxes are (left, upper, right, lower). The previous landscape
    # box (width-height, 0, height, width) had right/lower swapped: it reached
    # past the bottom edge and had right < left whenever width > 2*height.
    # This box is the bottom-right square for every orientation and the full
    # image when it is already square.
    imCrop = im.crop((width - short, height - short, width, height))
    imCrop = imCrop.rotate(90)
    save_resized_if_rgb(imCrop, fpath + "YES/" + item[:-4] + ' resized2.jpg')

In [144]:
# Collect every prepared image into X and its class label into y.
y_path = fpath + "YES"
n_path = fpath + "NO"
X = []
y = []

# Label convention: 1 = images that are by Picasso, 0 = images not by Picasso.
# The YES folder is read first, then the NO folder.
for folder, label in ((y_path, 1), (n_path, 0)):
    for f in os.listdir(folder):
        if f == ".DS_Store":
            continue
        with open(folder + "/" + f, 'rb') as file:
            raw_bytes = file.read()
        pixels = np.array(Image.open(io.BytesIO(raw_bytes)))
        X.append(pixels)
        y.append(label)

In [145]:
# Hold out 5% of the samples as a test set; the fixed random_state makes the
# split repeatable.
X = np.array(X)
split_parts = train_test_split(X, y, random_state=40, test_size=.05)
X_train, X_test, y_train, y_test = split_parts

In [146]:
# Format data for CNN
img_rows, img_cols = 200, 200

# np.array() on a PIL image yields (rows, cols, channels), so the stacked
# data is laid out as (samples, rows, cols, channels).
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)

if K.image_data_format() == 'channels_first':
    # Move the channel axis in front of the spatial axes. A plain reshape to
    # (samples, 3, rows, cols) keeps the memory order and would mix pixel
    # values across channels instead of reordering the axes.
    X_train = np.transpose(X_train, (0, 3, 1, 2))
    X_test = np.transpose(X_test, (0, 3, 1, 2))
    input_shape = (3, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 3)

# Scale pixel values (0-255 for these JPEG images) into [0, 1] as float32.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

## Part 1: Is It a Picasso?

### The Model

In [158]:
# Small CNN for the binary "Picasso or not" task.
# With a 200x200 input and Keras' default 'valid' padding the spatial size
# shrinks as: 200 -> conv (stride 3) 66 -> pool 33 -> conv 11 -> pool 5
# -> conv 1, so Flatten() produces 64 features.
model = Sequential()
# Take the input shape from the data-formatting cell so the model follows the
# backend's channel ordering instead of assuming channels_last.
model.add(Conv2D(filters=32, activation='relu', kernel_size=3, strides=(3, 3), input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=32, activation='relu', kernel_size=3, strides=(3, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, activation='relu', kernel_size=3, strides=(3, 3)))
# A 1x1 pooling window leaves the feature map unchanged; kept so the
# architecture matches the reported results.
model.add(MaxPooling2D(pool_size=(1, 1)))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))

# Single sigmoid unit: the output is the predicted probability of "Picasso".
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# The labels come out of train_test_split as plain Python lists (y is a list),
# so they are converted to arrays before being handed to Keras.
# The held-out test split doubles as the validation set during training.
model.fit(X_train, np.asarray(y_train),
          batch_size=16,
          epochs=5,
          verbose=1,
          validation_data=(X_test, np.asarray(y_test)))

Train on 2926 samples, validate on 155 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x1a45ee2f60>

### Evaluation

In [159]:
# Turn the sigmoid outputs for the test split into hard labels:
# below 0.5 -> 0 (not Picasso), otherwise -> 1 (Picasso).
predictions = model.predict(X_test)
pre_round = [0 if val < .5 else 1 for val in predictions]

In [164]:
# Tally correct / incorrect predictions on the test split and break the
# errors down into false negatives (a Picasso predicted as "not Picasso")
# and false positives (a non-Picasso predicted as "Picasso").
correct = 0
incorrect = 0
false_positive = 0
false_negative = 0
for actual, predicted in zip(y_test, pre_round):
    if actual == predicted:
        correct += 1
    else:
        incorrect += 1
        if actual == 1:
            false_negative += 1
        else:
            false_positive += 1

# Guard both ratios: an empty test set or a perfect score would otherwise
# raise ZeroDivisionError.
total = len(y_test)
accuracy_text = str(correct / total) if total else "n/a (empty test set)"
# This value is the share of the *incorrect* predictions that are false
# negatives, printed as a fraction between 0 and 1 despite the "%" label.
fn_share_text = str(false_negative / incorrect) if incorrect else "n/a (no incorrect predictions)"

print("Accuracy:\t\t" + accuracy_text)
print("% False Negatives:\t" + fn_share_text)

Accuracy:		0.7612903225806451
% False Negatives:	0.43243243243243246


* The model predicts whether a work of art is by Picasso with **76% accuracy**.
* Among its incorrect predictions, there is a **rough balance between false negatives (about 43%) and false positives (about 57%)**.
>
>This means that the model is not simply predicting "Yes" for every image (as it might if works by Picasso dominated the dataset), nor predicting "No" for every image (as it might if works by Picasso were rare in the dataset).

### Further Evaluation


In [165]:
# Load the extra evaluation images: test_yes holds works by Picasso and
# test_no holds works by other artists.
ty_path = fpath + "test_yes"
tn_path = fpath + "test_no"
ty = []
tn = []

# Each folder is read into its own list of pixel arrays.
for folder, bucket in ((ty_path, ty), (tn_path, tn)):
    for f in os.listdir(folder):
        if f == ".DS_Store":
            continue
        with open(folder + "/" + f, 'rb') as file:
            raw_bytes = file.read()
        bucket.append(np.array(Image.open(io.BytesIO(raw_bytes))))

In [166]:
# Formatting data for CNN
img_rows, img_cols = 200, 200

# np.array() on a PIL image yields (rows, cols, channels), so each stack is
# laid out as (samples, rows, cols, channels).
ty = np.array(ty)
tn = np.array(tn)
ty = ty.reshape(ty.shape[0], img_rows, img_cols, 3)
tn = tn.reshape(tn.shape[0], img_rows, img_cols, 3)

if K.image_data_format() == 'channels_first':
    # Move the channel axis in front of the spatial axes. A plain reshape to
    # (samples, 3, rows, cols) keeps the memory order and would mix pixel
    # values across channels instead of reordering the axes.
    ty = np.transpose(ty, (0, 3, 1, 2))
    tn = np.transpose(tn, (0, 3, 1, 2))

# Same scaling as the training data: float32 values in [0, 1].
ty = ty.astype('float32') / 255
tn = tn.astype('float32') / 255

In [172]:
# Score the two extra image sets and threshold the sigmoid outputs at 0.5
# (below 0.5 -> 0, otherwise -> 1).
all_pred = model.predict(ty)
none_pred = model.predict(tn)

all_rounded = [0 if pred < .5 else 1 for pred in all_pred]
none_rounded = [0 if pred < .5 else 1 for pred in none_pred]

In [173]:
# Thresholded predictions for the images loaded from the test_yes folder
# (1 = predicted to be by Picasso).
print(all_rounded)

[1, 1, 1, 1, 1]


The model correctly identifies all 5 works by Picasso that it has never seen before.

In [174]:
# Thresholded predictions for the images loaded from the test_no folder
# (0 = predicted not to be by Picasso).
print(none_rounded)

[1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]


Of 15 works that are not by Picasso that the model has never seen before, it predicts that 3 are by Picasso and that 12 are not.