# Libraries & Helper Functions

In [127]:
import pandas as pd
import numpy as np
from PIL import Image
from scipy import ndimage, misc
import matplotlib.pyplot as plt
%matplotlib inline
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten
from keras.optimizers import Adam
from keras import backend as K
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils

In [65]:
def convert_jpg_array(poster_path, img_height, img_width):
    '''
    inputs: director of poster
    outputs: array of poster with given heights and width
    '''
    try:
        img = Image.open(poster_path)
        img = misc.imresize(img, (img_height, img_width))
        arr = np.array(img)
    except:
        arr = None
    return arr

In [101]:
def create_keras_array(df, img_height, img_weight):
    poster_array = np.zeros((df.shape[0], img_height, img_weight, 3))
    for i, value in df['poster'].iteritems():
        try:
            if len(value.shape) < 3:
                value = np.resize(value, (value.shape[0], value.shape[1], 3))
            poster_array[i, :, :, :] = value
        except:
            print(i)
    return poster_array

997
998
999


In [None]:
def encode_outcomes(genres):
    encoder = LabelEncoder()
    encoded_Y = encoder.fit_transform(genres)
    y = np_utils.to_categorical(encoded_Y)
    return y

# Load Data

In [62]:
train_df = pd.read_csv('train_w_poster.csv', encoding = "ISO-8859-1")
test_df = pd.read_csv('test_w_poster.csv', encoding = "ISO-8859-1")

train_df = train_df[train_df['poster_path'].notnull()]
test_df = test_df[test_df['poster_path'].notnull()]

#change poster path accordingly
POSTER_PREFIX_PATH = 'poster_images/'

train_df['poster_path'] = train_df['poster_path'].apply(lambda path: POSTER_PREFIX_PATH + path)
test_df['poster_path'] = test_df['poster_path'].apply(lambda path: POSTER_PREFIX_PATH + path)

# Generate Train and Test Data

In [None]:
IMG_HEIGHT = 225
IMG_WIDTH = 150

poster_train = train_df['poster_path'].apply(lambda x: convert_jpg_array(x, IMG_HEIGHT, IMG_WIDTH))
poster_test = test_df['poster_path'].apply(lambda x: convert_jpg_array(x, IMG_HEIGHT, IMG_WIDTH))

In [95]:
# train - recombine genres and posters, filtering out any null poster values
poster_train_df = train_df[['genre']]
poster_train_df['poster'] = poster_train
poster_train_df = poster_train_df[poster_train_df['poster'].notnull()]

# test - recombine genres and posters, filtering out any null poster values
poster_test_df = test_df[['genre']]
poster_test_df['poster'] = poster_test
poster_test_df = poster_test_df[poster_test_df['poster'].notnull()]

In [None]:
X_train = create_keras_array(poster_train_df, IMG_HEIGHT, IMG_WIDTH)
X_test = create_keras_array(poster_test_df, IMG_HEIGHT, IMG_WIDTH)

In [121]:
y_train = encode_outcomes(poster_train_df['genre'])
y_test = encode_outcomes(poster_test_df['genre'])

# Initialize Deep Learning Model

In [123]:
# create an empty network model
model = Sequential()

# --- input layer ---
model.add(Conv2D(16, kernel_size=(5, 5), activation='relu', input_shape=X_train.shape[1:]))
# --- max pool ---
model.add(MaxPooling2D(pool_size=(2, 2)))

# flatten for fully connected classification layer
model.add(Flatten())
# note that the 10 is the number of classes we have
# the classes are mutually exclusive so softmax is a good choice
# --- fully connected layer ---
model.add(Dense(16, activation='relu'))
# --- classification ---
model.add(Dense(y_train.shape[1], activation='softmax'))

# prints out a summary of the model architecture
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 221, 146, 16)      1216      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 110, 73, 16)       0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 128480)            0         
_________________________________________________________________
dense_5 (Dense)              (None, 16)                2055696   
_________________________________________________________________
dense_6 (Dense)              (None, 11)                187       
Total params: 2,057,099.0
Trainable params: 2,057,099.0
Non-trainable params: 0.0
_________________________________________________________________


In [125]:
LEARNING_RATE = 0.1

adam = Adam(lr=LEARNING_RATE)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])

In [None]:
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
                             factor = 0.2,
                             patience = 5,
                             min_lr = 0.001)

# Run Deep Learning Model

In [126]:
BATCH_SIZE = 100
EPOCHS = 200

history = model.fit(X_train, y_train,
                    validation_split = 0.2,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    verbose=1,
                   callbacks = [reduce_lr])

# once training is complete, let's see how well we have done
score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Epoch 1/3
Epoch 2/3
Epoch 3/3
