In [1]:
import os
import pickle

import keras.utils
import numpy as np
import pandas as pd
from PIL import Image
from keras.layers import Input, Conv2D, Activation, Flatten, Dense
from keras.layers import (concatenate)
from keras.models import Model
from keras.optimizers import Adam
from skimage.transform import resize
from sklearn.metrics import accuracy_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm


data_image = "C:\\Users\\Priit\\Dropbox\\Informaatika\\Magister\\Tehisnärvivõrgud\\data\\train_images"
data_dir = "C:\\Users\\Priit\\Dropbox\\Informaatika\\Magister\\Tehisnärvivõrgud\\data"

label_column = "AdoptionSpeed"


test_size = 0.2
height, width = 100, 100

Using TensorFlow backend.


In [2]:
train = pd.read_csv(os.path.join(data_dir, "train.csv"), sep=',')
pet_ids = train["PetID"]

selected_columns = ["Type",
                    "Gender",
                    "Color1",
                    "Color2",
                    "Color3",
                    "MaturitySize",
                    "FurLength",
                    "Vaccinated",
                    "Dewormed",
                    "Sterilized",
                    "Health",
                    "State",
                    "MaturitySize"]




y = train[label_column]

# One-Hot-encode
X = pd.get_dummies(train[selected_columns], columns=selected_columns)

# Normalize:
to_normalize = ["Age", "Fee", "Quantity"]
for to_norm in to_normalize:
     X[to_norm] = (train[to_norm] - train[to_norm].mean()) / train[to_norm].std()

In [3]:
f_im_name = "images.binary"

if not os.path.isfile(f_im_name):
    
    image_paths = [os.path.join(data_dir, data_image, pet_id + "-1.jpg") for pet_id in pet_ids]

    images = []

    for path in tqdm(image_paths):
        if os.path.isfile(path):
            image = Image.open(path).convert("RGB") 
            image.load()
            image = np.asarray(image, dtype="int32" )
            image = resize(image, (height, width), anti_aliasing=True, mode='constant')
        else:
            image = np.random.rand(height, width, 3) * 255
        
        images.append(image)
    
    images = np.array(images)

    # Standardize:
    mean = np.mean(images)
    std = np.std(images)

    images_meanstd = (images - mean)/std
    with open(f_im_name, 'wb') as handle:
        pickle.dump(images_meanstd, handle, protocol=pickle.HIGHEST_PROTOCOL)
else:
    with open(f_im_name, 'rb') as handle:
        images_meanstd = pickle.load(handle)

print(images_meanstd.shape)

100%|████████████████████████████████████████████████████████████████████████████| 14993/14993 [08:55<00:00, 27.98it/s]


(14993, 100, 100, 3)


In [20]:
X_train_img, X_test_img, X_train_else, X_test_else, y_train, y_test = train_test_split(images_meanstd, 
                                                                                       X, 
                                                                                       y, 
                                                                                       test_size=0.95,
                                                                                       random_state=1)

In [23]:
# https://stackoverflow.com/questions/49618986/neural-network-in-keras-with-two-different-input-types-images-and-values
# https://www.learnopencv.com/keras-tutorial-transfer-learning-using-pre-trained-models/
from keras.applications import ResNet50

transfer = ResNet50(weights='imagenet', include_top=False, input_shape=(height, width, 3))

# Freeze ResNet50
for layer in transfer.layers[:-3]:
    layer.trainable = False

# Inputs
image_input = Input(shape=(height, width, 3))
aux_input = Input(shape=(len(list(X_train_else)),))

# Images:
transfer = transfer(image_input)
transfer = Dense(150, activation='relu')(transfer)
flatten = Flatten()(transfer)

# Aux input:
x = Dense(150, activation='relu')(aux_input)
x = Dense(250, activation='relu')(x)
x = Dense(350, activation='relu')(x)

# Merged:
merge = concatenate([flatten, x])
x = Dense(500)(merge)
x = Dense(450, activation='relu')(x)
x = Dense(100, activation='relu')(x)
h = Dense(5)(x)

# Predictions:
predictions = Activation('softmax')(h)

model = Model(inputs=[image_input, aux_input], outputs=predictions)
model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])


history = model.fit([X_train_img, X_train_else], 
                    keras.utils.to_categorical(y_train),
                    batch_size=8, 
                    epochs=1, 
                    validation_split=0.2)

Train on 599 samples, validate on 150 samples
Epoch 1/1


In [26]:
train_pred = [np.argmax(pred) for pred in model.predict([X_train_img, X_train_else])]
test_predictions = [np.argmax(pred) for pred in model.predict([X_test_img, X_test_else])]

In [27]:
print("Kappa on train: {}".format(round(cohen_kappa_score(y_train, train_pred, weights="quadratic"), 4)))
print("Accuracy on train: {}".format(round(accuracy_score(y_train, train_pred), 4)))
print("________________")
print("Kappa on test: {}".format(round(cohen_kappa_score(y_test, test_predictions, weights="quadratic"), 4)))
print("Accuracy on test: {}".format(round(accuracy_score(y_test, test_predictions), 4)))

Kappa on train: 0.0433
Accuracy on train: 0.3084
________________
