In [9]:
import os
import pickle

import keras.utils
import numpy as np
import pandas as pd
from PIL import Image
from keras.layers import Input, Conv2D, Activation, Flatten, Dense
from keras.layers import (concatenate)
from keras.models import Model
from keras.optimizers import Adam
from skimage.transform import resize
from sklearn.metrics import accuracy_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm


# --- Configuration ---------------------------------------------------------
# Absolute locations of the dataset root and of the training images folder.
data_dir = "C:\\Users\\Priit\\Dropbox\\Informaatika\\Magister\\Tehisnärvivõrgud\\data"
data_image = "C:\\Users\\Priit\\Dropbox\\Informaatika\\Magister\\Tehisnärvivõrgud\\data\\train_images"

# Column of train.csv that holds the prediction target.
label_column = "AdoptionSpeed"

# Row cap for quick experiments (e.g. 100); -1 is the sentinel for "no cap".
LIMIT = -1

# Fraction of the samples held out as the test set.
test_size = 0.2

# Spatial size (in pixels) every image is resized to.
height = 100
width = 100

In [11]:
# Load the tabular training data; LIMIT == -1 means "read every row".
train = pd.read_csv(os.path.join(data_dir, "train.csv"), sep=',', nrows=LIMIT if LIMIT != -1 else None)
pet_ids = train["PetID"]

# Categorical columns to one-hot encode. Every name must be listed exactly
# once: a repeated name makes train[selected_columns] carry duplicate column
# labels, and get_dummies then emits the same dummy columns several times.
selected_columns = ["Type",
                    "Gender",
                    "Color1",
                    "Color2",
                    "Color3",
                    "MaturitySize",
                    "FurLength",
                    "Vaccinated",
                    "Dewormed",
                    "Sterilized",
                    "Health",
                    "State"]

# Target labels.
y = train[label_column]

# One-hot encode the categorical columns.
X = pd.get_dummies(train[selected_columns], columns=selected_columns)

# Standardize the numeric columns (zero mean, unit variance) and append them
# to the one-hot features.
to_normalize = ["Age", "Fee", "Quantity"]
for to_norm in to_normalize:
    X[to_norm] = (train[to_norm] - train[to_norm].mean()) / train[to_norm].std()

In [5]:

# Cache file for the standardized image tensor, so the slow decode/resize pass
# only has to run once.
# NOTE: the cache is loaded without any check against LIMIT, height/width or
# the preprocessing below -- delete the file after changing any of them.
f_im_name = "images.binary"

if not os.path.isfile(f_im_name):

    # First photo ("-1.jpg") of every pet. With LIMIT == -1 the slice drops the
    # last path; the train/test split trims X and y to the same length, so the
    # rows stay aligned.
    image_paths = [os.path.join(data_dir, data_image, pet_id + "-1.jpg") for pet_id in pet_ids][:LIMIT]

    images = []

    for path in tqdm(image_paths):
        if os.path.isfile(path):
            # The context manager releases the file handle as soon as the
            # pixels have been copied into the array.
            with Image.open(path) as img:
                pixels = np.asarray(img.convert("RGB"), dtype="int32")
            # preserve_range=True keeps the 0..255 pixel scale. Without it,
            # resize rescales integer input following the img_as_float
            # conventions, which leaves real images on a far smaller scale than
            # the 0..255 placeholders below and lets the placeholders dominate
            # the mean/std used for standardization.
            image = resize(pixels, (height, width), anti_aliasing=True, mode='constant',
                           preserve_range=True)
        else:
            # Pets without a photo get a random-noise placeholder in 0..255.
            image = np.random.rand(height, width, 3) * 255

        images.append(image)

    images = np.array(images)

    # Standardize with a single global mean/std over all pixels and channels.
    mean = np.mean(images)
    std = np.std(images)

    images_meanstd = (images - mean)/std
    with open(f_im_name, 'wb') as handle:
        pickle.dump(images_meanstd, handle, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # Only unpickle cache files written by this notebook: pickle.load can
    # execute arbitrary code from an untrusted file.
    with open(f_im_name, 'rb') as handle:
        images_meanstd = pickle.load(handle)

print(images_meanstd.shape)

(14992, 100, 100, 3)


In [16]:
# Split images, tabular features and labels in a single call so the three stay
# row-aligned. X and y are trimmed to the number of loaded images rather than
# sliced with [:LIMIT]: with the "no cap" sentinel LIMIT == -1 that slice
# silently drops the last row and only matches the image tensor because the
# image cell slices its paths the same way.
n_samples = len(images_meanstd)
X_train_img, X_test_img, X_train_else, X_test_else, y_train, y_test = train_test_split(
    images_meanstd,
    X.iloc[:n_samples],
    y.iloc[:n_samples],
    test_size=test_size,
    random_state=1,
)

In [None]:
# Two-input network: a small convolutional stack for the image plus the tabular
# features, concatenated and fed to a dense softmax classifier. Pattern from:
# https://stackoverflow.com/questions/49618986/neural-network-in-keras-with-two-different-input-types-images-and-values

# Width of the softmax output; also used to one-hot encode the labels so the
# target shape cannot drift from the output layer.
n_classes = 5

aux_input = Input(shape=(X_train_else.shape[1],))
image_input = Input(shape=(height, width, 3))

# Image branch.
# NOTE(review): there is no activation between the two Conv2D layers, so
# together they act as a single linear map -- consider adding one.
h = Conv2D(32, (7, 7), strides=(2, 2))(image_input)
h = Conv2D(64, (14, 14))(h)
h = Activation('relu')(h)
flatten = Flatten()(h)

# Join image features with the tabular features.
merge = concatenate([flatten, aux_input])

# Classifier head on the merged features. (An earlier, wider Dense tower was
# built here but never connected to the output; it has been removed because it
# only allocated unused weights.)
d1 = Dense(100, activation='elu')(merge)
d2 = Dense(50, activation='elu')(d1)
d3 = Dense(10, activation='elu')(d2)
h = Dense(n_classes)(d3)
predictions = Activation('softmax')(h)

model = Model(inputs=[image_input, aux_input], outputs=predictions)
model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])


# num_classes is pinned so the one-hot targets always match the output layer,
# even if the highest class happens to be absent from a small training subset.
history = model.fit([X_train_img, X_train_else],
                    keras.utils.to_categorical(y_train, num_classes=n_classes),
                    batch_size=8,
                    epochs=1,
                    validation_split=0.2)

Train on 9594 samples, validate on 2399 samples
Epoch 1/1
  56/9594 [..............................] - ETA: 51:02 - loss: 1.6108 - acc: 0.50 - ETA: 32:13 - loss: 5.3404 - acc: 0.37 - ETA: 25:44 - loss: 4.4097 - acc: 0.29 - ETA: 22:38 - loss: 3.7346 - acc: 0.25 - ETA: 20:37 - loss: 3.3539 - acc: 0.22 - ETA: 19:15 - loss: 3.9835 - acc: 0.22 - ETA: 18:24 - loss: 3.6440 - acc: 0.2321

In [None]:
# Predicted class = index of the highest softmax probability, per sample.
test_predictions = [np.argmax(pred) for pred in model.predict([X_test_img, X_test_else])]
# Train predictions must come from the *train* inputs so they line up with
# y_train; predicting on the test inputs here gives a length mismatch.
train_pred = [np.argmax(pred) for pred in model.predict([X_train_img, X_train_else])]
print(test_predictions)

# Quadratic-weighted kappa and plain accuracy on both splits.
print("Kappa on train: {}".format(round(cohen_kappa_score(y_train, train_pred, weights="quadratic"), 4)))
print("Accuracy on train: {}".format(round(accuracy_score(y_train, train_pred), 4)))
print("________________")
print("Kappa on test: {}".format(round(cohen_kappa_score(y_test, test_predictions, weights="quadratic"), 4)))
print("Accuracy on test: {}".format(round(accuracy_score(y_test, test_predictions), 4)))