In [None]:
import pandas as pd
import numpy as np
import os.path
from keras.applications import ResNet50
from keras.optimizers import Adam
import cv2
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K

In [None]:
# Annotation CSVs, given as relative paths (resolved against the working directory).
CSV_TRAIN_FILE = 'train.csv'
# Image path templates: the `{}` placeholder is filled with an image file name
# through str.format inside load_images.
TRAIN_IMAGES_PATH = '../../data/train_/{}'
CSV_TEST_FILE = 'test.csv'
TEST_IMAGES_PATH = '../../data/test_/{}'

In [None]:
# Training annotations. load_images unpacks every row as
# (img_id, age, gender, view, file, detected), so the CSV is assumed to hold
# exactly those six columns in that order -- TODO confirm against train.csv.
df_train = pd.read_csv(CSV_TRAIN_FILE)
# Length of the one-hot target vector and size of the softmax output layer.
NUM_CLASSES = 15
# Resize target handed to cv2.resize as (x_size, y_size); also used for the model input shape.
IMAGE_X_SIZE = 350
IMAGE_Y_SIZE = 350

### Loading images

In [None]:
def load_images(data, images_path, x_size, y_size, train):
    """Load and resize every image listed in an annotation table.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per image. Each row must unpack into exactly six values in the
        order (img_id, age, gender, view, file, detected).
    images_path : str
        Format template with a single ``{}`` placeholder that receives the
        row's ``file`` value.
    x_size, y_size : int
        Target width and height. They are passed to ``cv2.resize`` as
        ``(x_size, y_size)``, i.e. (width, height), so every returned image
        array has shape ``(y_size, x_size, channels)``.
    train : bool
        When true, ``detected`` is parsed as ``"class_<index>"`` and turned
        into a one-hot vector of length ``NUM_CLASSES``. When false, the
        ``detected`` value is ignored.

    Returns
    -------
    x : list of numpy.ndarray
        The resized images, in row order.
    y : numpy.ndarray
        uint8 one-hot labels, one row per image, when ``train`` is true;
        an empty uint8 array otherwise.
    ids : list
        The ``img_id`` of every row, in row order.

    Raises
    ------
    IOError
        If an image file is missing or cannot be decoded.
    """
    resize = (x_size, y_size)
    x = []
    y = []
    ids = []

    for img_id, age, gender, view, file, detected in tqdm(data.values, miniters=100):
        path = images_path.format(file)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread does not raise on a missing/corrupt file, it returns
            # None; fail here with the offending path instead of letting
            # cv2.resize die with an opaque assertion error.
            raise IOError("Could not read image: {}".format(path))
        x.append(cv2.resize(img, resize))
        ids.append(img_id)

        if train:
            # Labels look like "class_7": strip the prefix to get the index.
            targets = np.zeros(NUM_CLASSES)
            index = int(detected.replace("class_", ""))
            targets[index] = 1
            y.append(targets)

    y = np.array(y, np.uint8)

    return x, y, ids

In [None]:
# Read and resize every training image. y_all holds the one-hot labels and
# ids the per-row image ids returned by load_images.
X_all, y_all, ids = load_images(df_train, TRAIN_IMAGES_PATH, IMAGE_X_SIZE, IMAGE_Y_SIZE, train=True)

### Split the dataset into training and hold-out (validation) sets

In [None]:
# Hold out 30% of the labelled data; random_state is fixed so the split is
# the same on every run. X_test / y_test are passed to model.fit as
# validation_data -- they are not the unlabelled test set read later from
# CSV_TEST_FILE.
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.3, random_state=0)

In [None]:
# Stack each split's list of images into a single float32 array and divide the
# pixel values by 255. Note that re-running this cell divides by 255 again.
X_train = np.asarray(X_train, dtype=np.float32) / 255
X_test = np.asarray(X_test, dtype=np.float32) / 255

### Building a model

In [None]:
# ResNet50 backbone with ImageNet weights and without its classification head.
# Keras expects a channels-last input shape as (height, width, channels).
# load_images resizes with cv2.resize(img, (x_size, y_size)), i.e. width = X
# and height = Y, so the arrays it produces are (Y, X, 3): Y must come first.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(IMAGE_Y_SIZE, IMAGE_X_SIZE, 3))
for layer in base_model.layers[1:]:  # freeze every backbone layer after the first
    layer.trainable = False
# New classification head: two dropout-regularised dense layers followed by a
# softmax over NUM_CLASSES outputs.
model = Sequential([
    base_model,
    Flatten(),
    Dense(2048, activation='relu'),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax')
])
# Re-enable training for the last 35 backbone layers only
# (model.layers[0] is base_model); everything before them stays frozen.
for layer in model.layers[0].layers[-35:]:
    layer.trainable = True
# trainable flags are set before compile() so that they take effect.
optimizer = Adam(0.0001, decay=0.00000001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

#### Loading model parameters

In [None]:
# Restore previously saved weights into the model built above. The file is
# looked up relative to the working directory and must already exist: none of
# the visible cells writes it (ModelCheckpoint is imported but never used).
model.load_weights("ResNet50.hdf5")

### Training a model

In [None]:
# Train for 50 epochs in mini-batches of 64, evaluating on the held-out split
# after each epoch; the returned History object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=64,
    epochs=50,
    verbose=1,
)

### Predictions

In [None]:
# Test annotations. load_images unpacks six values per row, the last one being
# `detected`, so a placeholder column is appended to make test rows unpack the
# same way (presumably test.csv has no label column -- verify). With
# train=False the placeholder value is never parsed.
df_test = pd.read_csv(CSV_TEST_FILE)
df_test['detected'] = 'dummy'

In [None]:
# Load the unlabelled test images. With train=False load_images returns an
# empty label array; bind it to a throwaway name rather than `y_test`, which
# still holds the validation labels produced by train_test_split and must not
# be clobbered (re-running training or evaluation would otherwise break).
test_X_all, _test_labels, ids = load_images(df_test, TEST_IMAGES_PATH, IMAGE_X_SIZE, IMAGE_Y_SIZE, train=False)
# Same scaling as the training data: float32 pixel values divided by 255.
test_X_all = np.array(test_X_all, np.float32) / 255

In [None]:
# Run the network over the test images in batches of 64. The final layer is a
# softmax over NUM_CLASSES units, so each output row holds per-class scores.
prediction = model.predict(test_X_all, batch_size=64, verbose=1)

### Create submission file

In [None]:
# Pick the highest-scoring class of every prediction row and format it as "class_<n>".
labels = ["class_" + str(idx) for idx in prediction.argmax(axis=-1)]
# One (row_id, detected) pair per test image, in the order the images were loaded.
dataFrame = {
    'row_id': ids,
    'detected': labels,
}
submission = pd.DataFrame(dataFrame)
# Write the two columns in a fixed order, without the pandas index.
submission[['row_id', 'detected']].to_csv('answers.csv', index=False)