In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score as accu

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.linear_model import Perceptron
from sklearn.ensemble import RandomForestClassifier as forest
from sklearn.ensemble import GradientBoostingClassifier as gbc

from sklearn.model_selection import ParameterGrid

import cv2
import os
from tqdm import tqdm, tqdm_notebook
from keras.applications.densenet import preprocess_input, DenseNet121

In [None]:
# Training table; the PetID column drives the image lookup below.
train_df = pd.read_csv('../input/train.csv')
img_size = 256   # side length (pixels) of the square images fed to the network
batch_size = 16  # number of images passed to the network per predict() call

pet_ids = train_df['PetID'].values
# Ceiling division. The previous `len(pet_ids) // batch_size + 1` scheduled an
# extra, empty batch whenever the pet count was an exact multiple of batch_size.
n_batches = (len(pet_ids) + batch_size - 1) // batch_size

In [None]:
def resize_to_square(im):
    """Fit an image into an ``img_size`` x ``img_size`` square, padding with black.

    The longer side is scaled to the module-level ``img_size`` while keeping the
    aspect ratio; the remaining space is filled with black borders split as
    evenly as possible between the two opposite sides.

    Parameters
    ----------
    im : array
        Image whose first two axes are (height, width), e.g. the result of
        ``cv2.imread``.

    Returns
    -------
    array
        The resized and padded image returned by ``cv2.copyMakeBorder``.
    """
    height, width = im.shape[:2]
    ratio = float(img_size) / max(height, width)
    # Clamp to at least one pixel: for extremely elongated images the shorter
    # side would otherwise truncate to 0 and cv2.resize would fail.
    new_h = max(1, int(height * ratio))
    new_w = max(1, int(width * ratio))
    # cv2.resize expects the target size as (width, height).
    im = cv2.resize(im, (new_w, new_h))
    delta_w = img_size - new_w
    delta_h = img_size - new_h
    # When the padding is odd, the extra pixel goes to the bottom / right.
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)
    color = [0, 0, 0]
    return cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

def load_image(path, pet_id):
    """Load a pet's first photo and prepare it as network input.

    Parameters
    ----------
    path : str
        Directory prefix, including the trailing separator; it is concatenated
        directly with the file name.
    pet_id
        Pet identifier; the file read is ``{path}{pet_id}-1.jpg``.

    Returns
    -------
    array
        The image after ``resize_to_square`` and the DenseNet
        ``preprocess_input``.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (``cv2.imread`` returned ``None``).
    """
    image_path = f'{path}{pet_id}-1.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing/unreadable file by returning None rather
        # than raising; fail with a message that names the file.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    # OpenCV decodes to BGR, whereas the DenseNet preprocess_input normalises
    # each channel with RGB-ordered ImageNet statistics.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    new_image = resize_to_square(image)
    return preprocess_input(new_image)

In [None]:
from keras.models import Model
from keras.layers import GlobalAveragePooling2D, Input, Lambda, AveragePooling1D
import keras.backend as K
# Feature extractor: DenseNet121 without its classification head, followed by
# pooling that condenses the final feature map into one short vector per image.
# The input size is taken from img_size so it stays in sync with the image
# resizing and the batch buffers (it was hard-coded to 256 before).
inp = Input((img_size, img_size, 3))
backbone = DenseNet121(input_tensor = inp, include_top = False)
x = backbone.output
x = GlobalAveragePooling2D()(x)                        # (batch, channels)
x = Lambda(lambda t: K.expand_dims(t, axis = -1))(x)   # (batch, channels, 1)
x = AveragePooling1D(4)(x)                             # average every 4 consecutive channels
out = Lambda(lambda t: t[:, :, 0])(x)                  # drop the helper axis again

m = Model(inp,out)

In [None]:
# Run every training pet's first photo through the feature extractor, batch by
# batch, and keep one feature vector per PetID.
features = {}
missing_train_images = []  # PetIDs whose photo could not be loaded
for b in tqdm_notebook(range(n_batches)):
    start = b*batch_size
    end = (b+1)*batch_size
    batch_pets = pet_ids[start:end]
    if len(batch_pets) == 0:
        # Nothing left to process (can happen when the pet count is an exact
        # multiple of batch_size); do not call predict() on an empty array.
        continue
    batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
    for i,pet_id in enumerate(batch_pets):
        try:
            batch_images[i] = load_image("../input/train_images/", pet_id)
        except Exception:
            # Best effort: a pet without a readable photo keeps the all-zero
            # image. `Exception` (not a bare except) so that interrupting the
            # kernel still works, and the failure is recorded instead of hidden.
            missing_train_images.append(pet_id)
    batch_preds = m.predict(batch_images)
    for i,pet_id in enumerate(batch_pets):
        features[pet_id] = batch_preds[i]

print("{} of {} training pets had no usable image".format(len(missing_train_images), len(pet_ids)))

In [None]:
# One row per PetID (the index), one `pic_<n>` column per extracted feature.
train_feats = pd.DataFrame.from_dict(features, orient='index')
train_feats.columns = list(map('pic_{}'.format, range(train_feats.shape[1])))

In [None]:
# Same feature extraction as for the training set, applied to the test pets.
# Note: pet_ids, n_batches and features are re-bound to the test set here.
test_df = pd.read_csv('../input/test.csv')

pet_ids = test_df['PetID'].values
# Ceiling division, so no empty trailing batch is scheduled when the pet count
# is an exact multiple of batch_size.
n_batches = (len(pet_ids) + batch_size - 1) // batch_size

features = {}
missing_test_images = []  # PetIDs whose photo could not be loaded
for b in tqdm_notebook(range(n_batches)):
    start = b*batch_size
    end = (b+1)*batch_size
    batch_pets = pet_ids[start:end]
    if len(batch_pets) == 0:
        continue
    batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
    for i,pet_id in enumerate(batch_pets):
        try:
            batch_images[i] = load_image("../input/test_images/", pet_id)
        except Exception:
            # Best effort: a pet without a readable photo keeps the all-zero
            # image. `Exception` (not a bare except) so that interrupting the
            # kernel still works, and the failure is recorded instead of hidden.
            missing_test_images.append(pet_id)
    batch_preds = m.predict(batch_images)
    for i,pet_id in enumerate(batch_pets):
        features[pet_id] = batch_preds[i]

print("{} of {} test pets had no usable image".format(len(missing_test_images), len(pet_ids)))

In [None]:
# One row per PetID (the index), one `pic_<n>` column per extracted feature.
test_feats = pd.DataFrame.from_dict(features, orient='index')
test_feats.columns = list(map('pic_{}'.format, range(test_feats.shape[1])))

In [None]:
# Move the PetID index into a regular column so the image features can later
# be merged with the tabular data on 'PetID'.
test_feats = test_feats.reset_index().rename(columns={'index': 'PetID'})
train_feats = train_feats.reset_index().rename(columns={'index': 'PetID'})

test_feats.head()

In [None]:
# Training table for the tabular model, and the target column as an array.
df_train = pd.read_csv("../input/train.csv")
y_train = df_train.loc[:, "AdoptionSpeed"].values

In [None]:
# Show the column names of the training table.
# (Disabled experiment, kept for reference: test = pd.merge(test, test_feats, how='left', on='PetID'))

df_train.columns

In [None]:
def select_cols(df):
    """Return a frame restricted to the tabular feature columns.

    Identifier, free-text and target columns are left out; the listed columns
    are returned in the order given below.
    """
    wanted = (
        'Type', 'Age', 'Breed1', 'Breed2', 'Gender',
        'Color1', 'Color2', 'Color3',
        'MaturitySize', 'FurLength',
        'Vaccinated', 'Dewormed', 'Sterilized', 'Health',
        'Quantity', 'Fee', 'VideoAmt', 'PhotoAmt',
    )
    return df[list(wanted)]
#df_train = pd.merge(df_train, train_feats, how='left', on='PetID')
def train_scaler(df):
    """Fit a fresh ``StandardScaler`` on ``df`` and return the fitted scaler."""
    return StandardScaler().fit(df)

def clean_dataset(df, scaler):
    """Apply an already-fitted scaler to ``df`` and wrap the result in a DataFrame.

    The returned frame is built from whatever ``scaler.transform`` returns, so
    it carries default integer column labels rather than the names in ``df``.
    """
    scaled_values = scaler.transform(df)
    return pd.DataFrame(scaled_values)

In [None]:
# Display the dtype of every column in the training table.
df_train.dtypes

In [None]:
# Build the model matrix: scaled tabular columns followed by the image
# features, then hold out a third of the rows for validation.
petIds = df_train["PetID"]
tabular_train = select_cols(df_train)
scaler = train_scaler(tabular_train)
scaled_train = clean_dataset(tabular_train, scaler)
# PetID is attached only as the merge key and removed again right after.
scaled_train["PetID"] = petIds
df_train = (
    pd.merge(scaled_train, train_feats, how='left', on='PetID')
    .drop(["PetID"], axis='columns')
)
X_train, X_test, y_train, y_test = train_test_split(
    df_train.values,
    y_train,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Sanity check: feature rows and labels of the training split must match.
print(len(X_train), len(y_train), sep="\n")

In [None]:
# Summary statistics of the training split with the target appended as 'y'.
toto = pd.DataFrame(X_train).assign(y=y_train)
toto.describe()

In [None]:
# Summary statistics of the full model matrix (scaled tabular + image features).
df_train.describe()

In [None]:
# Manual grid search over the number of boosting stages; for each setting the
# accuracy on the training split and on the held-out split is printed.
parameters = {'n_estimators':[150, 200, 250]}
grid = ParameterGrid(parameters)

for params in grid:
    # `loss` is left at its default: the literal 'deviance' was only an alias of
    # the default loss and is rejected by recent scikit-learn releases.
    # random_state makes the comparison between settings repeatable.
    clf = gbc(learning_rate=0.11, random_state=42, **params)
    print("Fit with parameters : {}".format(params))
    clf.fit(X_train, y_train)

    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)

    print(accu(y_train, y_pred_train))  # accuracy on the training split
    print(accu(y_test, y_pred_test))    # accuracy on the held-out split

In [None]:
# Final model used for the submission. `loss` is left at its default (the
# literal 'deviance' is rejected by recent scikit-learn releases) and the seed
# is fixed so the fit is repeatable.
clf = gbc(learning_rate=0.11, n_estimators=200, random_state=42)
clf.fit(X_train, y_train)

In [None]:
# Build the test matrix exactly like the training matrix and predict.
df_submission = pd.read_csv("../input/test.csv")

petIds = df_submission["PetID"].values
# Scale only the tabular columns, using the scaler fitted on the training data.
df_submission = clean_dataset(select_cols(df_submission), scaler)
# Join the image features AFTER column selection. Previously the merge came
# first and select_cols() then discarded the pic_* columns, so the classifier
# received fewer features than it was fitted on.
df_submission["PetID"] = petIds
df_submission = pd.merge(df_submission, test_feats, how='left', on='PetID')
df_submission = df_submission.drop(["PetID"], axis='columns')
y_pred_test = clf.predict(df_submission.values)

# Submission layout: PetID first, then the predicted AdoptionSpeed.
df_y_pred_test = pd.DataFrame(
    {"PetID": petIds, "AdoptionSpeed": y_pred_test},
    columns=["PetID", "AdoptionSpeed"],
)

In [None]:
# Summary statistics of the predicted AdoptionSpeed values.
df_y_pred_test.describe()

In [None]:
# Write the submission file without the DataFrame index column.
df_y_pred_test.to_csv("submission.csv", index=False)