In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm, tqdm_notebook
import matplotlib.pyplot as plt
import tensorflow as tf

import xgboost as xgb
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
import json
print(os.listdir("../input"))
input_meta_dir = '../input/petfinder-adoption-prediction/'
train_images_data_dir = '../input/petfinder-adoption-prediction/train_images'
test_images_data_dir = '../input/petfinder-adoption-prediction/test_images'
#input_meta_dir = '../input/'

In [None]:
meta_train = pd.read_csv(input_meta_dir + 'train/train.csv')
meta_test = pd.read_csv(input_meta_dir + 'test/test.csv')

In [None]:
print(meta_train.shape)
print(meta_test.shape)

In [None]:
def load_sentiment_dataframe(split_type='train'):
    """Collect the per-pet sentiment JSON files of one split into a DataFrame.

    Args:
        split_type: prefix of the ``<split_type>_sentiment/`` folder that is
            read from ``input_meta_dir`` (default 'train').

    Returns:
        DataFrame with one row per JSON file: the fields of the file's
        ``documentSentiment`` entry plus ``Language`` and ``PetID`` (the file
        name up to its first dot).
    """
    sentiment_dir = input_meta_dir + '{split_type}_sentiment/'.format(split_type=split_type)

    def read_record(filename):
        # One JSON file -> one flat record.
        with open(sentiment_dir + filename, 'r') as json_file:
            payload = json.load(json_file)
        record = payload['documentSentiment']  # e.g. {'magnitude': 2.1, 'score': 0.4}
        record['Language'] = payload['language']
        record['PetID'] = filename.split('.')[0]
        return record

    return pd.DataFrame([read_record(filename) for filename in os.listdir(sentiment_dir)])

In [None]:
train_sentiment = load_sentiment_dataframe()
test_sentiment = load_sentiment_dataframe(split_type='test')

In [None]:
# Attach the sentiment columns to each pet. The left join keeps every pet;
# pets without a matching sentiment row get NaN in the new columns.
meta_train = meta_train.merge(train_sentiment, on='PetID', how='left')
meta_test = meta_test.merge(test_sentiment, on='PetID', how='left')

In [None]:
# Stack train and test so feature engineering is applied to both at once.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
data = pd.concat([meta_train, meta_test], sort=False)

In [None]:
data.shape

In [None]:
def get_labels_map(label_type):
    """Read ``<label_type>_labels.csv`` and return an ID -> name dictionary.

    Args:
        label_type: label family, e.g. 'State'; the CSV file name is its
            lower-cased form and the columns read are ``<label_type>ID`` and
            ``<label_type>Name``.

    Returns:
        dict mapping each ID to its name.
    """
    csv_path = input_meta_dir + label_type.lower() + '_labels.csv'
    label_table = pd.read_csv(csv_path)
    id_column = '{}ID'.format(label_type)
    name_column = '{}Name'.format(label_type)
    return dict(zip(label_table[id_column], label_table[name_column]))

state_map = get_labels_map('State')
breed_map = get_labels_map('Breed')
color_map = get_labels_map('Color')

# Replace the numeric codes with their readable names, one column at a time.
column_maps = {
    'Type': {1:'Dog', 2:'Cat'},
    'Breed1': breed_map,
    'Breed2': breed_map,
    'Color1': color_map,
    'Color2': color_map,
    'Color3': color_map,
    'State': state_map,
}
for column, mapping in column_maps.items():
    data.loc[:, column] = data[column].map(mapping)

In [None]:
def get_impute_dict(data, var_name, top_n):
    """Build an identity mapping for the ``top_n`` most frequent values of a column.

    Args:
        data: DataFrame holding the column.
        var_name: column name.
        top_n: number of most frequent values to keep.

    Returns:
        dict mapping each kept value to itself, plus 'Missing' -> 'Missing'.
        Mapping a column through it turns every other value into NaN.
    """
    frequent_values = data[var_name].value_counts().index.values[:top_n]
    mapping = dict(zip(frequent_values, frequent_values))
    mapping['Missing'] = 'Missing'
    return mapping

In [None]:
def clean_data(data):
    """Fill missing text fields and add name/description/breed/rescuer features.

    The frame is modified in place and also returned.

    Args:
        data: DataFrame with at least the columns Name, Description, Breed1,
            Breed2, RescuerID and Type.

    Returns:
        The same DataFrame with Name/Description filled, Breed1/Breed2 reduced
        to their 10 most frequent values (everything else becomes 'Other'),
        and the new columns NoName, NameWordLength, NameInDescription,
        DescriptionWordLength and RescuerCount.
    """
    # Assign the filled column back instead of `data.Name.fillna(..., inplace=True)`:
    # in-place fillna on an attribute-accessed column is a chained operation that
    # does not update the frame under pandas copy-on-write.
    data['Name'] = data['Name'].fillna('no name')
    data.loc[:, 'NoName'] = data.Name.str.lower().str.contains('no name').astype(int)
    data.loc[:, 'NameWordLength'] = data.Name.apply(lambda x: len(x.split(' ')))
    data['Description'] = data['Description'].fillna('None')
    data.loc[:, 'NameInDescription'] = data.apply(lambda record: record.Name.lower() in record.Description.lower(), axis=1).astype(int)
    data.loc[:, 'DescriptionWordLength'] = data.Description.apply(lambda x: len(x.split(' ')))
    # Keep only the 10 most frequent breeds; missing or rarer breeds become 'Other'.
    data.loc[:, 'Breed1'] = data.Breed1.map(get_impute_dict(data, 'Breed1', 10)).fillna('Other')
    data.loc[:, 'Breed2'] = data.Breed2.map(get_impute_dict(data, 'Breed2', 10)).fillna('Other')
    # Number of rows with a non-null Type that share the same RescuerID.
    data.loc[:, 'RescuerCount'] = data.groupby(['RescuerID'])['Type'].transform('count')
    return data

In [None]:
data = clean_data (data)

In [None]:
data.tail(2)

In [None]:
# Use this for target-encoding - may be done better
# Modified to include test set as well

def calc_smooth_mean(df, by, on, m):
    """Target-encode column ``by`` with an additively smoothed mean of ``on``.

    For each category: ``(count * category_mean + m * global_mean) / (count + m)``
    where ``count`` is the number of non-null ``on`` values in the category.

    Args:
        df: input pandas DataFrame.
        by: name of the categorical column to encode.
        on: name of the target column (NaN targets are ignored).
        m: smoothing weight given to the global mean.

    Returns:
        Series aligned with ``df`` holding the smoothed mean of each row's
        category. Categories without any non-null target fall back to the
        global mean (for m > 0) instead of NaN.
    """
    # Compute the global mean
    mean = df[on].mean()

    # Compute the number of values and the mean of each group
    agg = df.groupby(by)[on].agg(['count', 'mean'])
    counts = agg['count']
    # A group whose targets are all NaN (e.g. a category present only in the
    # unlabeled rows) has count 0 and mean NaN; use the prior so the NaN does
    # not propagate through the formula below.
    means = agg['mean'].fillna(mean)

    # Compute the "smoothed" means
    smooth = (counts * means + m * mean) / (counts + m)

    # Replace each value by the according smoothed mean
    return df[by].map(smooth)

In [None]:
# TE = target encoding
te_weight = 10
# Encode every categorical column by its OWN categories. The previous version
# grouped Color1/Color2/Color3 by the Breed columns (which had already been
# overwritten with their encoded values), so the color columns carried no
# color information.
for te_col in ['Breed1', 'Breed2', 'Color1', 'Color2', 'Color3', 'State']:
    data[te_col] = calc_smooth_mean(data, by=te_col, on='AdoptionSpeed', m=te_weight)

In [None]:
data.head(2)

In [None]:
# Columns listed here are dropped in a later cell; PetID is deliberately kept
# for now because it is the key used to merge the image features.
do_not_use = ['Name', 'Description', 'RescuerID'] #, 'PetID'] - do not forget remove it later!
# One-hot encode the remaining string-valued columns.
categorical_cols = ['Type', 'Language']
data = pd.get_dummies(data, columns =categorical_cols)

In [None]:
data = data.drop (do_not_use, axis =1)
data.shape

In [None]:
# Rows with a finite AdoptionSpeed go to X_train, the rest (NaN target) to X_test.
# NOTE(review): this presumes the NaN-target rows are exactly the test rows — the
# shape checks in the following cells verify the counts.
X_train =data.loc[np.isfinite(data.AdoptionSpeed), :]
X_test = data.loc[~np.isfinite(data.AdoptionSpeed), :]

In [None]:
print (X_train.shape)
print (X_test.shape)
X_train.columns

In [None]:
print (X_train.shape[0] == meta_train.shape[0])


In [None]:
X_test = X_test.drop(['AdoptionSpeed'], axis=1)

In [None]:
print (X_test.shape[0]  == meta_test.shape[0] )

In [None]:
###  IMAGE FEATURES from VGG16 ###

In [None]:
# Modified from the kernel https://www.kaggle.com/mkozine/weighted-kappa-loss-for-keras-tensorflow
# Eliminated bsize = (batch size), use y_pred.shape[0] instead

def kappa_loss(y_pred, y_true, y_pow=2, eps=1e-10, bsize=256, N=5, name='kappa'):
    """A continuous differentiable approximation of discrete kappa loss.

    Args:
        y_pred: 2D tensor or array, [batch_size, num_classes]
        y_true: 2D tensor or array, [batch_size, num_classes]
        y_pow: int, power applied to y_pred before row-normalisation, e.g. y_pow=2
        eps: a float, prevents divide by zero
        bsize: kept for backward compatibility only and ignored; the batch
            size is read from the tensor at run time.
        N: typically num_classes of the model
        name: Optional scope/name for op_scope.

    Returns:
        A tensor with the kappa loss.

    NOTE(review): Keras invokes loss functions as ``fn(y_true, y_pred)``. When
    this function is passed directly to ``model.compile`` the two tensors
    arrive swapped relative to the parameter names, so ``y_pow`` ends up being
    applied to the labels. Confirm whether that is intended.
    """
    with tf.name_scope(name):
        y_pred = tf.to_float(y_pred)
        y_true = tf.to_float(y_true)

        # Quadratic disagreement weights: weights[i, j] = (i - j)^2 / (N - 1)^2
        repeat_op = tf.to_float(tf.tile(tf.reshape(tf.range(0, N), [N, 1]), [1, N]))
        repeat_op_sq = tf.square((repeat_op - tf.transpose(repeat_op)))
        weights = repeat_op_sq / tf.to_float((N - 1) ** 2)

        # Sharpen the predictions and renormalise every row to sum to one.
        pred_ = y_pred ** y_pow
        pred_norm = pred_ / (eps + tf.reshape(tf.reduce_sum(pred_, 1), [-1, 1]))

        hist_rater_a = tf.reduce_sum(pred_norm, 0)
        hist_rater_b = tf.reduce_sum(y_true, 0)

        conf_mat = tf.matmul(tf.transpose(pred_norm), y_true)

        # Use the real number of rows in this batch: a fixed bsize rescales the
        # expected-agreement term whenever the batch is not exactly that size
        # (including the last, smaller batch of an epoch).
        n_samples = tf.to_float(tf.shape(y_true)[0])

        nom = tf.reduce_sum(weights * conf_mat)
        denom = tf.reduce_sum(weights * tf.matmul(
            tf.reshape(hist_rater_a, [N, 1]), tf.reshape(hist_rater_b, [1, N])) /
                              n_samples)

        return nom / (denom + eps)

In [None]:
name_target_dict = meta_train.set_index('PetID')['AdoptionSpeed'].to_dict()

train_image_names = os.listdir(train_images_data_dir)
n_train_images = len(train_image_names)
test_image_names = os.listdir(test_images_data_dir)
n_test_images = len(test_image_names)
print (train_image_names [0:2])
print ("No. of train images: " + str (n_train_images))
print ("No. of test images: " + str (n_test_images))

In [None]:
# The PetID is the part of the image file name before the first '-'.
train_pet_ids = [image_name.split('-')[0] for image_name in train_image_names]

generator_dict = {
    'filename': list(train_image_names),
    'PetID': train_pet_ids,
    # Every photo inherits the AdoptionSpeed of its pet.
    'class': [name_target_dict[pet_id] for pet_id in train_pet_ids],
}

generator_df_full = pd.DataFrame(generator_dict)
print (generator_df_full.shape)
generator_df_full[:3]

In [None]:
test_name_target_dict = meta_test.set_index('PetID').to_dict()
test_generator_dict = {'filename': [], 'PetID':[]}

In [None]:
# Test photos have no label: record only the file name and the PetID prefix
# (the part of the file name before the first '-').
test_generator_dict['filename'].extend(test_image_names)
test_generator_dict['PetID'].extend(image_name.split('-')[0] for image_name in test_image_names)

test_generator_df = pd.DataFrame(test_generator_dict)
test_generator_df.shape

In [None]:
len(meta_train)

In [None]:
np.random.seed(seed=6)
# Uniform draws in [0, 1): each row goes to the training part with probability 0.9.
# (np.random.randn draws from a standard normal, for which `< 0.9` keeps only ~82%.)
# The mask is sized from X_train because that is the frame it indexes.
mask = np.random.rand(len(X_train)) < 0.9
train_split = X_train[mask]
validation_split = X_train[~mask]
print (train_split.shape)
print (validation_split.shape)

In [None]:
# Full dicts will be helpful to attach the image features to the right PetID
train_generator_full_df = generator_df_full.loc[generator_df_full['PetID'].isin(train_split['PetID'].values)]

In [None]:
train_generator_df = train_generator_full_df.copy()
train_generator_df = train_generator_df[['filename','class']]
train_generator_df['class'] = train_generator_df['class'].astype(str)
train_generator_df.shape

In [None]:
valid_generator_full_df = generator_df_full.loc[generator_df_full['PetID'].isin(validation_split['PetID'].values)]
valid_generator_df = valid_generator_full_df.copy()
valid_generator_df = valid_generator_df[['filename','class']]
valid_generator_df['class'] = valid_generator_df['class'].astype(str)
valid_generator_df.shape

In [None]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense, BatchNormalization
from keras import applications

In [None]:
# dimensions of our images.
img_width, img_height = 150, 150
epochs = 50
batch_size = 32

In [None]:
datagen = ImageDataGenerator( rescale=1/255.)

In [None]:
# Create data generator for the VGG16 part of the model
def create_generator_vgg16(data_dir, input_df):
    """Build an unshuffled image generator that feeds the fixed VGG16 base.

    Args:
        data_dir: directory that contains the image files.
        input_df: DataFrame with a 'filename' column (and a 'class' column).

    Returns:
        The iterator produced by ``datagen.flow_from_dataframe``; with
        ``class_mode=None`` it yields image batches only, in DataFrame order.
    """
    flow_options = dict(
        x_col='filename',
        y_col='class',
        has_ext=True,  # the file names in x_col already include the extension
        target_size=(img_width, img_height),
        color_mode='rgb',
        class_mode=None,
        batch_size=batch_size,
        shuffle=False,  # keep row order: the fixed VGG16 weights are only applied, not trained
        seed=6,
    )
    return datagen.flow_from_dataframe(input_df, data_dir, **flow_options)

In [None]:
# Make length of train and valid splits divisible by batch_size
# for the future VGG-16 propagation needs
# Round both image counts down to a whole number of batches.
n_train_rows = train_generator_df.shape[0]
n_train_split = (n_train_rows // batch_size) * batch_size
print (" Length of train portion decreased from " + str(n_train_rows) + " to "
       + str (n_train_split))
n_valid_rows = valid_generator_df.shape[0]
n_valid_split = (n_valid_rows // batch_size) * batch_size
print (" Length of valid portion decreased from " + str(n_valid_rows) + " to "
       + str (n_valid_split))

In [None]:
train_generator_df_short = train_generator_df.head(n_train_split)
train_generator_full_df_short = train_generator_full_df.head(n_train_split)
train_generator_df_short.shape

In [None]:
valid_generator_full_df_short = valid_generator_full_df.head(n_valid_split)
valid_generator_df_short = valid_generator_df.head(n_valid_split)
valid_generator_df_short.shape

In [None]:
train_generator = create_generator_vgg16(train_images_data_dir, train_generator_df_short)
valid_generator = create_generator_vgg16(train_images_data_dir, valid_generator_df_short)

In [None]:
from keras.applications.vgg16 import VGG16
from keras.models import Model
base_model = VGG16(include_top = False,
                  input_shape=(img_width, img_height,3),
                  weights='../input/vgg16-weights/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')

In [None]:
bottleneck_features_train = base_model.predict_generator(train_generator, n_train_split//batch_size, verbose = 1)
bottleneck_features_train.shape

In [None]:
bottleneck_features_valid = base_model.predict_generator(valid_generator, n_valid_split // batch_size, verbose = 1)
bottleneck_features_valid.shape

In [None]:
from keras.utils import to_categorical
# Fix the width of the one-hot labels to the 5 outputs of the softmax layer so
# the shapes still agree if one of the splits happens to lack the highest class.
train_labels = to_categorical(train_generator_df_short['class'], num_classes=5)
valid_labels = to_categorical(valid_generator_df_short['class'], num_classes=5)
valid_labels.shape

In [None]:
# build dense layesr model on to of VGG16
# Small classifier head trained on the VGG16 bottleneck features:
# flatten -> 256-unit ReLU layer -> dropout -> batch norm -> 5-way softmax.
top_model = Sequential()
top_model.add(Flatten(input_shape=bottleneck_features_train.shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.6))
top_model.add(BatchNormalization (epsilon=0.001))
#top_model.add(Dense(16, activation='relu'))
#top_model.add(Dropout(0.4))
#top_model.add(BatchNormalization (epsilon=0.001))
top_model.add(Dense(5, activation='softmax'))

In [None]:
#top_model.compile(optimizer='rmsprop',
#              loss='binary_crossentropy', metrics=['accuracy'])
mko_optimizer = keras.optimizers.rmsprop(lr=0.00005)
top_model.compile(loss = kappa_loss,
              optimizer = mko_optimizer,
              metrics=['accuracy'])

top_model.summary()

In [None]:
top_model.fit(bottleneck_features_train, train_labels,
          epochs=30,
          batch_size=32,
          validation_data=(bottleneck_features_valid, valid_labels))


In [None]:
# Model is ready.
# Use it to create image features for train, valid and test portions
# First - train
train_images_predictions = top_model.predict(bottleneck_features_train, verbose=1)

In [None]:
train_generator_full_df_short.shape

In [None]:
tr_df = pd.DataFrame(train_images_predictions, columns = ("Img_0", "Img_1","Img_2","Img_3","Img_4"))
tr_df.shape

In [None]:
# Pack it nicely and merge
# The predictions come back in generator (row) order, while
# train_generator_full_df_short still carries the index labels it had in
# generator_df_full. DataFrame.join aligns on the index, so give the prediction
# frame the same index; otherwise probabilities are attached to the wrong
# images or left as NaN.
train_pred_df = pd.DataFrame(train_images_predictions,
                             columns = ("Img_0", "Img_1","Img_2","Img_3","Img_4"),
                             index = train_generator_full_df_short.index)
train_images_df = train_generator_full_df_short.join(train_pred_df)
train_images_df.drop(columns=['filename', 'class'], inplace=True)
# Columns 1..5 are Img_0..Img_4 (column 0 is PetID): most probable class per image.
train_images_df.loc[:,'Img_pred'] = train_images_df.iloc[:,1:6].values.argmax(axis=1)
print(train_images_df.shape)
train_images_df.head()

In [None]:
# Collapse the per-image rows to one row per pet.
# NOTE(review): the training part is aggregated with median() while the
# validation and test parts use mean() — confirm the difference is intended.
train_images_df = train_images_df.groupby('PetID', as_index=False).median()
train_images_df.shape

In [None]:
train_split.columns

In [None]:
#train_images_df = train_images_df.groupby('PetID', as_index=False).mean()
train_split = pd.merge(train_split, train_images_df, how='left', on = 'PetID')
train_split.shape

In [None]:
train_split.head(5)

In [None]:
'''train_split['Img_0'] = train_split.Img_0.fillna(0)
train_split['Img_1'] = train_split.Img_1.fillna(0)
train_split['Img_2'] = train_split.Img_2.fillna(0)
train_split['Img_3'] = train_split.Img_3.fillna(0)
train_split['Img_4'] = train_split.Img_4.fillna(0)'''

In [None]:
# Treat an aggregated predicted class of exactly 0 as missing. A single .loc
# assignment replaces the chained indexing `df[col][mask] = ...`, which may
# write to a temporary copy and leave the frame unchanged.
train_split.loc[train_split['Img_pred']==0.000000, 'Img_pred'] = float('nan')
#train_split['Img_pred'] = train_split.Img_pred.fillna(3)
train_split[['AdoptionSpeed','Img_pred']]

In [None]:
# Valid
valid_images_predictions = top_model.predict(bottleneck_features_valid, verbose=1)

In [None]:
# Give the prediction frame the index of the image frame so the index-based
# join pairs every image with its own prediction (predictions are in row order,
# but valid_generator_full_df_short keeps the labels of generator_df_full).
valid_pred_df = pd.DataFrame(valid_images_predictions,
                             columns = ("Img_0", "Img_1","Img_2","Img_3","Img_4"),
                             index = valid_generator_full_df_short.index)
valid_images_df = valid_generator_full_df_short.join(valid_pred_df)
valid_images_df.drop(columns=['filename', 'class'], inplace=True)
# Columns 1..5 are Img_0..Img_4 (column 0 is PetID): most probable class per image.
valid_images_df['Img_pred'] = valid_images_df.iloc[:,1:6].values.argmax(axis=1)

In [None]:
valid_images_df = valid_images_df.groupby('PetID', as_index=False).mean()


In [None]:
valid_images_df.shape

In [None]:
validation_split = pd.merge(validation_split, valid_images_df, how='left', on='PetID')
print (validation_split.shape)
validation_split.head(5)

In [None]:
# Single .loc assignment instead of chained indexing, which may write to a copy.
validation_split.loc[validation_split['Img_pred']==0.000000, 'Img_pred'] = float ('nan')
#validation_split['Img_pred'] = validation_split.Img_pred.fillna(3)
#validation_split['Img_0'] = validation_split.Img_0.fillna(0)
#validation_split['Img_1'] = validation_split.Img_1.fillna(0)
#validation_split['Img_2'] = validation_split.Img_2.fillna(0)
#validation_split['Img_3'] = validation_split.Img_3.fillna(0)
#validation_split['Img_4'] = validation_split.Img_4.fillna(0)
#validation_split[['AdoptionSpeed','Img_pred']]

In [None]:
# Re-assemble the full training frame (train part followed by validation part).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
X_train1 = pd.concat([train_split, validation_split])

In [None]:
X_train1.shape

In [None]:
X_train1[:5]

In [None]:
# Test
# Unshuffled generator over the test images: same 1/255 rescaling and target
# size as the train/valid generators, images only (class_mode=None).
# No x_col is passed here, so the generator's default column name is used.
test_generator = ImageDataGenerator(rescale=1/255.).flow_from_dataframe(
    test_generator_df,
    test_images_data_dir,
    has_ext=True,
    target_size=(img_width, img_height),
    color_mode='rgb',
    batch_size=64,
    shuffle=False,
    class_mode=None
)

In [None]:
bottleneck_features_test = base_model.predict_generator(test_generator, len(test_generator), verbose=1)

In [None]:
test_images_predictions = top_model.predict(bottleneck_features_test, verbose=1 )

In [None]:
# Index-based join of the file/PetID table with the prediction table. Both
# frames are built with pandas' default 0..n-1 index, so rows pair up by position.
test_images_df = test_generator_df.join(pd.DataFrame(test_images_predictions, columns = ("Img_0", "Img_1","Img_2","Img_3","Img_4")))
test_images_df.drop(columns=['filename'], inplace=True)
print(test_images_df.shape)
test_images_df.head()

In [None]:
#data.loc[:, 'NameWordLength'] =
test_images_df.loc[:,'Img_pred'] = test_images_df.iloc[:,1:6].values.argmax(axis=1)

In [None]:
test_images_df.shape

In [None]:
test_images_df = test_images_df.groupby('PetID', as_index=False).mean()


In [None]:
print (X_test.shape)

In [None]:
print (X_test.shape)
X_test = pd.merge(X_test, test_images_df, how='left', on = 'PetID')
print (X_test.shape)
X_test.head()

In [None]:
# Single .loc assignment instead of chained indexing, which may write to a copy.
X_test.loc[X_test['Img_pred']==0.000000, 'Img_pred'] = float('nan')
#X_test['Img_pred'] = X_test.Img_pred.fillna(0)
#X_test['Img_0'] = X_test.Img_0.fillna(0)
#X_test['Img_1'] = X_test.Img_1.fillna(0)
#X_test['Img_2'] = X_test.Img_2.fillna(0)
#X_test['Img_3'] = X_test.Img_3.fillna(0)
#X_test['Img_4'] = X_test.Img_4.fillna(0)
X_test.head(5)

In [None]:
X_train1 = X_train1.drop('PetID', axis = 1)

In [None]:
X_test = X_test.drop('PetID', axis = 1)

In [None]:
#######################################################################################################

In [None]:
#### End IMAGE FEATURES from VGG16 ###

In [None]:
###### BEGIN FEATURES FOR RESNET ######

In [5]:
def resnet_script():
    """Fine-tune a pretrained ResNet-152 on the training photos and score the test photos.

    The function is self-contained: it re-reads the train/test CSVs, builds
    PyTorch datasets over the image folders, trains only the replacement
    classifier head and then predicts every test photo.

    Returns:
        DataFrame with one row per test PetID holding ``AdoptionSpeed_ResNet``
        (the minimum predicted class over the pet's photos, 3 when the pet has
        no photo prediction) and ``ResNet-0`` .. ``ResNet-4`` (the class
        probabilities averaged over the pet's photos, NaN without photos).
    """
    import numpy as np # linear algebra
    import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
    import matplotlib.pyplot as plt
    import os
    from os.path import isfile, join, abspath, exists, isdir, expanduser

    from functools import reduce
    from shutil import copyfile
    import torchvision.transforms as transforms

    from skimage import io, transform, color
    from timeit import default_timer as timer
    from PIL import Image

    import torch
    from torch import nn # import neural network
    from torch import optim # import optimization
    import torch.nn.functional as F
    from torchvision import datasets, transforms, models
    from collections import OrderedDict
    from torch.utils import data

    input_dir = '../input/petfinder-adoption-prediction/'

    train = pd.read_csv(input_dir + 'train/train.csv')
    test = pd.read_csv(input_dir + 'test/test.csv')

    def get_photo_file_names(PetID, PhotoAmt):
        """Return the image base names '<PetID>-1' .. '<PetID>-<PhotoAmt>'."""
        return ['{}-{}'.format(PetID, str(num+1)) for num in range(int(PhotoAmt))]

    def strip_photo_number(photo_name):
        """Return the PetID part of '<PetID>-<n>' (works for any number of digits in n)."""
        return photo_name.rsplit('-', 1)[0]

    def flatten(list_of_lists):
        """Concatenate an iterable of lists into one list in linear time."""
        return [item for sublist in list_of_lists for item in sublist]

    train['PhotoFileNames'] = train.apply(lambda row: get_photo_file_names(row['PetID'], row['PhotoAmt']), axis=1)
    test['PhotoFileNames'] = test.apply(lambda row: get_photo_file_names(row['PetID'], row['PhotoAmt']), axis = 1)

    ## create training and validation split
    # Uniform draws: each pet lands in the training part with probability 0.8
    # (randn would draw from a standard normal instead).
    mask = np.random.rand(len(train)) < 0.8
    train_split = train[mask]
    validation_split = train[~mask]

    partition = {'train': flatten(train_split.PhotoFileNames),
                 'validation': flatten(validation_split.PhotoFileNames),
                 'test': flatten(test.PhotoFileNames)}

    # create labels dictionary for all the images
    def get_photo_label_dict(PhotoFileNames, Type):
        """Map every photo name of one pet to that pet's label."""
        return dict(zip(PhotoFileNames, np.repeat(Type, len(PhotoFileNames))))

    labels = {}
    photo_label_dict_list = train.apply(lambda row: get_photo_label_dict(row['PhotoFileNames'], row['AdoptionSpeed']), axis=1).values

    for photo_label_dict in photo_label_dict_list:
        labels.update(photo_label_dict)

    class AdoptionDataset(data.Dataset):
        """Dataset over pet photos; yields (image, label) for train data, image only otherwise."""

        def __init__(self, list_IDs, labels=None, transform=None, train_split=True):
            self.train_split = train_split
            self.transform = transform
            self.labels = labels
            self.list_IDs = list_IDs

        def __len__(self):
            return len(self.list_IDs)

        def __getitem__(self, index):
            # Select sample
            ID = self.list_IDs[index]
            split_type = 'train' if self.train_split else 'test'
            image_path = '{input_dir}{split_type}_images/{ID}.jpg'.format(input_dir=input_dir, split_type=split_type, ID=ID)

            # Load data and get label
            image = io.imread(image_path)
            image = color.gray2rgb(image)

            if self.train_split:
                label = self.labels[ID]
                if self.transform:
                    image = self.transform(image)
                    label = torch.tensor(label)
                return image, label
            else:
                if self.transform:
                    image = self.transform(image)
                return image

    params = {'batch_size': 64,
          'shuffle': True,
          }

    train_transforms = transforms.Compose([transforms.ToPILImage(),
                                       transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])

    vali_test_transforms = transforms.Compose([transforms.ToPILImage(),
                                      transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406],[0.229, 0.224, 0.225])])


    input_size = 224

    training_set = AdoptionDataset(partition['train'], labels, transform=train_transforms)
    training_generator = data.DataLoader(training_set, **params)

    validation_set = AdoptionDataset(partition['validation'], labels, transform=vali_test_transforms)
    validation_generator = data.DataLoader(validation_set, **params)

    test_set = AdoptionDataset(partition['test'], transform=vali_test_transforms, train_split=False)
    test_generator = data.DataLoader(test_set, batch_size = 1, shuffle=False)

    class_to_idx = dict({'0': 0, '1': 1, '2': 2, '3': 3, '4': 4})

    # Pull one batch from every loader as a smoke test. The batch labels are
    # bound to their own name so the `labels` dictionary above is not rebound.
    trainiter = iter(training_generator)
    features, batch_labels = next(trainiter)

    valiter = iter(validation_generator)
    features, batch_labels = next(valiter)

    testiter = iter(test_generator)
    features = next(testiter)  # reused below for a single sanity prediction

    def imshow(image, ax=None, title=None, normalize=True):
        """Imshow for Tensor."""
        if ax is None:
            fig, ax = plt.subplots()
        image = image.numpy().transpose((1, 2, 0))

        # because in the transform above, we normalized the image
        # so we need to convert back to original
        if normalize:
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            image = std * image + mean
            image = np.clip(image, 0, 1)

        ax.imshow(image)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.tick_params(axis='both', length=0)
        ax.set_xticklabels('')
        ax.set_yticklabels('')

        return ax

    model = models.resnet152()
    model.load_state_dict(torch.load("../input/resnet152/resnet152.pth"))

    drop_p = 0.3
    learning_rate = 0.001
    n_inputs = model.fc.in_features
    num_classes = 5


    # Add on classifier
    model.fc = nn.Sequential(nn.Linear(n_inputs, 1000),
                         nn.ReLU(),
                         nn.Dropout(drop_p),
                         nn.BatchNorm1d(1000),
                         nn.Linear(1000, num_classes),
                         nn.LogSoftmax(dim=1))

    # Freeze model weights
    for param in model.parameters():
        param.requires_grad = False

    # Unfreeze the last six parameter tensors
    for i in range(1,7):
        list(model.parameters())[-i].requires_grad = True

    # define criterion and optimizer
    # The optimizer is created after freezing so the requires_grad filter
    # really selects only the trainable tensors.
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr = learning_rate)

    def validation(model, criterion, dataset):
        """Return (summed loss, summed per-batch accuracy) of the model over a loader."""
        model.to(device)

        accuracy = 0
        test_loss = 0

        for inputs, labels in iter(dataset):
            inputs, labels = inputs.to(device), labels.to(device)

            output = model.forward(inputs)
            test_loss += criterion(output, labels).item()

            ## Calculating the accuracy
            # Model's output is log-softmax, take exponential to get the probabilities
            ps = torch.exp(output)

            # Class with highest probability is our predicted class, compare with true label
            equality = (labels.data == ps.max(1)[1])

            # Accuracy is number of correct predictions divided by all predictions, just take the mean
            accuracy += equality.type_as(torch.FloatTensor()).mean()

        return test_loss, accuracy

    # train the network
    epochs = 1
    print_every = 40
    steps = 0
    model.train()
    # change to cuda if avaliable
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    for e in range(epochs):
        running_loss = 0

        # The batch loop belongs inside the epoch loop; it used to sit after it,
        # so exactly one pass over the data ran whatever `epochs` was.
        for inputs, targets in iter(training_generator):
            steps +=1

            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()

            # Forward and backward passes
            outputs = model.forward(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:

                # Model in inference mode, dropout is off
                model.eval()

                # Turn off gradients for validation, will speed up inference
                with torch.no_grad():
                    test_loss, accuracy = validation(model, criterion, dataset = validation_generator, )

                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/print_every),
                      "Validation Loss: {:.3f}.. ".format(test_loss/len(validation_generator)),
                      "Validation Accuracy: {:.3f}".format(accuracy/len(validation_generator)))

                running_loss = 0

                # Make sure dropout and grads are on for training
                model.train()

    def predict(img,model,k = 1):
        """Return (top-1 class index, class probabilities) for a single image tensor."""
        img = img.to(torch.device(device))
        # add the batch dimension expected by the model
        img = img.unsqueeze(0)

        #put the image to the model for prediction
        with torch.no_grad():
            output = model.forward(img)

        # get probabilities and classes
        probs, classes = output.topk(k)

        # model is in logsoftmax, use exp() to convert back to probabilities
        ps = torch.exp(probs)
        probs = ps[0]
        classes = classes[0]

        return classes[0], torch.exp(output)

    # put the model in eval mode
    model.eval()

    # change to cuda if avaliable
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    classes, output = predict(features[0],model, k=5)
    #print('Probability is: ', probs)
    print('classes is: ', classes)

    test_predictions = {}
    test_output = {}
    # test_generator is unshuffled with batch_size 1, so it yields the photos in
    # the order of partition['test'].
    for filename, images in zip(partition['test'], test_generator):
        classes, output = predict(images[0], model, k = 5)
        test_predictions[filename] = classes
        test_output[filename] = output.cpu()

    test_submissions_output = pd.DataFrame((k, *v[0]) for k, v in test_output.items())

    for i in range(1,6):
        test_submissions_output[i] = test_submissions_output[i].astype(float)

    test_submissions_output.columns = ['PetID', 'ResNet-0', 'ResNet-1', 'ResNet-2', 'ResNet-3', 'ResNet-4']
    # Strip the '-<photo number>' suffix; slicing off two characters breaks for
    # photo numbers with two digits.
    test_submissions_output['PetID'] = test_submissions_output['PetID'].apply(strip_photo_number)
    test_submissions_output = test_submissions_output.groupby('PetID').mean().reset_index()
    test_submissions = pd.DataFrame(dict([ (k,pd.Series(v.tolist())) for k,v in test_predictions.items() ])).melt()

    test_submissions.rename(columns={'variable':'FileName', 'value': 'AdoptionSpeed_ResNet'}, inplace=True)
    test_submissions['PetID'] = test_submissions.FileName.apply(strip_photo_number)

    test_submissions = test_submissions[['PetID','AdoptionSpeed_ResNet']].groupby('PetID').min().reset_index()
    #test_submissions = test_submissions[['PetID','AdoptionSpeed']].groupby('PetID').mean().reset_index()
    testPetID = test.PetID.to_frame()
    # Pets without any predicted photo get class 3.
    final = testPetID.merge(test_submissions,how = 'left',on = 'PetID').fillna(3)
    final['AdoptionSpeed_ResNet']=final['AdoptionSpeed_ResNet'].astype(int)

    # Merge this and use as XGBoost features
    final = pd.merge(final, test_submissions_output, on='PetID', how='left')

    return final

resnet_features = resnet_script()

KeyboardInterrupt: 

In [None]:
###### END RESNET ######

In [None]:
#X_test = X_test.drop('PetID', axis = 1)
#X_train = X_train.drop('PetID', axis = 1)

In [None]:
#X_train_non_null = X_train1.fillna(-1)
# Replace every remaining NaN (e.g. image features of pets without a matched
# prediction row) with the sentinel value -1.
X_train_non_null = X_train1.fillna(-1)
X_test_non_null = X_test.fillna(-1)

In [None]:
X_train_non_null.isnull().any().any(), X_test_non_null.isnull().any().any()

In [None]:
X_train_non_null.shape, X_test_non_null.shape

In [None]:
len(X_train.columns)

In [None]:
import scipy as sp

from collections import Counter
from functools import partial
from math import sqrt

from sklearn.metrics import cohen_kappa_score, mean_squared_error
from sklearn.metrics import confusion_matrix as sk_cmatrix

In [None]:
# Quadratic Kappa calculation

# FROM: https://www.kaggle.com/myltykritik/simple-lgbm-image-features

# The following 3 functions have been taken from Ben Hamner's github repository
# https://github.com/benhamner/Metrics
def confusion_matrix(rater_a, rater_b, min_rating=None, max_rating=None):
    """
    Returns the confusion matrix between rater's ratings

    Args:
        rater_a: sequence (list or array) of integer ratings of the first rater.
        rater_b: sequence of integer ratings of the second rater, same length.
        min_rating: lowest possible rating; defaults to the smallest rating seen.
        max_rating: highest possible rating; defaults to the largest rating seen.

    Returns:
        Square list of lists where entry [i][j] counts the items rated
        ``min_rating + i`` by rater_a and ``min_rating + j`` by rater_b.
    """
    assert(len(rater_a) == len(rater_b))
    # Take the extremes of each rater separately: `rater_a + rater_b` only
    # concatenates for lists; for numpy arrays it adds element-wise and would
    # give the wrong range.
    if min_rating is None:
        min_rating = min(min(rater_a), min(rater_b))
    if max_rating is None:
        max_rating = max(max(rater_a), max(rater_b))
    num_ratings = int(max_rating - min_rating + 1)
    conf_mat = [[0] * num_ratings for _ in range(num_ratings)]
    for a, b in zip(rater_a, rater_b):
        conf_mat[a - min_rating][b - min_rating] += 1
    return conf_mat


def histogram(ratings, min_rating=None, max_rating=None):
    """
    Counts how often each rating value occurs.

    Bucket ``k`` of the returned list holds the number of occurrences of the
    rating ``min_rating + k``. The bounds default to the smallest and largest
    value found in ``ratings``.
    """
    lowest = min(ratings) if min_rating is None else min_rating
    highest = max(ratings) if max_rating is None else max_rating
    counts = [0] * int(highest - lowest + 1)
    for rating in ratings:
        counts[rating - lowest] += 1
    return counts


def quadratic_weighted_kappa(y, y_pred):
    """
    Calculates the quadratic weighted kappa between two sets of ratings.

    The quadratic weighted kappa is a measure of inter-rater agreement
    between two raters that provide discrete numeric ratings. Potential
    values range from -1 (representing complete disagreement) to 1
    (representing complete agreement). A kappa value of 0 is expected if all
    agreement is due to chance.

    Parameters
    ----------
    y, y_pred : array-like
        Ratings of the two raters; must have the same length. Both are
        converted with ``np.array(..., dtype=int)``.

    The rating range is taken from the data: it spans the smallest to the
    largest value seen in either input.

    Returns
    -------
    float
        The kappa value. When both raters use one and the same single rating
        the weights are undefined (0/0); that case is reported as 1.0
        (complete agreement) instead of raising ZeroDivisionError.

    Raises
    ------
    ValueError
        If the inputs are empty.
    """
    rater_a = np.array(y, dtype=int)
    rater_b = np.array(y_pred, dtype=int)
    assert len(rater_a) == len(rater_b)
    if len(rater_a) == 0:
        raise ValueError("quadratic_weighted_kappa needs at least one rating")

    min_rating = int(min(rater_a.min(), rater_b.min()))
    max_rating = int(max(rater_a.max(), rater_b.max()))
    conf_mat = confusion_matrix(rater_a, rater_b,
                                min_rating, max_rating)
    num_ratings = len(conf_mat)
    if num_ratings == 1:
        # Only one distinct rating overall: perfect agreement, and the
        # normalisation below would divide by zero.
        return 1.0
    num_scored_items = float(len(rater_a))

    hist_rater_a = histogram(rater_a, min_rating, max_rating)
    hist_rater_b = histogram(rater_b, min_rating, max_rating)

    numerator = 0.0
    denominator = 0.0

    for i in range(num_ratings):
        for j in range(num_ratings):
            # Count expected in cell (i, j) if the raters were independent.
            expected_count = (hist_rater_a[i] * hist_rater_b[j]
                              / num_scored_items)
            # Quadratic disagreement weight, normalised to [0, 1].
            d = pow(i - j, 2.0) / pow(num_ratings - 1, 2.0)
            numerator += d * conf_mat[i][j] / num_scored_items
            denominator += d * expected_count / num_scored_items

    return (1.0 - numerator / denominator)


In [None]:
# Optimal split thresholds
class OptimizedRounder(object):
    """
    Learns four cut points that turn continuous predictions into classes 0-4.

    ``fit`` searches the cut points with Nelder-Mead, starting from
    [0.5, 1.5, 2.5, 3.5], so that the quadratic weighted kappa between the
    bucketed predictions and the true labels is as large as possible.
    """

    def __init__(self):
        # Holds the optimiser result after fit(); 0 means "not fitted yet".
        self.coef_ = 0

    def _kappa_loss(self, coef, X, y):
        """Returns the negated quadratic weighted kappa for thresholds ``coef``."""
        preds = self.predict(X, coef)
        return -cohen_kappa_score(y, preds, weights='quadratic')

    def fit(self, X, y):
        """
        Optimises the thresholds on predictions ``X`` against labels ``y``.

        The optimiser result is stored in ``self.coef_``.
        """
        # Import the submodule explicitly: a bare `import scipy as sp` does
        # not guarantee that `sp.optimize` has been loaded.
        from scipy.optimize import minimize

        initial_coef = [0.5, 1.5, 2.5, 3.5]
        self.coef_ = minimize(self._kappa_loss, initial_coef, args=(X, y),
                              method='nelder-mead')

    def predict(self, X, coef=None):
        """
        Buckets ``X`` into the labels 0-4 using the thresholds ``coef``.

        ``coef`` is sorted before use; when omitted, the fitted thresholds
        are used. Bins are right-closed (pandas ``cut`` default).
        """
        if coef is None:
            coef = self.coefficients()
        bins = [-np.inf] + list(np.sort(coef)) + [np.inf]
        return pd.cut(X, bins, labels=[0, 1, 2, 3, 4])

    def coefficients(self):
        """
        Returns the fitted thresholds (the ``'x'`` entry of the result).

        Raises RuntimeError when called before ``fit``.
        """
        try:
            return self.coef_['x']
        except TypeError:
            raise RuntimeError(
                "OptimizedRounder is not fitted yet; call fit() first") from None

In [None]:
# XGBoost
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold

# Booster settings passed to run_xgb.
xgb_params = dict(
    eval_metric='rmse',
    seed=6,
    eta=0.001,
    gamma=2,
    max_depth=8,
    subsample=0.8,
    colsample_bytree=0.85,
    silent=1,
    # GPU options, currently switched off:
    #   predictor='gpu_predictor'
    #   tree_method='gpu_hist'
    #   device='gpu'
)

In [None]:
def run_xgb(params, X_train, X_test, n_splits=10, num_rounds=60000,
            early_stop=500, verbose_eval=1000, random_state=1337,
            target='AdoptionSpeed'):
    """
    Trains XGBoost with stratified K-fold CV and collects the predictions.

    Parameters
    ----------
    params : dict
        Booster parameters forwarded to ``xgb.train``.
    X_train : pandas.DataFrame
        Training features plus the ``target`` column.
    X_test : pandas.DataFrame
        Test features.
        NOTE(review): presumably the same feature columns as ``X_train``
        without ``target`` - confirm at the call site.
    n_splits : int
        Number of stratified folds.
    num_rounds : int
        Maximum number of boosting rounds per fold.
    early_stop : int
        ``early_stopping_rounds`` passed to ``xgb.train``.
    verbose_eval : int
        ``verbose_eval`` passed to ``xgb.train``.
    random_state : int
        Seed of the fold shuffling.
    target : str
        Name of the label column inside ``X_train``.

    Returns
    -------
    model
        The booster trained on the LAST fold only.
    oof_train : numpy.ndarray, shape (len(X_train),)
        Out-of-fold prediction for every training row.
    oof_test : numpy.ndarray, shape (len(X_test), n_splits)
        Test predictions, one column per fold.
    """
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Separate labels and features once instead of once per fold.
    y = X_train[target].values
    features = X_train.drop([target], axis=1)

    oof_train = np.zeros(X_train.shape[0])
    oof_test = np.zeros((X_test.shape[0], n_splits))

    # The test matrix does not depend on the fold, so build it a single time.
    d_test = xgb.DMatrix(X_test, feature_names=X_test.columns)

    for fold, (train_idx, valid_idx) in enumerate(kf.split(features, y)):
        X_tr = features.iloc[train_idx, :]
        X_val = features.iloc[valid_idx, :]

        d_train = xgb.DMatrix(data=X_tr, label=y[train_idx], feature_names=X_tr.columns)
        d_valid = xgb.DMatrix(data=X_val, label=y[valid_idx], feature_names=X_val.columns)

        watchlist = [(d_train, 'train'), (d_valid, 'valid')]
        model = xgb.train(dtrain=d_train, num_boost_round=num_rounds, evals=watchlist,
                          early_stopping_rounds=early_stop, verbose_eval=verbose_eval, params=params)

        # NOTE(review): `ntree_limit` / `best_ntree_limit` belong to the older
        # xgboost API - confirm they exist in the installed version.
        # d_valid is reused for prediction; labels do not affect predict().
        oof_train[valid_idx] = model.predict(d_valid, ntree_limit=model.best_ntree_limit)
        oof_test[:, fold] = model.predict(d_test, ntree_limit=model.best_ntree_limit)

    return model, oof_train, oof_test


In [None]:
#model, oof_train, oof_test = run_xgb(xgb_params, X_train1, X_test)
# Train on the NaN-filled frames. `model` is the booster of the last fold,
# `oof_train` holds one out-of-fold prediction per training row and
# `oof_test` one column of test predictions per fold.
model, oof_train, oof_test = run_xgb(xgb_params, X_train_non_null, X_test_non_null)


In [None]:
def plot_pred(pred):
    """Plots the distribution of ``pred`` (histogram over [0, 5] plus KDE)."""
    histogram_options = {'range': [0, 5]}
    sns.distplot(pred, kde=True, hist_kws=histogram_options)

In [None]:
# Distribution of the out-of-fold predictions on the training rows.
plot_pred(oof_train)

In [None]:
# Distribution of the test predictions averaged over the folds (columns).
plot_pred(oof_test.mean(axis=1))

In [None]:
# Fit the class thresholds on the out-of-fold predictions, bucket those same
# predictions with them and report the resulting quadratic weighted kappa.
# NOTE(review): the thresholds are tuned and scored on the same predictions,
# so this QWK is likely optimistic.
optR = OptimizedRounder()
optR.fit(oof_train, X_train1['AdoptionSpeed'].values)
coefficients = optR.coefficients()
valid_pred = optR.predict(oof_train, coefficients)
qwk = quadratic_weighted_kappa(X_train1['AdoptionSpeed'].values, valid_pred)
print("QWK = ", qwk)

In [None]:
# Show the four fitted thresholds.
coefficients

In [None]:
#coef = [1.66,2.13,2.47522154,2.85]
# Bucket the RAW out-of-fold predictions. `valid_pred` already holds class
# labels 0-4, so cutting it again with these thresholds would mislabel rows.
ttt = optR.predict(oof_train, coefficients)
quadratic_weighted_kappa(X_train1['AdoptionSpeed'].values, ttt)

In [None]:
# Bucket the out-of-fold (train) and fold-averaged (test) predictions with the
# fitted thresholds and print how many rows fall into each class.
train_predictions = optR.predict(oof_train, coefficients).astype(np.int8)
print(f'train pred distribution: {Counter(train_predictions)}')
test_predictions = optR.predict(oof_test.mean(axis=1), coefficients).astype(np.int8)
print(f'test pred distribution: {Counter(test_predictions)}')

In [None]:
# Override thresholds 0, 1 and 3 by hand; index 2 keeps the optimiser's value.
# NOTE(review): 1.66 / 2.13 / 2.85 are hard-coded and their origin is not
# shown here - confirm they are still appropriate for the current model.
coefficients_ = coefficients.copy()
coefficients_[0] = 1.66
coefficients_[1] = 2.13
coefficients_[3] = 2.85
train_predictions = optR.predict(oof_train, coefficients_).astype(np.int8)
print(f'train pred distribution: {Counter(train_predictions)}')
# This reassigns test_predictions, which is what the submission cell writes out.
test_predictions = optR.predict(oof_test.mean(axis=1), coefficients_).astype(np.int8)
print(f'test pred distribution: {Counter(test_predictions)}')

In [None]:
# Score the hand-adjusted thresholds on the RAW out-of-fold predictions.
# `valid_pred` is already bucketed into classes 0-4 and must not be cut again.
ttt = optR.predict(oof_train, coefficients_)
quadratic_weighted_kappa(X_train1['AdoptionSpeed'].values, ttt)

In [None]:
# Submission
#test_PetID = pd.read_csv('../input/petfinder-adoption-prediction/test/test.csv').PetID
#submission = pd.DataFrame({'PetID':test_PetID, 'AdoptionSpeed':test_predictions})
# Pair each PetID of meta_test with the predicted class and write the CSV.
# NOTE(review): assumes test_predictions is in the same row order as
# meta_test - confirm the test feature frame was never reordered or filtered.
submission = pd.DataFrame({'PetID': meta_test['PetID'].values, 'AdoptionSpeed': test_predictions})
submission.to_csv('submission.csv', index=False)
submission.head()