# HackerEarth Machine Learning Challenge: Pet Adoption

## Import relevant modules:

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Load dataset:

In [7]:
df = pd.read_csv('/content/test.csv')
df.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2
0,ANSL_75005,2005-08-17 00:00:00,2017-09-07 15:35:00,0.0,Black,0.87,42.73,0,7
1,ANSL_76663,2018-11-15 00:00:00,2019-05-08 17:24:00,1.0,Orange Tabby,0.06,6.71,0,1
2,ANSL_58259,2012-10-11 00:00:00,2018-04-02 16:51:00,1.0,Black,0.24,41.21,0,7
3,ANSL_67171,2015-02-13 00:00:00,2018-04-06 07:25:00,1.0,Black,0.29,8.46,7,1
4,ANSL_72871,2017-01-18 00:00:00,2018-04-26 13:42:00,1.0,Brown,0.71,30.92,0,7


In [6]:
df1 = pd.read_csv('/content/train.csv')
df1.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,Brown Tabby,0.8,7.78,13,9,0.0,1
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,,Brown,0.15,40.9,15,4,2.0,4
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,Black,0.5,11.06,18,4,0.0,1


## Analysis and feature selection:

In [8]:
targets = df1[['pet_id','breed_category','pet_category']]

In [9]:
df1 = df1.drop(['breed_category','pet_category'], axis = True)

In [10]:
test = pd.concat([df1,df])

In [11]:
test['condition'].fillna(1.0, inplace = True)

In [12]:
def encode(dataframe, feature):
    dummies = pd.get_dummies(dataframe[[feature]])
    res = pd.concat([dataframe, dummies], axis=1)
    return(res)

In [14]:
test = encode(test, 'color_type')

In [15]:
test.drop('color_type',axis = 1, inplace = True)

In [16]:
test['height(m)'] = test['height(cm)'].apply(lambda x:x*(.01))

In [17]:
test.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,length(m),height(cm),X1,X2,color_type_Agouti,color_type_Apricot,color_type_Black,color_type_Black Brindle,color_type_Black Smoke,color_type_Black Tabby,color_type_Black Tiger,color_type_Blue,color_type_Blue Cream,color_type_Blue Merle,color_type_Blue Point,color_type_Blue Smoke,color_type_Blue Tabby,color_type_Blue Tick,color_type_Blue Tiger,color_type_Brown,color_type_Brown Brindle,color_type_Brown Merle,color_type_Brown Tabby,color_type_Brown Tiger,color_type_Buff,color_type_Calico,color_type_Calico Point,color_type_Chocolate,color_type_Chocolate Point,color_type_Cream,color_type_Cream Tabby,color_type_Fawn,color_type_Flame Point,color_type_Gold,color_type_Gray,color_type_Gray Tabby,color_type_Green,color_type_Lilac Point,color_type_Liver,color_type_Liver Tick,color_type_Lynx Point,color_type_Orange,color_type_Orange Tabby,color_type_Pink,color_type_Red,color_type_Red Merle,color_type_Red Tick,color_type_Sable,color_type_Seal Point,color_type_Silver,color_type_Silver Lynx Point,color_type_Silver Tabby,color_type_Tan,color_type_Torbie,color_type_Tortie,color_type_Tortie Point,color_type_Tricolor,color_type_White,color_type_Yellow,color_type_Yellow Brindle,height(m)
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,0.8,7.78,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0778
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,0.72,14.19,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1419
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,1.0,0.15,40.9,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.409
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,0.62,17.82,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1782
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,0.5,11.06,18,4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1106


In [18]:
test.tail()

Unnamed: 0,pet_id,issue_date,listing_date,condition,length(m),height(cm),X1,X2,color_type_Agouti,color_type_Apricot,color_type_Black,color_type_Black Brindle,color_type_Black Smoke,color_type_Black Tabby,color_type_Black Tiger,color_type_Blue,color_type_Blue Cream,color_type_Blue Merle,color_type_Blue Point,color_type_Blue Smoke,color_type_Blue Tabby,color_type_Blue Tick,color_type_Blue Tiger,color_type_Brown,color_type_Brown Brindle,color_type_Brown Merle,color_type_Brown Tabby,color_type_Brown Tiger,color_type_Buff,color_type_Calico,color_type_Calico Point,color_type_Chocolate,color_type_Chocolate Point,color_type_Cream,color_type_Cream Tabby,color_type_Fawn,color_type_Flame Point,color_type_Gold,color_type_Gray,color_type_Gray Tabby,color_type_Green,color_type_Lilac Point,color_type_Liver,color_type_Liver Tick,color_type_Lynx Point,color_type_Orange,color_type_Orange Tabby,color_type_Pink,color_type_Red,color_type_Red Merle,color_type_Red Tick,color_type_Sable,color_type_Seal Point,color_type_Silver,color_type_Silver Lynx Point,color_type_Silver Tabby,color_type_Tan,color_type_Torbie,color_type_Tortie,color_type_Tortie Point,color_type_Tricolor,color_type_White,color_type_Yellow,color_type_Yellow Brindle,height(m)
8067,ANSL_66809,2016-02-10 00:00:00,2017-03-10 14:56:00,2.0,0.82,36.08,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.3608
8068,ANSL_59041,2015-12-07 00:00:00,2018-02-12 00:00:00,0.0,0.49,27.54,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.2754
8069,ANSL_60034,2015-12-08 00:00:00,2017-01-04 17:19:00,0.0,0.98,37.19,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.3719
8070,ANSL_58066,2016-06-28 00:00:00,2017-07-20 18:19:00,1.0,0.79,23.83,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.2383
8071,ANSL_69436,2016-02-02 00:00:00,2017-02-28 16:47:00,0.0,0.64,24.51,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.2451


In [19]:
train_df = test[:18834]
test_df = test[18834:]

In [20]:
train_df = pd.concat([train_df,targets], axis = 1)
train_df.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,length(m),height(cm),X1,X2,color_type_Agouti,color_type_Apricot,color_type_Black,color_type_Black Brindle,color_type_Black Smoke,color_type_Black Tabby,color_type_Black Tiger,color_type_Blue,color_type_Blue Cream,color_type_Blue Merle,color_type_Blue Point,color_type_Blue Smoke,color_type_Blue Tabby,color_type_Blue Tick,color_type_Blue Tiger,color_type_Brown,color_type_Brown Brindle,color_type_Brown Merle,color_type_Brown Tabby,color_type_Brown Tiger,color_type_Buff,color_type_Calico,color_type_Calico Point,color_type_Chocolate,color_type_Chocolate Point,color_type_Cream,color_type_Cream Tabby,color_type_Fawn,color_type_Flame Point,color_type_Gold,color_type_Gray,color_type_Gray Tabby,color_type_Green,color_type_Lilac Point,color_type_Liver,color_type_Liver Tick,color_type_Lynx Point,color_type_Orange,color_type_Orange Tabby,color_type_Pink,color_type_Red,color_type_Red Merle,color_type_Red Tick,color_type_Sable,color_type_Seal Point,color_type_Silver,color_type_Silver Lynx Point,color_type_Silver Tabby,color_type_Tan,color_type_Torbie,color_type_Tortie,color_type_Tortie Point,color_type_Tricolor,color_type_White,color_type_Yellow,color_type_Yellow Brindle,height(m),pet_id.1,breed_category,pet_category
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,0.8,7.78,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0778,ANSL_69903,0.0,1
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,0.72,14.19,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1419,ANSL_66892,0.0,2
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,1.0,0.15,40.9,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.409,ANSL_69750,2.0,4
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,0.62,17.82,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1782,ANSL_71623,0.0,2
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,0.5,11.06,18,4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1106,ANSL_57969,0.0,1


In [21]:
test_df.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,length(m),height(cm),X1,X2,color_type_Agouti,color_type_Apricot,color_type_Black,color_type_Black Brindle,color_type_Black Smoke,color_type_Black Tabby,color_type_Black Tiger,color_type_Blue,color_type_Blue Cream,color_type_Blue Merle,color_type_Blue Point,color_type_Blue Smoke,color_type_Blue Tabby,color_type_Blue Tick,color_type_Blue Tiger,color_type_Brown,color_type_Brown Brindle,color_type_Brown Merle,color_type_Brown Tabby,color_type_Brown Tiger,color_type_Buff,color_type_Calico,color_type_Calico Point,color_type_Chocolate,color_type_Chocolate Point,color_type_Cream,color_type_Cream Tabby,color_type_Fawn,color_type_Flame Point,color_type_Gold,color_type_Gray,color_type_Gray Tabby,color_type_Green,color_type_Lilac Point,color_type_Liver,color_type_Liver Tick,color_type_Lynx Point,color_type_Orange,color_type_Orange Tabby,color_type_Pink,color_type_Red,color_type_Red Merle,color_type_Red Tick,color_type_Sable,color_type_Seal Point,color_type_Silver,color_type_Silver Lynx Point,color_type_Silver Tabby,color_type_Tan,color_type_Torbie,color_type_Tortie,color_type_Tortie Point,color_type_Tricolor,color_type_White,color_type_Yellow,color_type_Yellow Brindle,height(m)
0,ANSL_75005,2005-08-17 00:00:00,2017-09-07 15:35:00,0.0,0.87,42.73,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4273
1,ANSL_76663,2018-11-15 00:00:00,2019-05-08 17:24:00,1.0,0.06,6.71,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0671
2,ANSL_58259,2012-10-11 00:00:00,2018-04-02 16:51:00,1.0,0.24,41.21,0,7,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4121
3,ANSL_67171,2015-02-13 00:00:00,2018-04-06 07:25:00,1.0,0.29,8.46,7,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0846
4,ANSL_72871,2017-01-18 00:00:00,2018-04-26 13:42:00,1.0,0.71,30.92,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.3092


In [22]:
train_df.drop('height(cm)', axis = 1, inplace = True)
test_df.drop('height(cm)', axis = 1, inplace = True)

In [23]:
train_df.columns.values

array(['pet_id', 'issue_date', 'listing_date', 'condition', 'length(m)',
       'X1', 'X2', 'color_type_Agouti', 'color_type_Apricot',
       'color_type_Black', 'color_type_Black Brindle',
       'color_type_Black Smoke', 'color_type_Black Tabby',
       'color_type_Black Tiger', 'color_type_Blue',
       'color_type_Blue Cream', 'color_type_Blue Merle',
       'color_type_Blue Point', 'color_type_Blue Smoke',
       'color_type_Blue Tabby', 'color_type_Blue Tick',
       'color_type_Blue Tiger', 'color_type_Brown',
       'color_type_Brown Brindle', 'color_type_Brown Merle',
       'color_type_Brown Tabby', 'color_type_Brown Tiger',
       'color_type_Buff', 'color_type_Calico', 'color_type_Calico Point',
       'color_type_Chocolate', 'color_type_Chocolate Point',
       'color_type_Cream', 'color_type_Cream Tabby', 'color_type_Fawn',
       'color_type_Flame Point', 'color_type_Gold', 'color_type_Gray',
       'color_type_Gray Tabby', 'color_type_Green',
       'color_type_Lilac Po

In [24]:
train_df.drop('issue_date', axis = 1, inplace = True)
train_df.drop('listing_date', axis = 1, inplace = True)
test_df.drop('issue_date', axis = 1, inplace = True)
test_df.drop('listing_date', axis = 1, inplace = True)

In [25]:
train_df = train_df.drop(df.columns[0], axis=1)
train_df

Unnamed: 0,condition,length(m),X1,X2,color_type_Agouti,color_type_Apricot,color_type_Black,color_type_Black Brindle,color_type_Black Smoke,color_type_Black Tabby,color_type_Black Tiger,color_type_Blue,color_type_Blue Cream,color_type_Blue Merle,color_type_Blue Point,color_type_Blue Smoke,color_type_Blue Tabby,color_type_Blue Tick,color_type_Blue Tiger,color_type_Brown,color_type_Brown Brindle,color_type_Brown Merle,color_type_Brown Tabby,color_type_Brown Tiger,color_type_Buff,color_type_Calico,color_type_Calico Point,color_type_Chocolate,color_type_Chocolate Point,color_type_Cream,color_type_Cream Tabby,color_type_Fawn,color_type_Flame Point,color_type_Gold,color_type_Gray,color_type_Gray Tabby,color_type_Green,color_type_Lilac Point,color_type_Liver,color_type_Liver Tick,color_type_Lynx Point,color_type_Orange,color_type_Orange Tabby,color_type_Pink,color_type_Red,color_type_Red Merle,color_type_Red Tick,color_type_Sable,color_type_Seal Point,color_type_Silver,color_type_Silver Lynx Point,color_type_Silver Tabby,color_type_Tan,color_type_Torbie,color_type_Tortie,color_type_Tortie Point,color_type_Tricolor,color_type_White,color_type_Yellow,color_type_Yellow Brindle,height(m),breed_category,pet_category
0,2.0,0.80,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0778,0.0,1
1,1.0,0.72,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1419,0.0,2
2,1.0,0.15,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4090,2.0,4
3,1.0,0.62,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1782,0.0,2
4,2.0,0.50,18,4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1106,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18829,2.0,0.44,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.2736,0.0,2
18830,1.0,0.73,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1425,2.0,4
18831,0.0,0.99,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.2813,1.0,1
18832,0.0,0.55,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.4482,1.0,2


In [26]:
init_train_df = train_df.drop('pet_category', axis = 1)
init_train_df

Unnamed: 0,condition,length(m),X1,X2,color_type_Agouti,color_type_Apricot,color_type_Black,color_type_Black Brindle,color_type_Black Smoke,color_type_Black Tabby,color_type_Black Tiger,color_type_Blue,color_type_Blue Cream,color_type_Blue Merle,color_type_Blue Point,color_type_Blue Smoke,color_type_Blue Tabby,color_type_Blue Tick,color_type_Blue Tiger,color_type_Brown,color_type_Brown Brindle,color_type_Brown Merle,color_type_Brown Tabby,color_type_Brown Tiger,color_type_Buff,color_type_Calico,color_type_Calico Point,color_type_Chocolate,color_type_Chocolate Point,color_type_Cream,color_type_Cream Tabby,color_type_Fawn,color_type_Flame Point,color_type_Gold,color_type_Gray,color_type_Gray Tabby,color_type_Green,color_type_Lilac Point,color_type_Liver,color_type_Liver Tick,color_type_Lynx Point,color_type_Orange,color_type_Orange Tabby,color_type_Pink,color_type_Red,color_type_Red Merle,color_type_Red Tick,color_type_Sable,color_type_Seal Point,color_type_Silver,color_type_Silver Lynx Point,color_type_Silver Tabby,color_type_Tan,color_type_Torbie,color_type_Tortie,color_type_Tortie Point,color_type_Tricolor,color_type_White,color_type_Yellow,color_type_Yellow Brindle,height(m),breed_category
0,2.0,0.80,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0778,0.0
1,1.0,0.72,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1419,0.0
2,1.0,0.15,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4090,2.0
3,1.0,0.62,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1782,0.0
4,2.0,0.50,18,4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1106,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18829,2.0,0.44,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.2736,0.0
18830,1.0,0.73,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1425,2.0
18831,0.0,0.99,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.2813,1.0
18832,0.0,0.55,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.4482,1.0


In [28]:
sec_train_df = train_df.drop('breed_category', axis = 1)
sec_train_df

Unnamed: 0,condition,length(m),X1,X2,color_type_Agouti,color_type_Apricot,color_type_Black,color_type_Black Brindle,color_type_Black Smoke,color_type_Black Tabby,color_type_Black Tiger,color_type_Blue,color_type_Blue Cream,color_type_Blue Merle,color_type_Blue Point,color_type_Blue Smoke,color_type_Blue Tabby,color_type_Blue Tick,color_type_Blue Tiger,color_type_Brown,color_type_Brown Brindle,color_type_Brown Merle,color_type_Brown Tabby,color_type_Brown Tiger,color_type_Buff,color_type_Calico,color_type_Calico Point,color_type_Chocolate,color_type_Chocolate Point,color_type_Cream,color_type_Cream Tabby,color_type_Fawn,color_type_Flame Point,color_type_Gold,color_type_Gray,color_type_Gray Tabby,color_type_Green,color_type_Lilac Point,color_type_Liver,color_type_Liver Tick,color_type_Lynx Point,color_type_Orange,color_type_Orange Tabby,color_type_Pink,color_type_Red,color_type_Red Merle,color_type_Red Tick,color_type_Sable,color_type_Seal Point,color_type_Silver,color_type_Silver Lynx Point,color_type_Silver Tabby,color_type_Tan,color_type_Torbie,color_type_Tortie,color_type_Tortie Point,color_type_Tricolor,color_type_White,color_type_Yellow,color_type_Yellow Brindle,height(m),pet_category
0,2.0,0.80,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0778,1
1,1.0,0.72,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1419,2
2,1.0,0.15,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.4090,4
3,1.0,0.62,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0.1782,2
4,2.0,0.50,18,4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1106,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18829,2.0,0.44,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.2736,2
18830,1.0,0.73,15,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.1425,4
18831,0.0,0.99,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.2813,1
18832,0.0,0.55,13,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.4482,2


## Model selection and evaluation:

In [29]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier
from sklearn.metrics import f1_score

### XGBoost with Random search:

In [30]:
X1_train = init_train_df.drop('breed_category', axis = 1)
Y1_test = init_train_df['breed_category']

In [31]:
params = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5],
        }

In [32]:
xgb = XGBClassifier(learning_rate = 0.02, n_estimators = 600, objective = 'multi:softmax', num_class = 3, nthread = 1)

folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits = folds, shuffle = True, random_state = 42)

random_search = RandomizedSearchCV(xgb, param_distributions=params, n_iter = param_comb, n_jobs = 4, cv = skf.split(X1_train,Y1_test), verbose = 3, random_state = 42 )

In [33]:
random_search.fit(X1_train,Y1_test)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  25 out of  25 | elapsed: 12.8min finished


RandomizedSearchCV(cv=<generator object _BaseKFold.split at 0x7f6c7c561a40>,
                   error_score=nan,
                   estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                           colsample_bylevel=1,
                                           colsample_bynode=1,
                                           colsample_bytree=1, gamma=0,
                                           learning_rate=0.02, max_delta_step=0,
                                           max_depth=3, min_child_weight=1,
                                           missing=None, n_estimators=600,
                                           n_jobs=1, nthread=1, num_class=3,
                                           objective='m...
                                           reg_lambda=1, scale_pos_weight=1,
                                           seed=None, silent=None, subsample=1,
                                           verbosity=1),
                   iid='depre

In [34]:
X2_train = sec_train_df.drop('pet_category', axis = 1)
Y2_test = sec_train_df['pet_category']

In [35]:
params = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5],
        }

In [36]:
xgb = XGBClassifier(learning_rate = 0.02, n_estimators = 600, objective = 'multi:softmax', num_class = 4, nthread = 1)

folds = 5
param_comb = 5

skf = StratifiedKFold(n_splits = folds, shuffle = True, random_state = 42)

random_search_1 = RandomizedSearchCV(xgb, param_distributions = params, n_iter = param_comb, n_jobs = 4, cv = skf.split(X2_train,Y2_test), verbose = 3, random_state = 42)

In [37]:
random_search_1.fit(X2_train,Y2_test)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  25 out of  25 | elapsed: 16.7min finished


RandomizedSearchCV(cv=<generator object _BaseKFold.split at 0x7f6c671b4410>,
                   error_score=nan,
                   estimator=XGBClassifier(base_score=0.5, booster='gbtree',
                                           colsample_bylevel=1,
                                           colsample_bynode=1,
                                           colsample_bytree=1, gamma=0,
                                           learning_rate=0.02, max_delta_step=0,
                                           max_depth=3, min_child_weight=1,
                                           missing=None, n_estimators=600,
                                           n_jobs=1, nthread=1, num_class=4,
                                           objective='m...
                                           reg_lambda=1, scale_pos_weight=1,
                                           seed=None, silent=None, subsample=1,
                                           verbosity=1),
                   iid='depre

### Evaluation:

In [38]:
test1 = test_df.drop('pet_id', axis =1)

In [39]:
y_test = random_search_1.predict(test1)

In [40]:
test2 = test_df.drop('pet_id', axis =1)

In [41]:
y_test1 = random_search.predict(test1)

In [42]:
results_df = pd.DataFrame(data = {'pet_id':test_df['pet_id'], 'breed_category':y_test1,'pet_category':y_test})
results_df.to_csv('submission.csv', index = False)