# HackerEarth Machine Learning Challenge: Adopt a Buddy

## Importing the libraries

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Importing the dataset

In [2]:
# Read the competition's train and test CSV files.
# The dataset folder can be overridden through the ADOPT_BUDDY_DATA_DIR environment
# variable; when it is unset, the original local folder is used, so existing runs
# resolve to exactly the same two file paths as before.
DATA_DIR = os.environ.get(
    'ADOPT_BUDDY_DATA_DIR',
    '/Users/ag30103/Desktop/Personal/HackerrankML/Adopt_Buddy_Challenge/Dataset',
)
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))


In [3]:
# Preview the first five rows of the training data
train.head(n=5)

Unnamed: 0,pet_id,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,Brown Tabby,0.8,7.78,13,9,0.0,1
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,,Brown,0.15,40.9,15,4,2.0,4
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,Black,0.5,11.06,18,4,0.0,1


## Feature Engineering

In [4]:
# Count the missing values in each column of the training data
train.isna().sum()

pet_id               0
issue_date           0
listing_date         0
condition         1477
color_type           0
length(m)            0
height(cm)           0
X1                   0
X2                   0
breed_category       0
pet_category         0
dtype: int64

In [5]:
# Fill the missing 'condition' values with sklearn's KNNImputer, then round the result
# back to whole condition codes.
# The imputer is fitted on the training column only, and that fitted imputer is reused
# for the test column, so no statistics are learned from the test set.
# NOTE(review): only a single column is passed in, so a row with a missing value has no
# neighbour with a defined distance; per the scikit-learn documentation the imputer then
# uses the training-set average of the feature — confirm this is the intended behaviour.
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=5)
train_condition = train['condition'].values.reshape(-1, 1)
test_condition = test['condition'].values.reshape(-1, 1)
# The imputer returns an (n, 1) array; flatten it before assigning it as one column.
train['condition'] = np.round(imputer.fit_transform(train_condition)).ravel()
test['condition'] = np.round(imputer.transform(test_condition)).ravel()

In [6]:
# Alternative (disabled): handle nulls in 'condition' by filling them with the mode, which is 1.0
# train['condition'].fillna(1.0, inplace=True)
# test['condition'].fillna(1.0, inplace=True)

In [7]:
# Turn the textual colour names into integer codes.
# The encoder learns its classes from the training colours only; the test column is
# then mapped with those same classes.
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()

encoder.fit(train['color_type'])
train['color_type'] = encoder.transform(train['color_type'])
test['color_type'] = encoder.transform(test['color_type'])

In [8]:
# Parse both timestamp columns of each frame, then derive the number of whole days
# between the issue date and the listing date.
for frame in (train, test):
    for date_column in ('issue_date', 'listing_date'):
        frame[date_column] = pd.to_datetime(frame[date_column])
    elapsed = frame['listing_date'] - frame['issue_date']
    frame['day_difference'] = elapsed.dt.days


In [9]:
# Remove the columns that are not used as model inputs.
# 'pet_id' is dropped from train only; test keeps it because it is read again when the
# submission files are built.
train.drop(columns=['pet_id', 'issue_date', 'listing_date'], inplace=True)
test.drop(columns=['issue_date', 'listing_date'], inplace=True)

In [10]:
# Assemble the model inputs: the same seven feature columns for train and test,
# plus one target array per prediction task.
feature_columns = ['condition', 'color_type', 'length(m)', 'height(cm)', 'X1', 'X2', 'day_difference']

X_train = train[feature_columns].values
X_test = test[feature_columns].values

y_breed = np.array(train['breed_category'])
y_pet = np.array(train['pet_category'])


## Splitting the dataset into the Training set and Validation set

In [11]:
# Hold out 20% of the rows for validation, once per target. Both calls share the same
# test size and seed; only the target array differs.
from sklearn.model_selection import train_test_split
split_options = {'test_size': 0.2, 'random_state': 0}
X_breed_train, X_breed_val, y_breed_train, y_breed_val = train_test_split(X_train, y_breed, **split_options)
X_pet_train, X_pet_val, y_pet_train, y_pet_val = train_test_split(X_train, y_pet, **split_options)


## Building Gradient Boosting Classifier and performing predictions

In [12]:
# Breed classification: fit a gradient boosting model (seed 0, otherwise default
# parameters) on the breed training split and show its score on the validation split.
from sklearn.ensemble import GradientBoostingClassifier
gbt_breed = GradientBoostingClassifier(random_state=0).fit(X_breed_train, y_breed_train)
gbt_breed.score(X_breed_val, y_breed_val)

0.8906291478630209

In [13]:
# Pet classification: fit a gradient boosting model (seed 0, otherwise default
# parameters) on the pet training split and show its score on the validation split.
from sklearn.ensemble import GradientBoostingClassifier
gbt_pet = GradientBoostingClassifier(random_state=0).fit(X_pet_train, y_pet_train)
gbt_pet.score(X_pet_val, y_pet_val)

0.8943456331298115

In [14]:
# Predict both targets for the test set with the models fitted on the training split.
# Both names are reassigned further down, after the final models are fitted.
gbt_breed_pred, gbt_pet_pred = gbt_breed.predict(X_test), gbt_pet.predict(X_test)

## Building final models for Breed and Pet Classification

In [15]:
# 89.47 — figure recorded by the author (presumably the submission score; TODO confirm)
# Refit both gradient boosting models on the full training data with 1000 estimators.
final_gbt_options = {'random_state': 0, 'n_estimators': 1000}

# Breed classification model
gbt_breed = GradientBoostingClassifier(**final_gbt_options).fit(X_train, y_breed)

# Pet classification model (the fit call stays last so the cell still displays the model)
gbt_pet = GradientBoostingClassifier(**final_gbt_options)
gbt_pet.fit(X_train, y_pet)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=1000,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=0, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [16]:
# Predict breed and pet categories for the test set with the final gradient boosting models
gbt_breed_pred, gbt_pet_pred = gbt_breed.predict(X_test), gbt_pet.predict(X_test)

In [17]:
# Build the gradient boosting submission: pet ids from the test frame plus both
# predicted categories, indexed by pet id, written to CSV.
submission_columns = {
    'pet_id': test['pet_id'],
    'breed_category': gbt_breed_pred,
    'pet_category': gbt_pet_pred,
}
final_gb = pd.DataFrame(submission_columns).set_index('pet_id', drop=True)
final_gb.to_csv("submission_final_gb.csv")

## Building models using XGBoost

In [18]:
# 89.59 — figure recorded by the author (presumably the submission score; TODO confirm)
from xgboost import XGBClassifier

# Both XGBoost models share the same seed and number of estimators.
xgb_options = {'random_state': 0, 'n_estimators': 1000}

# Breed classification model, fitted on the full training data
xgb_breed = XGBClassifier(**xgb_options).fit(X_train, y_breed)

# Pet classification model (the fit call stays last so the cell still displays the model)
xgb_pet = XGBClassifier(**xgb_options)
xgb_pet.fit(X_train, y_pet)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
              max_depth=3, min_child_weight=1, missing=None, n_estimators=1000,
              n_jobs=1, nthread=None, objective='multi:softprob',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
              seed=None, silent=True, subsample=1)

In [19]:
# Predict breed and pet categories for the test set with the XGBoost models
xgb_breed_pred, xgb_pet_pred = xgb_breed.predict(X_test), xgb_pet.predict(X_test)

In [20]:
# Build the XGBoost submission: pet ids from the test frame plus both predicted
# categories, indexed by pet id, written to CSV.
submission_columns = {
    'pet_id': test['pet_id'],
    'breed_category': xgb_breed_pred,
    'pet_category': xgb_pet_pred,
}
final_xgb = pd.DataFrame(submission_columns).set_index('pet_id', drop=True)
final_xgb.to_csv("submission_final_xgb.csv")


## Building models using Catboost

In [21]:
# 89.59 — figure recorded by the author (presumably the submission score; TODO confirm)
from catboost import CatBoostClassifier

# Both CatBoost models share the same seed and number of estimators.
catboost_options = {'n_estimators': 1000, 'random_seed': 0}

# CatBoost model for Breed classification, fitted on the full training data
cat_breed = CatBoostClassifier(**catboost_options)
cat_breed.fit(X_train, y_breed)

# CatBoost model for Pet classification (the fit call stays last so the cell still displays the model)
cat_pet = CatBoostClassifier(**catboost_options)
cat_pet.fit(X_train, y_pet)

Learning rate set to 0.091906
0:	learn: 0.9667349	total: 61ms	remaining: 1m
1:	learn: 0.8630273	total: 65.7ms	remaining: 32.8s
2:	learn: 0.7807196	total: 70.2ms	remaining: 23.3s
3:	learn: 0.7105990	total: 74.3ms	remaining: 18.5s
4:	learn: 0.6525861	total: 77.2ms	remaining: 15.4s
5:	learn: 0.6007161	total: 81.4ms	remaining: 13.5s
6:	learn: 0.5588495	total: 85.1ms	remaining: 12.1s
7:	learn: 0.5211916	total: 89ms	remaining: 11s
8:	learn: 0.4883474	total: 93.5ms	remaining: 10.3s
9:	learn: 0.4599593	total: 97.4ms	remaining: 9.64s
10:	learn: 0.4340762	total: 101ms	remaining: 9.12s
11:	learn: 0.4107782	total: 105ms	remaining: 8.68s
12:	learn: 0.3898935	total: 109ms	remaining: 8.3s
13:	learn: 0.3725400	total: 113ms	remaining: 7.98s
14:	learn: 0.3566062	total: 117ms	remaining: 7.69s
15:	learn: 0.3423023	total: 121ms	remaining: 7.45s
16:	learn: 0.3298398	total: 125ms	remaining: 7.23s
17:	learn: 0.3184199	total: 129ms	remaining: 7.03s
18:	learn: 0.3080848	total: 133ms	remaining: 6.86s
19:	learn: 

171:	learn: 0.1804797	total: 778ms	remaining: 3.75s
172:	learn: 0.1803957	total: 784ms	remaining: 3.75s
173:	learn: 0.1803363	total: 789ms	remaining: 3.75s
174:	learn: 0.1801988	total: 793ms	remaining: 3.74s
175:	learn: 0.1801036	total: 797ms	remaining: 3.73s
176:	learn: 0.1799935	total: 802ms	remaining: 3.73s
177:	learn: 0.1798566	total: 806ms	remaining: 3.72s
178:	learn: 0.1797955	total: 810ms	remaining: 3.71s
179:	learn: 0.1796148	total: 814ms	remaining: 3.71s
180:	learn: 0.1795398	total: 818ms	remaining: 3.7s
181:	learn: 0.1793483	total: 823ms	remaining: 3.7s
182:	learn: 0.1792460	total: 827ms	remaining: 3.69s
183:	learn: 0.1791215	total: 831ms	remaining: 3.68s
184:	learn: 0.1789946	total: 835ms	remaining: 3.68s
185:	learn: 0.1788609	total: 840ms	remaining: 3.67s
186:	learn: 0.1787639	total: 844ms	remaining: 3.67s
187:	learn: 0.1786621	total: 848ms	remaining: 3.66s
188:	learn: 0.1785387	total: 853ms	remaining: 3.66s
189:	learn: 0.1783194	total: 857ms	remaining: 3.65s
190:	learn: 0.

354:	learn: 0.1658261	total: 1.76s	remaining: 3.19s
355:	learn: 0.1657487	total: 1.76s	remaining: 3.19s
356:	learn: 0.1656854	total: 1.77s	remaining: 3.19s
357:	learn: 0.1656278	total: 1.78s	remaining: 3.19s
358:	learn: 0.1655382	total: 1.79s	remaining: 3.19s
359:	learn: 0.1654642	total: 1.79s	remaining: 3.19s
360:	learn: 0.1653951	total: 1.8s	remaining: 3.19s
361:	learn: 0.1653409	total: 1.81s	remaining: 3.19s
362:	learn: 0.1653068	total: 1.82s	remaining: 3.19s
363:	learn: 0.1652096	total: 1.82s	remaining: 3.19s
364:	learn: 0.1651575	total: 1.83s	remaining: 3.19s
365:	learn: 0.1650961	total: 1.84s	remaining: 3.18s
366:	learn: 0.1649946	total: 1.84s	remaining: 3.18s
367:	learn: 0.1648803	total: 1.85s	remaining: 3.18s
368:	learn: 0.1647421	total: 1.86s	remaining: 3.18s
369:	learn: 0.1646547	total: 1.87s	remaining: 3.18s
370:	learn: 0.1646147	total: 1.87s	remaining: 3.18s
371:	learn: 0.1645365	total: 1.88s	remaining: 3.17s
372:	learn: 0.1644824	total: 1.89s	remaining: 3.17s
373:	learn: 0

528:	learn: 0.1546671	total: 2.73s	remaining: 2.44s
529:	learn: 0.1546083	total: 2.74s	remaining: 2.43s
530:	learn: 0.1545652	total: 2.75s	remaining: 2.43s
531:	learn: 0.1544820	total: 2.75s	remaining: 2.42s
532:	learn: 0.1544423	total: 2.76s	remaining: 2.42s
533:	learn: 0.1544091	total: 2.76s	remaining: 2.41s
534:	learn: 0.1543684	total: 2.77s	remaining: 2.4s
535:	learn: 0.1543184	total: 2.77s	remaining: 2.4s
536:	learn: 0.1542677	total: 2.77s	remaining: 2.39s
537:	learn: 0.1542116	total: 2.78s	remaining: 2.38s
538:	learn: 0.1541373	total: 2.78s	remaining: 2.38s
539:	learn: 0.1540807	total: 2.79s	remaining: 2.37s
540:	learn: 0.1540560	total: 2.79s	remaining: 2.37s
541:	learn: 0.1539946	total: 2.79s	remaining: 2.36s
542:	learn: 0.1539118	total: 2.8s	remaining: 2.35s
543:	learn: 0.1538580	total: 2.8s	remaining: 2.35s
544:	learn: 0.1538026	total: 2.81s	remaining: 2.34s
545:	learn: 0.1537283	total: 2.81s	remaining: 2.34s
546:	learn: 0.1536965	total: 2.82s	remaining: 2.33s
547:	learn: 0.15

695:	learn: 0.1456500	total: 3.93s	remaining: 1.72s
696:	learn: 0.1455912	total: 3.94s	remaining: 1.71s
697:	learn: 0.1455513	total: 3.94s	remaining: 1.71s
698:	learn: 0.1455139	total: 3.95s	remaining: 1.7s
699:	learn: 0.1454284	total: 3.96s	remaining: 1.7s
700:	learn: 0.1453973	total: 3.96s	remaining: 1.69s
701:	learn: 0.1453493	total: 3.97s	remaining: 1.69s
702:	learn: 0.1453014	total: 3.98s	remaining: 1.68s
703:	learn: 0.1452301	total: 3.98s	remaining: 1.67s
704:	learn: 0.1451834	total: 3.99s	remaining: 1.67s
705:	learn: 0.1451645	total: 4s	remaining: 1.66s
706:	learn: 0.1451088	total: 4s	remaining: 1.66s
707:	learn: 0.1450762	total: 4.01s	remaining: 1.65s
708:	learn: 0.1449966	total: 4.02s	remaining: 1.65s
709:	learn: 0.1449493	total: 4.02s	remaining: 1.64s
710:	learn: 0.1448911	total: 4.03s	remaining: 1.64s
711:	learn: 0.1448653	total: 4.04s	remaining: 1.63s
712:	learn: 0.1447940	total: 4.04s	remaining: 1.63s
713:	learn: 0.1447513	total: 4.05s	remaining: 1.62s
714:	learn: 0.144684

886:	learn: 0.1373033	total: 5.1s	remaining: 650ms
887:	learn: 0.1372603	total: 5.11s	remaining: 644ms
888:	learn: 0.1371925	total: 5.11s	remaining: 638ms
889:	learn: 0.1371389	total: 5.12s	remaining: 632ms
890:	learn: 0.1370891	total: 5.12s	remaining: 626ms
891:	learn: 0.1370306	total: 5.12s	remaining: 620ms
892:	learn: 0.1369765	total: 5.13s	remaining: 615ms
893:	learn: 0.1369472	total: 5.13s	remaining: 609ms
894:	learn: 0.1368996	total: 5.14s	remaining: 603ms
895:	learn: 0.1368780	total: 5.14s	remaining: 597ms
896:	learn: 0.1368417	total: 5.15s	remaining: 591ms
897:	learn: 0.1368059	total: 5.15s	remaining: 585ms
898:	learn: 0.1367730	total: 5.16s	remaining: 579ms
899:	learn: 0.1367493	total: 5.16s	remaining: 573ms
900:	learn: 0.1367257	total: 5.17s	remaining: 568ms
901:	learn: 0.1366411	total: 5.17s	remaining: 562ms
902:	learn: 0.1365041	total: 5.17s	remaining: 556ms
903:	learn: 0.1364784	total: 5.18s	remaining: 550ms
904:	learn: 0.1364215	total: 5.18s	remaining: 544ms
905:	learn: 0

55:	learn: 0.3699606	total: 336ms	remaining: 5.66s
56:	learn: 0.3690327	total: 345ms	remaining: 5.71s
57:	learn: 0.3670193	total: 351ms	remaining: 5.7s
58:	learn: 0.3665778	total: 357ms	remaining: 5.7s
59:	learn: 0.3648319	total: 363ms	remaining: 5.69s
60:	learn: 0.3640640	total: 368ms	remaining: 5.66s
61:	learn: 0.3616331	total: 374ms	remaining: 5.66s
62:	learn: 0.3610536	total: 379ms	remaining: 5.64s
63:	learn: 0.3605039	total: 384ms	remaining: 5.62s
64:	learn: 0.3590550	total: 389ms	remaining: 5.6s
65:	learn: 0.3563772	total: 394ms	remaining: 5.58s
66:	learn: 0.3552532	total: 399ms	remaining: 5.55s
67:	learn: 0.3546461	total: 404ms	remaining: 5.53s
68:	learn: 0.3515564	total: 409ms	remaining: 5.51s
69:	learn: 0.3502403	total: 414ms	remaining: 5.49s
70:	learn: 0.3497077	total: 418ms	remaining: 5.47s
71:	learn: 0.3484180	total: 423ms	remaining: 5.45s
72:	learn: 0.3470740	total: 428ms	remaining: 5.44s
73:	learn: 0.3454495	total: 434ms	remaining: 5.43s
74:	learn: 0.3438594	total: 439ms	

221:	learn: 0.2755066	total: 1.3s	remaining: 4.57s
222:	learn: 0.2753982	total: 1.31s	remaining: 4.58s
223:	learn: 0.2752811	total: 1.32s	remaining: 4.58s
224:	learn: 0.2751153	total: 1.33s	remaining: 4.57s
225:	learn: 0.2748152	total: 1.33s	remaining: 4.56s
226:	learn: 0.2746770	total: 1.34s	remaining: 4.55s
227:	learn: 0.2742489	total: 1.34s	remaining: 4.55s
228:	learn: 0.2735273	total: 1.35s	remaining: 4.54s
229:	learn: 0.2733529	total: 1.35s	remaining: 4.54s
230:	learn: 0.2732277	total: 1.36s	remaining: 4.53s
231:	learn: 0.2725583	total: 1.37s	remaining: 4.53s
232:	learn: 0.2724039	total: 1.37s	remaining: 4.52s
233:	learn: 0.2722359	total: 1.38s	remaining: 4.51s
234:	learn: 0.2720861	total: 1.38s	remaining: 4.51s
235:	learn: 0.2719603	total: 1.39s	remaining: 4.5s
236:	learn: 0.2717857	total: 1.4s	remaining: 4.5s
237:	learn: 0.2716083	total: 1.4s	remaining: 4.49s
238:	learn: 0.2714067	total: 1.41s	remaining: 4.49s
239:	learn: 0.2706319	total: 1.41s	remaining: 4.48s
240:	learn: 0.270

391:	learn: 0.2479714	total: 2.28s	remaining: 3.53s
392:	learn: 0.2477342	total: 2.29s	remaining: 3.53s
393:	learn: 0.2476208	total: 2.29s	remaining: 3.53s
394:	learn: 0.2474191	total: 2.3s	remaining: 3.52s
395:	learn: 0.2473619	total: 2.3s	remaining: 3.51s
396:	learn: 0.2473056	total: 2.31s	remaining: 3.51s
397:	learn: 0.2472094	total: 2.31s	remaining: 3.5s
398:	learn: 0.2471329	total: 2.32s	remaining: 3.49s
399:	learn: 0.2469865	total: 2.33s	remaining: 3.49s
400:	learn: 0.2468700	total: 2.33s	remaining: 3.48s
401:	learn: 0.2467936	total: 2.33s	remaining: 3.47s
402:	learn: 0.2466608	total: 2.34s	remaining: 3.47s
403:	learn: 0.2465524	total: 2.35s	remaining: 3.46s
404:	learn: 0.2464736	total: 2.35s	remaining: 3.45s
405:	learn: 0.2463798	total: 2.35s	remaining: 3.44s
406:	learn: 0.2463048	total: 2.36s	remaining: 3.44s
407:	learn: 0.2461357	total: 2.37s	remaining: 3.43s
408:	learn: 0.2460644	total: 2.37s	remaining: 3.42s
409:	learn: 0.2459929	total: 2.37s	remaining: 3.42s
410:	learn: 0.2

559:	learn: 0.2294181	total: 3.26s	remaining: 2.56s
560:	learn: 0.2293548	total: 3.26s	remaining: 2.55s
561:	learn: 0.2292708	total: 3.27s	remaining: 2.55s
562:	learn: 0.2291869	total: 3.28s	remaining: 2.54s
563:	learn: 0.2290457	total: 3.28s	remaining: 2.54s
564:	learn: 0.2289975	total: 3.29s	remaining: 2.53s
565:	learn: 0.2288766	total: 3.29s	remaining: 2.52s
566:	learn: 0.2287964	total: 3.3s	remaining: 2.52s
567:	learn: 0.2287389	total: 3.31s	remaining: 2.52s
568:	learn: 0.2286836	total: 3.31s	remaining: 2.51s
569:	learn: 0.2285836	total: 3.32s	remaining: 2.5s
570:	learn: 0.2284920	total: 3.33s	remaining: 2.5s
571:	learn: 0.2284003	total: 3.33s	remaining: 2.49s
572:	learn: 0.2283089	total: 3.34s	remaining: 2.49s
573:	learn: 0.2282419	total: 3.35s	remaining: 2.48s
574:	learn: 0.2281615	total: 3.35s	remaining: 2.48s
575:	learn: 0.2281000	total: 3.36s	remaining: 2.47s
576:	learn: 0.2279856	total: 3.36s	remaining: 2.46s
577:	learn: 0.2278995	total: 3.37s	remaining: 2.46s
578:	learn: 0.2

723:	learn: 0.2158977	total: 4.24s	remaining: 1.62s
724:	learn: 0.2158321	total: 4.25s	remaining: 1.61s
725:	learn: 0.2157721	total: 4.26s	remaining: 1.61s
726:	learn: 0.2157048	total: 4.26s	remaining: 1.6s
727:	learn: 0.2156313	total: 4.27s	remaining: 1.59s
728:	learn: 0.2155559	total: 4.27s	remaining: 1.59s
729:	learn: 0.2154584	total: 4.28s	remaining: 1.58s
730:	learn: 0.2154184	total: 4.29s	remaining: 1.58s
731:	learn: 0.2153486	total: 4.29s	remaining: 1.57s
732:	learn: 0.2152806	total: 4.3s	remaining: 1.56s
733:	learn: 0.2151689	total: 4.3s	remaining: 1.56s
734:	learn: 0.2150905	total: 4.31s	remaining: 1.55s
735:	learn: 0.2149703	total: 4.32s	remaining: 1.55s
736:	learn: 0.2149206	total: 4.32s	remaining: 1.54s
737:	learn: 0.2148079	total: 4.33s	remaining: 1.54s
738:	learn: 0.2146422	total: 4.33s	remaining: 1.53s
739:	learn: 0.2146018	total: 4.34s	remaining: 1.52s
740:	learn: 0.2145192	total: 4.35s	remaining: 1.52s
741:	learn: 0.2144692	total: 4.35s	remaining: 1.51s
742:	learn: 0.2

906:	learn: 0.2024509	total: 5.42s	remaining: 556ms
907:	learn: 0.2024079	total: 5.43s	remaining: 550ms
908:	learn: 0.2023617	total: 5.44s	remaining: 544ms
909:	learn: 0.2023096	total: 5.44s	remaining: 538ms
910:	learn: 0.2022438	total: 5.45s	remaining: 532ms
911:	learn: 0.2021899	total: 5.46s	remaining: 526ms
912:	learn: 0.2021370	total: 5.46s	remaining: 521ms
913:	learn: 0.2021133	total: 5.47s	remaining: 515ms
914:	learn: 0.2020547	total: 5.47s	remaining: 509ms
915:	learn: 0.2020350	total: 5.48s	remaining: 503ms
916:	learn: 0.2019659	total: 5.49s	remaining: 497ms
917:	learn: 0.2018972	total: 5.49s	remaining: 491ms
918:	learn: 0.2018340	total: 5.5s	remaining: 485ms
919:	learn: 0.2017313	total: 5.5s	remaining: 479ms
920:	learn: 0.2016951	total: 5.51s	remaining: 473ms
921:	learn: 0.2016302	total: 5.52s	remaining: 467ms
922:	learn: 0.2015575	total: 5.52s	remaining: 461ms
923:	learn: 0.2014935	total: 5.53s	remaining: 455ms
924:	learn: 0.2014320	total: 5.53s	remaining: 449ms
925:	learn: 0.

<catboost.core.CatBoostClassifier at 0x7feee949ebd0>

In [22]:
# Predict breed and pet categories for the test set with the CatBoost models
cat_breed_pred, cat_pet_pred = cat_breed.predict(X_test), cat_pet.predict(X_test)

In [23]:
# Flatten both CatBoost prediction arrays to one dimension
# (presumably so each can be used as a single submission column — TODO confirm the shape predict returns).
cat_breed_pred = np.array(cat_breed_pred).reshape(-1)
cat_pet_pred = np.array(cat_pet_pred).reshape(-1)

In [24]:
# Build the CatBoost submission: pet ids from the test frame plus both predicted
# categories, indexed by pet id, written to CSV.
submission_columns = {
    'pet_id': test['pet_id'],
    'breed_category': cat_breed_pred,
    'pet_category': cat_pet_pred,
}
final_cat = pd.DataFrame(submission_columns).set_index('pet_id', drop=True)
final_cat.to_csv("submission_final_cat.csv")

## Building models using LightGBM

In [25]:
# 88.86 — figure recorded by the author (presumably the submission score; TODO confirm)
from lightgbm import LGBMClassifier

# Both LightGBM models share the same seed and number of estimators.
lgbm_options = {'random_state': 0, 'n_estimators': 1000}

# Breed classification model, fitted on the full training data
lgbm_breed = LGBMClassifier(**lgbm_options).fit(X_train, y_breed)

# Pet classification model (the fit call stays last so the cell still displays the model)
lgbm_pet = LGBMClassifier(**lgbm_options)
lgbm_pet.fit(X_train, y_pet)

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=-1,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=1000, n_jobs=-1, num_leaves=31, objective=None,
               random_state=0, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [26]:
# Predict breed and pet categories for the test set with the LightGBM models
lgbm_breed_pred, lgbm_pet_pred = lgbm_breed.predict(X_test), lgbm_pet.predict(X_test)

In [27]:
# Build the LightGBM submission: pet ids from the test frame plus both predicted
# categories, indexed by pet id, written to CSV.
submission_columns = {
    'pet_id': test['pet_id'],
    'breed_category': lgbm_breed_pred,
    'pet_category': lgbm_pet_pred,
}
final_lgbm = pd.DataFrame(submission_columns).set_index('pet_id', drop=True)
final_lgbm.to_csv("submission_final_lgbm.csv")