In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import random

In [3]:
# Seed every global RNG this notebook can draw from. The stdlib `random`
# seed alone does not affect NumPy's global generator, which is what
# scikit-learn style estimators fall back to when random_state is None.
random.seed(101)
np.random.seed(101)

In [4]:
import datetime

In [5]:
# Read the training and test CSV files into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/dataset/train.csv', 'data/dataset/test.csv')
)

In [6]:
train.head()

Unnamed: 0,pet_id,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category
0,ANSL_69903,2016-07-10 00:00:00,2016-09-21 16:25:00,2.0,Brown Tabby,0.8,7.78,13,9,0.0,1
1,ANSL_66892,2013-11-21 00:00:00,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2
2,ANSL_69750,2014-09-28 00:00:00,2016-10-19 08:24:00,,Brown,0.15,40.9,15,4,2.0,4
3,ANSL_71623,2016-12-31 00:00:00,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2
4,ANSL_57969,2017-09-28 00:00:00,2017-11-19 09:38:00,2.0,Black,0.5,11.06,18,4,0.0,1


In [7]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18834 entries, 0 to 18833
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   pet_id          18834 non-null  object 
 1   issue_date      18834 non-null  object 
 2   listing_date    18834 non-null  object 
 3   condition       17357 non-null  float64
 4   color_type      18834 non-null  object 
 5   length(m)       18834 non-null  float64
 6   height(cm)      18834 non-null  float64
 7   X1              18834 non-null  int64  
 8   X2              18834 non-null  int64  
 9   breed_category  18834 non-null  float64
 10  pet_category    18834 non-null  int64  
dtypes: float64(4), int64(3), object(4)
memory usage: 1.6+ MB


In [8]:
# Correlate numeric columns only: recent pandas versions raise on the object
# columns (ids, dates, colours) instead of silently dropping them.
train.select_dtypes(include=["number", "bool"]).corr()

Unnamed: 0,condition,length(m),height(cm),X1,X2,breed_category,pet_category
condition,1.0,-0.011219,-0.010793,0.338843,0.381696,-0.483503,-0.04166
length(m),-0.011219,1.0,-0.004464,-0.002893,-0.011175,0.007229,-0.003999
height(cm),-0.010793,-0.004464,1.0,-0.003801,-0.008216,0.011647,0.001976
X1,0.338843,-0.002893,-0.003801,1.0,0.584396,0.240729,-0.032594
X2,0.381696,-0.011175,-0.008216,0.584396,1.0,0.05253,-0.032116
breed_category,-0.483503,0.007229,0.011647,0.240729,0.05253,1.0,0.20923
pet_category,-0.04166,-0.003999,0.001976,-0.032594,-0.032116,0.20923,1.0


In [9]:
train[train["condition"].isnull()]["breed_category"].value_counts()

2.0    1477
Name: breed_category, dtype: int64

In [10]:
train[train["condition"].isnull()]["pet_category"].value_counts()

4    783
1    583
2     60
0     51
Name: pet_category, dtype: int64

In [11]:
train.groupby(train["X2"])["condition"].value_counts()

X2  condition
0   2.0             3
    1.0             1
1   1.0          4037
    0.0          4029
    2.0           406
2   2.0           108
    0.0            14
    1.0            11
3   2.0            11
    0.0             1
4   2.0           319
    1.0           118
    0.0            77
5   2.0             9
    0.0             2
    1.0             1
7   1.0          1489
    0.0          1133
    2.0           963
8   1.0            29
    0.0            25
    2.0             4
9   2.0          2434
    1.0          1133
    0.0          1000
Name: condition, dtype: int64

In [12]:
# Most frequent `condition` for each X2 value, taken from the
# groupby/value_counts table printed in the previous cell.
impute_dict = {0: 2.0, 1: 1.0, 2: 2.0, 3: 2.0, 4: 2.0, 5: 2.0, 7: 1.0, 8: 1.0, 9: 2.0}


def impute(cols, default=1.0):
    """Return `condition`, filling a missing value from the row's X2.

    Parameters
    ----------
    cols : sequence or pandas.Series
        Two values in the order (condition, X2), e.g. one row produced by
        ``df[["condition", "X2"]].apply(impute, axis=1)``.
    default : float, optional
        Value used when condition is missing and X2 has no entry in
        ``impute_dict``.

    Returns
    -------
    The original condition when it is not null, otherwise the mapped
    value for X2 (or ``default``).
    """
    # Unpack by position; plain integer indexing (cols[0]) on a
    # label-indexed Series is deprecated in pandas.
    cond, x2 = list(cols)[:2]

    if pd.isnull(cond):
        return impute_dict.get(x2, default)
    return cond

In [13]:
# Working frames: the raw data without the pet_id column.
train_data, test_data = (frame.drop(columns="pet_id") for frame in (train, test))

In [14]:
# Parse issue_date in one vectorized pass and keep only the calendar date
# (datetime.date objects), instead of constructing a Timestamp per row.
train_data["issue_date"] = pd.to_datetime(train_data["issue_date"]).dt.date
test_data["issue_date"] = pd.to_datetime(test_data["issue_date"]).dt.date

In [15]:
# Calendar date of the listing (datetime.date objects); listing_date itself is
# kept because its time of day is used later.
train_data['list_date'] = pd.to_datetime(train_data["listing_date"]).dt.date
test_data['list_date'] = pd.to_datetime(test_data["listing_date"]).dt.date

In [16]:
# Difference between the listing date and the issue date, per row.
train_data["gap_days"] = train_data["list_date"].sub(train_data["issue_date"])
test_data["gap_days"] = test_data["list_date"].sub(test_data["issue_date"])

In [17]:
# Reduce each time difference to its whole number of days.
train_data["gap_days"] = train_data["gap_days"].map(lambda delta: delta.days)
test_data["gap_days"] = test_data["gap_days"].map(lambda delta: delta.days)

In [18]:
def time_of_day(col):
    """Map a timestamp to the six-hour period of the day it falls in.

    `col` is anything `pd.Timestamp` accepts. Hours 0-5 give "dawn",
    6-11 "morning", 12-17 "afternoon" and 18-23 "night".
    """
    periods = ("dawn", "morning", "afternoon", "night")
    hour = pd.Timestamp.time(pd.Timestamp(col)).hour
    # hour is always in 0..23, so hour // 6 is the index of the period.
    return periods[hour // 6]

In [19]:
# Label every listing with its period of the day.
train_data["time_of_day"] = train_data["listing_date"].map(time_of_day)
test_data["time_of_day"] = test_data["listing_date"].map(time_of_day)

In [20]:
# Calendar year of the listing date.
train_data["list_year"] = [listed.year for listed in train_data["list_date"]]
test_data["list_year"] = [listed.year for listed in test_data["list_date"]]

In [21]:
# Fixed month boundaries (1-4, 5-8, 9-12) so train and test are always binned
# identically. These are the edges pd.cut(..., 3) produces when the data spans
# months 1..12; passing only a bin count lets each frame derive its own edges.
SEASON_BINS = [0, 4, 8, 12]
SEASON_LABELS = ["spring", "summer", "winter"]

train_data["list_season"] = pd.cut(
    train_data["list_date"].apply(lambda x: x.month), bins=SEASON_BINS, labels=SEASON_LABELS
)
test_data["list_season"] = pd.cut(
    test_data["list_date"].apply(lambda x: x.month), bins=SEASON_BINS, labels=SEASON_LABELS
)

In [22]:
# Fixed day-of-month boundaries (1-8, 9-16, 17-23, 24-31) shared by train and
# test. These are the edges pd.cut(..., 4) produces when the data spans days
# 1..31; passing only a bin count lets each frame derive its own edges.
MONTH_PART_BINS = [0, 8, 16, 23, 31]
MONTH_PART_LABELS = ["w1", "w2", "w3", "w4"]

train_data["list_time_month"] = pd.cut(
    train_data["list_date"].apply(lambda x: x.day), bins=MONTH_PART_BINS, labels=MONTH_PART_LABELS
)
test_data["list_time_month"] = pd.cut(
    test_data["list_date"].apply(lambda x: x.day), bins=MONTH_PART_BINS, labels=MONTH_PART_LABELS
)

In [23]:
train_data

Unnamed: 0,issue_date,listing_date,condition,color_type,length(m),height(cm),X1,X2,breed_category,pet_category,list_date,gap_days,time_of_day,list_year,list_season,list_time_month
0,2016-07-10,2016-09-21 16:25:00,2.0,Brown Tabby,0.80,7.78,13,9,0.0,1,2016-09-21,73,afternoon,2016,winter,w3
1,2013-11-21,2018-12-27 17:47:00,1.0,White,0.72,14.19,13,9,0.0,2,2018-12-27,1862,afternoon,2018,winter,w4
2,2014-09-28,2016-10-19 08:24:00,,Brown,0.15,40.90,15,4,2.0,4,2016-10-19,752,morning,2016,winter,w3
3,2016-12-31,2019-01-25 18:30:00,1.0,White,0.62,17.82,0,1,0.0,2,2019-01-25,755,night,2019,spring,w4
4,2017-09-28,2017-11-19 09:38:00,2.0,Black,0.50,11.06,18,4,0.0,1,2017-11-19,52,morning,2017,winter,w3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18829,2017-01-26,2018-03-09 15:35:00,2.0,Tricolor,0.44,27.36,0,1,0.0,2,2018-03-09,407,afternoon,2018,spring,w2
18830,2016-06-18,2017-07-09 08:37:00,,Brown,0.73,14.25,15,4,2.0,4,2017-07-09,386,morning,2017,summer,w2
18831,2010-07-21,2018-08-22 14:27:00,0.0,Calico Point,0.99,28.13,13,9,1.0,1,2018-08-22,2954,afternoon,2018,summer,w3
18832,2017-05-12,2018-02-08 14:05:00,0.0,Tan,0.55,44.82,13,9,1.0,2,2018-02-08,272,afternoon,2018,spring,w1


In [24]:
# The date columns have been turned into derived features; remove the originals.
train_data = train_data.drop(columns=["issue_date","list_date","listing_date"])
test_data = test_data.drop(columns=["issue_date","list_date","listing_date"])

In [25]:
# Flag the eight least frequent colours of the training set. value_counts()
# sorts by descending frequency, so the tail holds the rarest colours. The
# set is built once instead of recomputing value_counts() for every row.
rare_colors = set(train["color_type"].value_counts().index[-8:])

train_data["rare"] = train_data["color_type"].isin(rare_colors).astype(int)
test_data["rare"] = test_data["color_type"].isin(rare_colors).astype(int)

In [26]:
# Correlate numeric columns only: recent pandas versions raise on string and
# categorical columns instead of silently dropping them.
train_data.select_dtypes(include=["number", "bool"]).corr()

Unnamed: 0,condition,length(m),height(cm),X1,X2,breed_category,pet_category,gap_days,list_year,rare
condition,1.0,-0.011219,-0.010793,0.338843,0.381696,-0.483503,-0.04166,-0.101837,-0.015917,0.0138
length(m),-0.011219,1.0,-0.004464,-0.002893,-0.011175,0.007229,-0.003999,0.001771,-0.005259,-0.000507
height(cm),-0.010793,-0.004464,1.0,-0.003801,-0.008216,0.011647,0.001976,-0.000518,0.005802,-0.003184
X1,0.338843,-0.002893,-0.003801,1.0,0.584396,0.240729,-0.032594,-0.102619,-0.073691,-0.00328
X2,0.381696,-0.011175,-0.008216,0.584396,1.0,0.05253,-0.032116,0.118689,-0.05096,3.9e-05
breed_category,-0.483503,0.007229,0.011647,0.240729,0.05253,1.0,0.20923,-0.102774,-0.034661,0.017431
pet_category,-0.04166,-0.003999,0.001976,-0.032594,-0.032116,0.20923,1.0,0.093331,0.036504,0.025573
gap_days,-0.101837,0.001771,-0.000518,-0.102619,0.118689,-0.102774,0.093331,1.0,0.038684,-0.009601
list_year,-0.015917,-0.005259,0.005802,-0.073691,-0.05096,-0.034661,0.036504,0.038684,1.0,-0.003088
rare,0.0138,-0.000507,-0.003184,-0.00328,3.9e-05,0.017431,0.025573,-0.009601,-0.003088,1.0


In [27]:
# One-hot encode color_type (first level dropped) and swap the encoded
# columns in for the original column.
color, t_color = (
    pd.get_dummies(frame["color_type"], drop_first=True)
    for frame in (train_data, test_data)
)

train_data = pd.concat([train_data.drop(columns="color_type"), color], axis="columns")
test_data = pd.concat([test_data.drop(columns="color_type"), t_color], axis="columns")

In [31]:
test_data.columns

Index(['condition', 'length(m)', 'height(cm)', 'X1', 'X2', 'gap_days',
       'time_of_day', 'list_year', 'list_season', 'list_time_month', 'rare',
       'Apricot', 'Black', 'Black Brindle', 'Black Smoke', 'Black Tabby',
       'Black Tiger', 'Blue', 'Blue Cream', 'Blue Merle', 'Blue Point',
       'Blue Smoke', 'Blue Tabby', 'Blue Tick', 'Blue Tiger', 'Brown',
       'Brown Brindle', 'Brown Merle', 'Brown Tabby', 'Brown Tiger', 'Buff',
       'Calico', 'Calico Point', 'Chocolate', 'Chocolate Point', 'Cream',
       'Cream Tabby', 'Fawn', 'Flame Point', 'Gold', 'Gray', 'Gray Tabby',
       'Green', 'Lilac Point', 'Liver', 'Liver Tick', 'Lynx Point', 'Orange',
       'Orange Tabby', 'Pink', 'Red', 'Red Merle', 'Red Tick', 'Sable',
       'Seal Point', 'Silver', 'Silver Lynx Point', 'Silver Tabby', 'Tan',
       'Torbie', 'Tortie', 'Tortie Point', 'Tricolor', 'White', 'Yellow',
       'Yellow Brindle'],
      dtype='object')

In [29]:
# Add an all-zero "Black Tiger" dummy to the test frame (presumably this colour
# appears only in train -- verify). The column length comes from the frame
# rather than a hard-coded row count, and the guard makes the cell safe to
# re-run (DataFrame.insert raises if the column already exists).
if "Black Tiger" not in test_data.columns:
    test_data.insert(16, column="Black Tiger", value=np.zeros(len(test_data)))

In [30]:
# Add an all-zero "Brown Tiger" dummy to the test frame (presumably this colour
# appears only in train -- verify). The column length comes from the frame
# rather than a hard-coded row count, and the guard makes the cell safe to
# re-run (DataFrame.insert raises if the column already exists).
if "Brown Tiger" not in test_data.columns:
    test_data.insert(29, column="Brown Tiger", value=np.zeros(len(test_data)))

In [32]:
# One-hot encode time_of_day (first level dropped) and replace the column.
time, t_time = (
    pd.get_dummies(frame["time_of_day"], drop_first=True)
    for frame in (train_data, test_data)
)

train_data = pd.concat([train_data.drop(columns="time_of_day"), time], axis="columns")
test_data = pd.concat([test_data.drop(columns="time_of_day"), t_time], axis="columns")

In [33]:
from sklearn.preprocessing import LabelEncoder

In [34]:
encode = LabelEncoder()


In [35]:
# Learn the year -> integer code mapping on train only, then apply it to both frames.
encode.fit(train_data["list_year"])
train_data["list_year"] = encode.transform(train_data["list_year"])
test_data["list_year"] = encode.transform(test_data["list_year"])

In [36]:
# One-hot encode list_season (first level dropped) and replace the column.
season, season_t = (
    pd.get_dummies(frame["list_season"], drop_first=True)
    for frame in (train_data, test_data)
)

train_data = pd.concat([train_data.drop(columns="list_season"), season], axis="columns")
test_data = pd.concat([test_data.drop(columns="list_season"), season_t], axis="columns")

In [37]:
# One-hot encode list_time_month (first level dropped) and replace the column.
month, month_t = (
    pd.get_dummies(frame["list_time_month"], drop_first=True)
    for frame in (train_data, test_data)
)

train_data = pd.concat([train_data.drop(columns="list_time_month"), month], axis="columns")
test_data = pd.concat([test_data.drop(columns="list_time_month"), month_t], axis="columns")

In [38]:
# pet_size: four equal-width bins over length(m) * height(cm).
# The bin edges are learned on the training data and reused for the test data.
# Cutting each frame with only a bin count gives every frame its own edges, so
# the same label could mean different sizes in train and test.
SIZE_LABELS = ["small", "little", "medium", "large"]

train_area = train_data["length(m)"] * train_data["height(cm)"]
test_area = test_data["length(m)"] * test_data["height(cm)"]

train_data["pet_size"], size_bins = pd.cut(train_area, 4, labels=SIZE_LABELS, retbins=True)
# Open the outer edges so test values outside the training range still get a label.
size_bins[0], size_bins[-1] = -np.inf, np.inf
test_data["pet_size"] = pd.cut(test_area, bins=size_bins, labels=SIZE_LABELS)


In [39]:
# One-hot encode pet_size (first level dropped) and replace the column.
size, size_t = (
    pd.get_dummies(frame["pet_size"], drop_first=True)
    for frame in (train_data, test_data)
)

train_data = pd.concat([train_data.drop(columns="pet_size"), size], axis="columns")
test_data = pd.concat([test_data.drop(columns="pet_size"), size_t], axis="columns")

In [40]:
train_data

Unnamed: 0,condition,length(m),height(cm),X1,X2,breed_category,pet_category,gap_days,list_year,rare,...,morning,night,summer,winter,w2,w3,w4,little,medium,large
0,2.0,0.80,7.78,13,9,0.0,1,73,1,0,...,0,0,0,1,0,1,0,0,0,0
1,1.0,0.72,14.19,13,9,0.0,2,1862,3,0,...,0,0,0,1,0,0,1,0,0,0
2,,0.15,40.90,15,4,2.0,4,752,1,0,...,1,0,0,1,0,1,0,0,0,0
3,1.0,0.62,17.82,0,1,0.0,2,755,4,0,...,0,1,0,0,0,0,1,0,0,0
4,2.0,0.50,11.06,18,4,0.0,1,52,2,0,...,1,0,0,1,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18829,2.0,0.44,27.36,0,1,0.0,2,407,3,0,...,0,0,0,0,1,0,0,0,0,0
18830,,0.73,14.25,15,4,2.0,4,386,2,0,...,1,0,1,0,1,0,0,0,0,0
18831,0.0,0.99,28.13,13,9,1.0,1,2954,3,0,...,0,0,1,0,0,1,0,0,1,0
18832,0.0,0.55,44.82,13,9,1.0,2,272,3,0,...,0,0,0,0,0,0,0,1,0,0


# Bucket gap_days into four equal-width bins. The edges are learned on the
# training data and reused for the test data; cutting each frame with only a
# bin count would give train and test different boundaries for the same label.
GAP_LABELS = ["short_time", "few_years", "long_time", "very_long_time"]

train_data["gap_days"], gap_bins = pd.cut(train_data["gap_days"], 4, labels=GAP_LABELS, retbins=True)
# Open the outer edges so test values outside the training range still get a label.
gap_bins[0], gap_bins[-1] = -np.inf, np.inf
test_data["gap_days"] = pd.cut(test_data["gap_days"], bins=gap_bins, labels=GAP_LABELS)


# One-hot encode the buckets (first level dropped) and replace the column.
days = pd.get_dummies(train_data["gap_days"],drop_first=True)
days_t = pd.get_dummies(test_data["gap_days"],drop_first=True)

train_data = pd.concat([train_data.drop("gap_days",axis=1),days],axis=1)
test_data = pd.concat([test_data.drop("gap_days",axis=1),days_t],axis=1)

In [41]:
# Independent copies; the *2 frames later receive a different `condition` treatment.
train_data2, test_data2 = train_data.copy(), test_data.copy()

In [42]:
# Remove the raw length and height columns from train_data.
train_data = train_data.drop(columns=["length(m)","height(cm)"])

In [43]:
# Remove the raw length and height columns from train_data2.
train_data2 = train_data2.drop(columns=["length(m)","height(cm)"])

In [44]:
# Remove the raw length and height columns from test_data.
test_data = test_data.drop(columns=["length(m)","height(cm)"])

In [45]:
# Remove the raw length and height columns from test_data2.
test_data2 = test_data2.drop(columns=["length(m)","height(cm)"])

In [46]:
# Variant 1 (train_data / test_data): missing condition becomes its own level, 3.
train_data["condition"]=train_data["condition"].fillna(3)
test_data["condition"]=test_data["condition"].fillna(3)

# Variant 2 (train_data2 / test_data2): fill missing condition from X2 via
# impute(). This has to run on the raw NaNs. Filling these frames with 3 first
# left no nulls for impute() to act on, which made it a no-op and both
# variants identical.
train_data2["condition"]=train_data2[["condition","X2"]].apply(impute,axis=1)
test_data2["condition"]=test_data2[["condition","X2"]].apply(impute,axis=1)

In [47]:
# One-hot encode condition (first level dropped) in both pipeline variants
# and replace the original column.
cond, cond_t = (
    pd.get_dummies(frame["condition"], drop_first=True)
    for frame in (train_data, test_data)
)

train_data = pd.concat([train_data.drop(columns="condition"), cond], axis="columns")
test_data = pd.concat([test_data.drop(columns="condition"), cond_t], axis="columns")

cond2, cond2_t = (
    pd.get_dummies(frame["condition"], drop_first=True)
    for frame in (train_data2, test_data2)
)

train_data2 = pd.concat([train_data2.drop(columns="condition"), cond2], axis="columns")
test_data2 = pd.concat([test_data2.drop(columns="condition"), cond2_t], axis="columns")

In [79]:
# Build the feature matrices. The test frames are explicitly aligned to the
# training feature columns (same set, same order; a column missing from test
# is filled with 0). Taking .values directly relies on the separately built
# dummy columns happening to line up between train and test.
TARGET_COLUMNS = ['breed_category','pet_category']

features = train_data.drop(columns=TARGET_COLUMNS)
x_train = features.values
x_test = test_data.reindex(columns=features.columns, fill_value=0).values

features2 = train_data2.drop(columns=TARGET_COLUMNS)
x_train2 = features2.values
x_test2 = test_data2.reindex(columns=features2.columns, fill_value=0).values

In [80]:
# Targets: breed_category is read from train_data, pet_category from train_data2.
y_train_breed, y_train_pet = train_data["breed_category"], train_data2["pet_category"]

In [50]:
from sklearn.preprocessing import StandardScaler

In [81]:
sc = StandardScaler()

In [82]:
# Fit the scaler on the training features only, then scale both matrices with it.
sc.fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [83]:
sc2 = StandardScaler()

In [84]:
# Fit the second scaler on the variant-2 training features only, then scale both matrices.
sc2.fit(x_train2)
x_train2, x_test2 = sc2.transform(x_train2), sc2.transform(x_test2)

In [85]:
train["pet_category"].value_counts()

2    10621
1     7184
4      941
0       88
Name: pet_category, dtype: int64

In [86]:
train["breed_category"].value_counts()

0.0    9000
1.0    8357
2.0    1477
Name: breed_category, dtype: int64

In [87]:
from imblearn.over_sampling import SMOTE

In [88]:
# Oversample pet_category 0 (88 rows in the counts above) to 941 rows, the
# size of class 4. random_state is fixed so the synthetic samples, and
# everything trained on them, are reproducible across runs.
sm_pet = SMOTE(sampling_strategy={0: 941}, random_state=101)

x_train2, y_train_pet = sm_pet.fit_resample(x_train2, y_train_pet)

y_train_pet.value_counts()

2    10621
1     7184
4      941
0      941
Name: pet_category, dtype: int64

In [89]:
# Oversample breed_category 2 (1477 rows in the counts above) to 1500 rows.
# random_state is fixed so the synthetic samples are reproducible across runs.
sm_breed = SMOTE(sampling_strategy={2: 1500}, random_state=101)

x_train, y_train_breed = sm_breed.fit_resample(x_train, y_train_breed)

y_train_breed.value_counts()

0.0    9000
1.0    8357
2.0    1500
Name: breed_category, dtype: int64

In [63]:
from sklearn.decomposition import PCA

In [68]:
# Project the breed features onto 73 principal components.
# NOTE(review): 73 is hard-coded and must not exceed the number of feature
# columns in x_train -- confirm whenever the feature engineering changes.
pca = PCA(n_components=73)

In [69]:
x_train=pca.fit_transform(x_train,y_train_breed)

In [70]:
x_test = pca.transform(x_test)

In [71]:
from xgboost import XGBClassifier

In [322]:
from sklearn.model_selection import RandomizedSearchCV

In [323]:
xgb = XGBClassifier(objective="multi:softmax")

In [324]:
# Candidate hyperparameter values sampled by the randomized search.
parameters = dict(
    learning_rate=[0.3, 0.01, 0.1],
    gamma=[0.3, 0.5, 1, 1.5, 2, 2.2, 2.4],
    max_depth=[6, 7, 8, 9],
    colsample_bytree=[0.3, 0.6, 0.8, 1.0],
    subsample=[0.2, 0.4, 0.5, 0.6, 0.8],
    reg_alpha=[0, 0.5, 1],
    reg_lambda=[0, 1, 1.5, 2],
    min_child_weight=[1, 3, 5, 7, 9],
)

In [327]:
# Randomized hyperparameter search over `parameters`: 5-fold cross-validation
# scored with micro-averaged F1, with a fixed random_state for the sampling.
# n_iter is not set; the fit log below reports 10 candidates (50 fits).
xgb_rscv = RandomizedSearchCV(xgb, param_distributions = parameters, scoring = "f1_micro",
                             cv = 5, verbose = 2, random_state = 1 )

In [328]:
xgb_rscv.fit(x_train,y_train_breed)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0, total=  12.2s
[CV] subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.1s remaining:    0.0s


[CV]  subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0, total=  12.4s
[CV] subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0 
[CV]  subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0, total=  14.6s
[CV] subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0 
[CV]  subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0, total=  16.1s
[CV] subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0 
[CV]  subsample=0.8, reg_lambda=2, reg_alpha=0, min_child_weight=5, max_depth=6, learning_rate=0.01, gamma=2.4, colsample_bytree=1.0, total=  16.0s
[CV] subs

[CV]  subsample=0.2, reg_lambda=0, reg_alpha=0.5, min_child_weight=5, max_depth=9, learning_rate=0.1, gamma=2.2, colsample_bytree=0.3, total=   5.4s
[CV] subsample=0.2, reg_lambda=0, reg_alpha=0.5, min_child_weight=5, max_depth=9, learning_rate=0.1, gamma=2.2, colsample_bytree=0.3 
[CV]  subsample=0.2, reg_lambda=0, reg_alpha=0.5, min_child_weight=5, max_depth=9, learning_rate=0.1, gamma=2.2, colsample_bytree=0.3, total=   5.4s
[CV] subsample=0.2, reg_lambda=0, reg_alpha=0.5, min_child_weight=5, max_depth=9, learning_rate=0.1, gamma=2.2, colsample_bytree=0.3 
[CV]  subsample=0.2, reg_lambda=0, reg_alpha=0.5, min_child_weight=5, max_depth=9, learning_rate=0.1, gamma=2.2, colsample_bytree=0.3, total=   5.6s
[CV] subsample=0.2, reg_lambda=0, reg_alpha=0.5, min_child_weight=5, max_depth=9, learning_rate=0.1, gamma=2.2, colsample_bytree=0.3 
[CV]  subsample=0.2, reg_lambda=0, reg_alpha=0.5, min_child_weight=5, max_depth=9, learning_rate=0.1, gamma=2.2, colsample_bytree=0.3, total=   5.5s
[C

[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed: 10.1min finished


RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=XGBClassifier(base_score=None, booster=None,
                                           colsample_bylevel=None,
                                           colsample_bynode=None,
                                           colsample_bytree=None, gamma=None,
                                           gpu_id=None, importance_type='gain',
                                           interaction_constraints=None,
                                           learning_rate=None,
                                           max_delta_step=None, max_depth=None,
                                           min_child_weight=None, missing=nan,
                                           monotone_constraints=None,
                                           n...
                   param_distributions={'colsample_bytree': [0.3, 0.6, 0.8,
                                                             1.0],
                                     

In [329]:
print(xgb_rscv.best_score_)
print(xgb_rscv.best_params_)

0.8873634476201564
{'subsample': 0.5, 'reg_lambda': 2, 'reg_alpha': 0, 'min_child_weight': 9, 'max_depth': 8, 'learning_rate': 0.1, 'gamma': 2.4, 'colsample_bytree': 0.6}


In [111]:


# Final breed_category model.
# NOTE(review): these values differ from the best_params_ printed by the
# randomized search above (e.g. learning_rate 0.01 vs 0.1, subsample 0.8 vs
# 0.5) -- confirm this is the intended final configuration.
breed_model_params = dict(
    objective="multi:softmax",
    max_depth=8,
    gamma=2.2,
    learning_rate=0.01,
    colsample_bytree=1.0,
    subsample=0.8,
    reg_alpha=0.5,
    reg_lambda=2,
    min_child_weight=3,
    random_state=101,
)
xgb = XGBClassifier(**breed_model_params)



In [112]:
xgb.fit(x_train,y_train_breed)

XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1.0, gamma=2.2, gpu_id=-1,
              importance_type='gain', interaction_constraints=None,
              learning_rate=0.01, max_delta_step=0, max_depth=8,
              min_child_weight=3, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              objective='multi:softprob', random_state=101, reg_alpha=0.5,
              reg_lambda=2, scale_pos_weight=None, subsample=0.8,
              tree_method=None, validate_parameters=False, verbosity=None)

In [113]:
pred_breed= xgb.predict(x_test)

In [114]:
xgb1 = XGBClassifier(objective="multi:softmax",max_depth=8,random_state=101)

In [115]:
xgb1.fit(x_train2,y_train_pet)

XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints=None,
              learning_rate=0.300000012, max_delta_step=0, max_depth=8,
              min_child_weight=1, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=0, num_parallel_tree=1,
              objective='multi:softprob', random_state=101, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method=None, validate_parameters=False, verbosity=None)

In [116]:
pred_pet = xgb1.predict(x_test2)

In [117]:
# Combine the test ids with both sets of predictions and write them to CSV.
submission = pd.DataFrame({
    'pet_id': test["pet_id"],
    'breed_category': pred_breed,
    'pet_category': pred_pet,
})
submission.to_csv('result.csv', index=False)