In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import seaborn as sb
sb.set(style="whitegrid")

In [2]:
train = pd.read_csv('../Train/train_set_binary_encoding_sin_secondary_use.csv')

train.head()

Unnamed: 0,geo_level_1_id,geo_level_2_id,geo_level_3_id,count_floors_pre_eq,age,area_percentage,height_percentage,has_superstructure_adobe_mud,has_superstructure_mud_mortar_stone,has_superstructure_stone_flag,...,position_b1,position_b0,plan_configuration_b3,plan_configuration_b2,plan_configuration_b1,plan_configuration_b0,legal_ownership_status_b2,legal_ownership_status_b1,legal_ownership_status_b0,damage_grade
0,6,487,12198,2,30,6,5,1,1,0,...,1,0,0,0,0,1,0,0,1,3
1,8,900,2812,2,10,8,7,0,1,0,...,0,1,0,0,0,1,0,0,1,2
2,21,363,8973,2,10,5,5,0,1,0,...,1,0,0,0,0,1,0,0,1,3
3,22,418,10694,2,10,6,5,0,1,0,...,0,1,0,0,0,1,0,0,1,2
4,11,131,1488,3,30,8,9,1,0,0,...,0,1,0,0,0,1,0,0,1,3


In [3]:
X, y = train.iloc[:,:-1], train.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)

In [4]:
params = {
    'n_estimators': [10, 30],
    'max_depth': [20, 40],
    'verbosity': [0],
    'gamma': [0.1],
    'subsample': [0.5],
    'random_state': [123]
}

In [5]:
opt = GridSearchCV(xgb.XGBClassifier(), params)
opt.fit(X_train, y_train)





GridSearchCV(estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, random_state=None,
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, validate_parameter

In [6]:
opt.best_estimator_

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0.1, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=20,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=10, n_jobs=4, num_parallel_tree=1,
              objective='multi:softprob', random_state=123, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=0.5,
              tree_method='exact', validate_parameters=1, verbosity=0)

In [7]:
preds = opt.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score %f" % (f1))

F1-Mean Score 0.733395


## Pruebas manuales

n estimator 

In [8]:
xgb_model = xgb.XGBClassifier(n_estimator= 10, gamma=0.1, subsample=0.6, random_state=123, verbosity=0, learning_rate=0.3)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.728752


In [9]:
xgb_model = xgb.XGBClassifier(n_estimator= 9, gamma=0.1, subsample=0.6, random_state=123, verbosity=0, learning_rate=0.3)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.728752


In [10]:
xgb_model = xgb.XGBClassifier(n_estimator= 11, gamma=0.1, subsample=0.6, random_state=123, verbosity=0, learning_rate=0.3)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.728752


subsample

In [11]:
xgb_model = xgb.XGBClassifier(n_estimator= 10, gamma=0.1, subsample=0.4, random_state=123, verbosity=0, learning_rate=0.3)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.727869


In [13]:
xgb_model = xgb.XGBClassifier(n_estimator= 10, gamma=0.1, subsample=0.55, random_state=123, verbosity=0, learning_rate=0.3)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.725682


In [14]:
xgb_model = xgb.XGBClassifier(n_estimator= 10, gamma=0.1, subsample=0.45, random_state=123, verbosity=0, learning_rate=0.3)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.725567


In [12]:
xgb_model = xgb.XGBClassifier(n_estimator= 10, gamma=0.1, subsample=0.3, random_state=123, verbosity=0, learning_rate=0.3)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.721231


learning rate

In [15]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.1, subsample=0.5, random_state=123, verbosity=0, learning_rate=0.4)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.729442


In [16]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.1, subsample=0.5, random_state=123, verbosity=0, learning_rate=0.2)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.717010


In [17]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.1, subsample=0.5, random_state=123, verbosity=0, learning_rate=0.31)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.726603


In [18]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.1, subsample=0.5, random_state=123, verbosity=0, learning_rate=0.29)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.724262


gamma

In [20]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.2, subsample=0.5, random_state=123, verbosity=0)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))

F1-Mean Score: 0.725605


In [21]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.3, subsample=0.5, random_state=123, verbosity=0)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.726756


In [22]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.01, subsample=0.5, random_state=123, verbosity=0)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.724838


In [23]:
xgb_model = xgb.XGBClassifier(n_estimator=10, gamma=0.5, subsample=0.5, random_state=123, verbosity=0)
xgb_model.fit(X_train,y_train)
preds = xgb_model.predict(X_test)
f1 = f1_score(y_test, preds, average='micro')
print("F1-Mean Score: %f" % (f1))



F1-Mean Score: 0.725989
