In [1]:
import numpy as np
import pandas as pd
from gboosting import XGBTreeClassifier, XGBTreeRegressor
import xgboost as xgb

### Binary classification

In [2]:
from sklearn.metrics import accuracy_score

In [3]:
def parse_monks(filename):
    """Read a space-separated MONK's file into a feature matrix and labels.

    The file is read without a header row; runs of leading spaces after a
    delimiter are ignored. The final column is discarded (presumably the
    per-sample identifier -- confirm against the dataset description),
    column 0 supplies the target values, and columns 1 through 6 are
    one-hot encoded.

    Parameters
    ----------
    filename : str or path-like
        Location of the file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    X : numpy.ndarray
        Integer one-hot encoding of columns 1-6, one row per sample. Only
        attribute values that occur in this file get a column.
    y : numpy.ndarray
        Values of column 0, one per sample.
    """
    raw = pd.read_csv(filename, sep=' ', header=None, skipinitialspace=True)

    # Everything except the trailing column is kept.
    table = raw.iloc[:, :-1]

    labels = table[0].values
    attributes = table.drop(0, axis=1)
    encoded = pd.get_dummies(attributes, columns=[1, 2, 3, 4, 5, 6])
    features = encoded.values.astype(int)
    return features, labels

#### Monk 1

In [4]:
# Parse the monks-1 train file, then the test file, each into an
# (X, y) pair as returned by parse_monks.
(X_tr, y_tr), (X_ts, y_ts) = map(
    parse_monks,
    ('datasets/Monks/monks-1.train', 'datasets/Monks/monks-1.test'),
)

In [7]:
# Hyperparameters for the gboosting classifier, gathered in one place and
# unpacked into the constructor.
my_gbm_params = dict(n_estimators=100, max_depth=4, eta=0.5, lmbda=0.9, gamma=0.0)
my_gbm = XGBTreeClassifier(**my_gbm_params)
my_gbm.fit(X_tr, y_tr, verbose=True)

# Accuracy on the training split first, then on the test split.
train_accuracy = accuracy_score(y_tr, my_gbm.predict(X_tr))
print('TRAIN\t|', train_accuracy)
test_accuracy = accuracy_score(y_ts, my_gbm.predict(X_ts))
print('TEST\t|', test_accuracy)

  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:00<00:00, 132.21it/s]

TRAIN	| 1.0
TEST	| 1.0





In [6]:
# Reference run with xgboost's XGBClassifier; the values mirror the
# gboosting cell above (learning_rate <-> eta, reg_lambda <-> lmbda).
official_xgb_params = dict(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.5,
    reg_lambda=0.9,
    gamma=0.0,
)
official_xgb = xgb.XGBClassifier(**official_xgb_params)
official_xgb.fit(X_tr, y_tr)

# Accuracy on the training split first, then on the test split.
train_accuracy = accuracy_score(y_tr, official_xgb.predict(X_tr))
print('TRAIN\t|', train_accuracy)
test_accuracy = accuracy_score(y_ts, official_xgb.predict(X_ts))
print('TEST\t|', test_accuracy)

TRAIN	| 1.0
TEST	| 1.0


#### Monk 2

In [8]:
# Parse the monks-2 train file, then the test file, each into an
# (X, y) pair as returned by parse_monks. This rebinds X_tr/y_tr/X_ts/y_ts.
(X_tr, y_tr), (X_ts, y_ts) = map(
    parse_monks,
    ('datasets/Monks/monks-2.train', 'datasets/Monks/monks-2.test'),
)

In [19]:
# Hyperparameters for the gboosting classifier on this dataset, gathered
# in one place and unpacked into the constructor.
my_gbm_params = dict(n_estimators=1000, max_depth=8, eta=0.1, lmbda=0.5, gamma=0.0)
my_gbm = XGBTreeClassifier(**my_gbm_params)
my_gbm.fit(X_tr, y_tr, verbose=True)

# Accuracy on the training split first, then on the test split.
train_accuracy = accuracy_score(y_tr, my_gbm.predict(X_tr))
print('TRAIN\t|', train_accuracy)
test_accuracy = accuracy_score(y_ts, my_gbm.predict(X_ts))
print('TEST\t|', test_accuracy)

  0%|          | 0/1000 [00:00<?, ?it/s]

 55%|█████▍    | 545/1000 [00:29<00:24, 18.33it/s]

TRAIN	| 1.0
TEST	| 0.8101851851851852





In [10]:
# Reference run with xgboost's XGBClassifier.
# NOTE(review): n_estimators=20 and learning_rate=0.5 here differ from the
# gboosting cell above (n_estimators=1000, eta=0.1), so the two models are
# not trained under matching settings -- confirm this is intentional.
official_xgb_params = dict(
    n_estimators=20,
    max_depth=8,
    learning_rate=0.5,
    reg_lambda=0.5,
    gamma=0.0,
)
official_xgb = xgb.XGBClassifier(**official_xgb_params)
official_xgb.fit(X_tr, y_tr)

# Accuracy on the training split first, then on the test split.
train_accuracy = accuracy_score(y_tr, official_xgb.predict(X_tr))
print('TRAIN\t|', train_accuracy)
test_accuracy = accuracy_score(y_ts, official_xgb.predict(X_ts))
print('TEST\t|', test_accuracy)

TRAIN	| 1.0
TEST	| 0.7962962962962963
