# Using XGB - Base and Hyperparameter Tuned

In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as tts
import matplotlib.pyplot as plt

## Load Data

In [4]:
x = pd.read_csv('../data/processed/x.csv')
y = pd.read_csv('../data/processed/y.csv')
print(x.shape, y.shape)

(3000, 9) (3000, 1)


In [6]:
x.head()

Unnamed: 0,Duration(sec),Paket Loss Rate(Reliability),Packet Delay Budget(Latency(ms)),Bandwidth(GHz),Delay Rate(Mbps),Speed(Mbps),Jitter(ps),User Device Type,Modulation Type
0,1.0,1.0,1.0,0.032258,1.0,0.34961,0.510791,8,0
1,0.074074,1.0,0.122449,0.290323,0.081395,0.880298,0.539568,8,0
2,0.333333,0.0009,0.918367,0.290323,0.034884,0.939301,0.827338,8,0
3,0.111111,0.009901,0.22449,0.677419,0.034884,0.275687,1.0,8,0
4,0.0,0.0,0.979592,1.0,0.081395,0.642252,0.366906,8,0


In [7]:
y.head()

Unnamed: 0,Slice Type
0,1
1,1
2,1
3,1
4,1


In [9]:
X_train, X_test, y_train, y_test = tts(x, y, test_size=0.30, random_state=10)
print('Shape of X_train=',X_train.shape)
print('Shape of X_test=',X_test.shape)
print('Shape of y_train=',y_train.shape)
print('Shape of y_test=',y_test.shape)

Shape of X_train= (2100, 9)
Shape of X_test= (900, 9)
Shape of y_train= (2100, 1)
Shape of y_test= (900, 1)


# XGBoost

In [29]:

import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [14]:
# read in data
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
# specify parameters via map
param = {'max_depth':20, 'eta':0.001, 'objective': 'multi:softmax', 'num_class':78, 'tree_method': 'gpu_hist' }
num_round = 100
watchlist = [(dtrain, 'train'), (dtest, 'test')]
bst = xgb.train(param, dtrain, num_round, watchlist)
# make prediction
preds_test = bst.predict(dtest)
preds_train = bst.predict(dtrain)
print(accuracy_score(y_train, preds_train))
print(accuracy_score(y_test, preds_test))

[0]	train-mlogloss:4.25482	test-mlogloss:4.25340
[1]	train-mlogloss:4.16353	test-mlogloss:4.16103
[2]	train-mlogloss:4.08073	test-mlogloss:4.07741
[3]	train-mlogloss:4.00492	test-mlogloss:4.00090
[4]	train-mlogloss:3.93495	test-mlogloss:3.93024
[5]	train-mlogloss:3.86976	test-mlogloss:3.86476
[6]	train-mlogloss:3.80899	test-mlogloss:3.80378
[7]	train-mlogloss:3.75188	test-mlogloss:3.74640
[8]	train-mlogloss:3.69807	test-mlogloss:3.69255
[9]	train-mlogloss:3.64711	test-mlogloss:3.64169
[10]	train-mlogloss:3.59890	test-mlogloss:3.59354
[11]	train-mlogloss:3.55294	test-mlogloss:3.54772
[12]	train-mlogloss:3.50911	test-mlogloss:3.50418
[13]	train-mlogloss:3.46721	test-mlogloss:3.46277
[14]	train-mlogloss:3.42712	test-mlogloss:3.42312
[15]	train-mlogloss:3.38835	test-mlogloss:3.38478
[16]	train-mlogloss:3.35118	test-mlogloss:3.34803
[17]	train-mlogloss:3.31526	test-mlogloss:3.31268
[18]	train-mlogloss:3.28071	test-mlogloss:3.27874
[19]	train-mlogloss:3.24734	test-mlogloss:3.24601
[20]	train

In [20]:
# read in data
dmatrix = xgb.DMatrix(x, label=y)
# specify parameters via map
param = {'max_depth':10, 'eta':0.001, 'objective': 'multi:softmax', 'num_class':78, 'tree_method': 'gpu_hist', }
num_round = 100
watchlist = [(dtrain, 'train'), (dtest, 'test')]
bst = xgb.cv(params=param, dtrain=dtrain, nfold=10, metrics={'merror'}, as_pandas=True)

In [21]:
bst

Unnamed: 0,train-merror-mean,train-merror-std,test-merror-mean,test-merror-std
0,0.285873,0.003236,0.286667,0.026238
1,0.285873,0.003236,0.286667,0.026238
2,0.285926,0.003159,0.286667,0.026238
3,0.286085,0.002965,0.28619,0.025859
4,0.286085,0.002965,0.28619,0.025859
5,0.286085,0.002965,0.28619,0.025859
6,0.286085,0.002965,0.28619,0.025859
7,0.286085,0.002965,0.28619,0.025859
8,0.286085,0.002965,0.28619,0.025859
9,0.286085,0.002965,0.28619,0.025859


In [22]:
from sklearn.model_selection import GridSearchCV

In [28]:
X_train, X_test, y_train, y_test = tts(x, y, test_size=0.30, random_state=10)
print('Shape of X_train=',X_train.shape)
print('Shape of X_test=',X_test.shape)
print('Shape of y_train=',y_train.shape)
print('Shape of y_test=',y_test.shape)

Shape of X_train= (2100, 9)
Shape of X_test= (900, 9)
Shape of y_train= (2100, 1)
Shape of y_test= (900, 1)


In [32]:
#XGBoost hyper-parameter tuning
def hyperParameterTuning(X_train, y_train):
    param_tuning = {
        'learning_rate': [0.01, 0.1],
        'max_depth': [3, 5, 7, 10],
        'min_child_weight': [1, 3, 5],
        'subsample': [0.5, 0.7],
        'colsample_bytree': [0.5, 0.7],
        'n_estimators' : [100, 200, 500],
        'objective': ['multi:softmax'],
        'tree_method': ['gpu_hist']
    }

    xgb_model = XGBClassifier()

    gsearch = GridSearchCV(estimator = xgb_model,
                           param_grid = param_tuning,                        
                           #scoring = 'neg_mean_absolute_error', #MAE
                           #scoring = 'neg_mean_squared_error',  #MSE
                           cv = 5,
                           n_jobs = -1,
                           verbose = 1)

    gsearch.fit(X_train,y_train)

    return gsearch.best_params_

In [33]:
hyperParameterTuning(X_train, y_train)

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
