## Apress - Industrialized Machine Learning Examples

Andreas Francois Vermeulen
2019

### This example is an add-on to the book and is covered by the book's copyright.

# Chapter 005 Example 003

## First XGBoost model for Sickness Type 7 dataset

## Part A - Load Libraries

In [1]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import pandas as pd
import os
from numpy import genfromtxt

## Part B - Load Data

In [2]:
# Location of the dataset, written relative to the current working directory.
fileName = '../../Data/Sickness07.csv'
# abspath resolves the relative path against the working directory; printing the
# result shows exactly which file the following load step will read.
fileFullName = os.path.abspath(fileName)
print(fileFullName)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Data\Sickness07.csv


In [3]:
# Parse the comma-separated file into a 2-D array; '#' starts a comment and
# surrounding whitespace is stripped from every field.
dataset = genfromtxt(fileFullName, comments='#', delimiter=",", autostrip=True)

# Report the size of what was loaded as a quick sanity check.
rowCount, columnCount = dataset.shape
print('Loaded %0d rows by %0d columns' % (rowCount, columnCount))

Loaded 768 rows by 9 columns


In [4]:
# Hold-out configuration: fixed seed for a repeatable split, 5% of rows for testing.
seed = 1968
test_size = 0.05

# The first eight columns are the model inputs (X); the ninth column is the label (Y).
X, Y = dataset[:, :8], dataset[:, 8]

# Randomly partition the rows into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=seed,
    test_size=test_size,
)

## Part C - Execute XGBClassifier

In [5]:
# Fit the model on the training data.
# n_jobs is the supported way to set the number of parallel threads; the
# nthread keyword used previously is deprecated in the scikit-learn wrapper.
xc = XGBClassifier(max_depth=12,
                   learning_rate=0.05,
                   n_estimators=1968,
                   n_jobs=8)

xc.fit(X_train, y_train)

# Predict a class label for every row of the held-out test set.
y_pred = xc.predict(X_test)

# Convert the predicted labels to plain Python ints for scoring and reporting.
predictions = [int(value) for value in y_pred]

# Evaluate: share of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %7.5f%%" % (accuracy * 100.0))

Accuracy: 82.05128%


In [6]:
%%javascript
// Replace the output area's _should_scroll hook with a version that always
// returns false, whatever value is passed as `lines`.
// NOTE(review): presumably this stops the notebook front end from collapsing
// long outputs into a scroll box - confirm against the front end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

## Part D - Output Results

In [7]:
# Results are stored under <two levels above the working directory>/Results/Chapter 05.
baseFolder = os.path.dirname(os.path.dirname(os.getcwd()))
imagepath = os.path.join(baseFolder, 'Results', 'Chapter 05')
print(imagepath)

# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process creating it first.
os.makedirs(imagepath, exist_ok=True)

# Derive the report path from imagepath so the folder that is created and the
# folder that is written to can never drift apart.
filetxt = os.path.join(imagepath, 'Chapter-005-Example-003-001.txt')
print(filetxt)

C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Results\Chapter 05
C:\Users\AndreVermeulen\Documents\My Book\apress\Industrial Machine Learning\book\GitHub\Upload\industrial-machine-learning\Results\Chapter 05\Chapter-005-Example-003-001.txt


In [8]:
# Write one pipe-delimited row per test sample to the results file and echo
# each row tab-delimited to the notebook output.
# The with-block guarantees the file is closed even if formatting a row fails.
with open(filetxt, 'w') as f:
    line = '|'.join(['Valid', 'ID', 'Features', 'TestValue', 'PredictValue', 'True_False\n'])
    f.write(line)
    print(line.replace('|', '\t'))

    n = y_test.shape[0]
    for i in range(n):
        # bool() turns the NumPy comparison result into a plain True/False so
        # its text form is the same on every NumPy version.
        isMatch = bool(y_test[i] == predictions[i])
        # float() turns each rounded NumPy scalar into a plain Python float;
        # otherwise NumPy 2 would render the list as "np.float64(...)" entries.
        features = [float(round(value, 6)) for value in X_test[i]]
        line = '|'.join([
            '+' if isMatch else '-',   # quick visual marker for hit / miss
            str(i),
            str(features),
            str(int(y_test[i])),
            str(int(predictions[i])),
            str(isMatch) + '\n',
        ])
        f.write(line)
        # The row keeps its trailing newline, so the echoed rows are separated
        # by a blank line exactly as before.
        print(line.replace('|', '\t'))

Valid	ID	Features	TestValue	PredictValue	True_False

+	0	[1.0, 89.0, 76.0, 34.0, 37.0, 31.2, 0.192, 23.0]	0	0	True

+	1	[4.0, 173.0, 70.0, 14.0, 168.0, 29.7, 0.361, 33.0]	1	1	True

+	2	[5.0, 95.0, 72.0, 33.0, 0.0, 37.7, 0.37, 27.0]	0	0	True

-	3	[3.0, 128.0, 72.0, 25.0, 190.0, 32.4, 0.549, 27.0]	1	0	False

-	4	[0.0, 135.0, 68.0, 42.0, 250.0, 42.3, 0.365, 24.0]	1	0	False

+	5	[1.0, 136.0, 74.0, 50.0, 204.0, 37.4, 0.399, 24.0]	0	0	True

+	6	[9.0, 130.0, 70.0, 0.0, 0.0, 34.2, 0.652, 45.0]	1	1	True

+	7	[0.0, 119.0, 66.0, 27.0, 0.0, 38.8, 0.259, 22.0]	0	0	True

+	8	[10.0, 148.0, 84.0, 48.0, 237.0, 37.6, 1.001, 51.0]	1	1	True

+	9	[5.0, 121.0, 72.0, 23.0, 112.0, 26.2, 0.245, 30.0]	0	0	True

+	10	[2.0, 99.0, 52.0, 15.0, 94.0, 24.6, 0.637, 21.0]	0	0	True

+	11	[1.0, 118.0, 58.0, 36.0, 94.0, 33.3, 0.261, 23.0]	0	0	True

+	12	[1.0, 121.0, 78.0, 39.0, 74.0, 39.0, 0.261, 28.0]	0	0	True

+	13	[0.0, 86.0, 68.0, 32.0, 0.0, 35.8, 0.238, 25.0]	0	0	True

+	14	[2.0, 127.0, 58.0, 24.0, 275.0, 27.7, 1.6, 

## Part E - Open Results

In [11]:
# Open the results file through the standard-library webbrowser module.
# NOTE(review): filetxt is a plain file-system path, not a URL; how that is
# handled presumably depends on the platform - confirm on the target machine.
import webbrowser
# r receives the return value of webbrowser.open; it is not read again in the
# visible part of the notebook.
r=webbrowser.open(filetxt)

## Done

In [10]:
import datetime

# Capture the wall-clock time at which the notebook finished and report it.
# print() applies str() to its arguments, so passing the datetime directly
# produces the same text as converting it explicitly.
now = datetime.datetime.now()
print('Done!', now)

Done! 2019-10-19 17:43:07.750453
