In [6]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb

In [2]:
# Read the full data set from disk
train_test_df = pd.read_csv("train_test.csv")

# Targets: the `Class` column, shifted down by 1 because indexing begins
# with 0
y = (train_test_df["Class"] - 1).to_numpy()

# Features: everything from column position 18 onwards, converted from a
# Pandas DataFrame to a NumPy array via `to_numpy()`
# The intent is to keep only the standardised columns, i.e. columns
# containing `Zscore` in their name
# Right now, both the infection count and index are employed; in the
# future, one of them might be discarded
# NOTE(review): the selection is positional, not name-based -- confirm
# that exactly the `Zscore` columns live at position 18 and beyond, and
# that `Class` is not among them
X = train_test_df.iloc[:, 18:].to_numpy()

# Hold out part of the data for evaluation: 80 % of the rows go to the
# training split, the remainder to the test split; the fixed
# `random_state` keeps the shuffle reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=1,
)

In [3]:
# Wrap each split in an XGBoost `DMatrix`, pairing the feature array with
# its label array
dtrain_clf = xgb.DMatrix(data=X_train, label=y_train)
dtest_clf = xgb.DMatrix(data=X_test, label=y_test)

In [4]:
# Booster configuration passed to `xgb.train`
params = dict(
    # Multi-class objective; predictions come back as one probability
    # per class
    objective="multi:softprob",
    # `hist` instead of `gpu_hist`, since XGBoost unfortunately does not
    # support Apple GPU
    tree_method="hist",
    # Number of target classes
    # NOTE(review): presumably the number of distinct values in the
    # `Class` column -- confirm against the data
    num_class=6,
)

# Amount of boosting rounds
n = 100

In [5]:
# Fit the booster on the training matrix using the hyperparameters and
# number of boosting rounds defined above
model = xgb.train(params, dtrain_clf, num_boost_round=n)

In [15]:
# Run the trained model on the held-out test matrix, i.e. on data it has
# not seen during training
# `predict` does not hand back class labels directly; it returns one
# probability per possible class for every sample
prob_preds = model.predict(dtest_clf)

# Turn the probabilities into class labels by picking, for each row, the
# index of the largest probability
label_preds = prob_preds.argmax(axis=1)

# Measure the agreement between predictions and the ground truth via
# accuracy
acc = accuracy_score(y_true=y_test, y_pred=label_preds)

In [17]:
# Report the accuracy computed on the test split, formatted to four
# decimal places
print(f"Accuracy of the model: {acc:.4f}")

Accuracy of the model: 0.9994
