In [13]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor 

class XGBClassifier:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3, min_samples_split=2):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.trees = []
        self.weights = []

    def fit(self, X, y):
        self.classes_ = np.unique(y)
        for i in range(len(self.classes_)):
            y_binary = np.zeros(len(y))
            y_binary[y == self.classes_[i]] = 1
            self._fit_binary(X, y_binary)

    def predict_proba(self, X):
        proba = np.zeros((X.shape[0], len(self.classes_)))
        for i in range(len(self.classes_)):
            proba[:, i] = self._predict_binary(X, i)
        return proba / np.sum(proba, axis=1, keepdims=True)

    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)

    def _fit_binary(self, X, y):
        residuals = y
        for i in range(self.n_estimators):
            tree = DecisionTreeRegressor(max_depth=self.max_depth, min_samples_split=self.min_samples_split)
            tree.fit(X, residuals, sample_weight=self._get_weights(residuals))
            self.trees.append(tree)
            self.weights.append(1.0)
            residuals = self._update_residuals(residuals, self._predict_binary(X, -1))

    def _predict_binary(self, X, class_idx):
        if class_idx == -1:
            return np.sum([self.weights[i] * self.trees[i].predict(X) for i in range(len(self.trees))], axis=0)
        else:
            return 1.0 / (1.0 + np.exp(-self._predict_binary(X, -1)))

    def _get_weights(self, residuals):
        return np.ones(len(residuals))

    def _update_residuals(self, residuals, predictions):
        return residuals - self.learning_rate * predictions






In [14]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [15]:
dataset = pd.read_csv('Dia.csv')

In [16]:
dataset.fillna(0, inplace=True)
dataset.fillna(dataset.mean(), inplace=True)

In [17]:
X = dataset.iloc[:,:-1].values
y = dataset.iloc[:, -1].values

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [19]:
model = XGBClassifier()

In [20]:
model.fit(X_train, y_train)

In [21]:
y_pred = model.predict(X_test)

In [22]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 66.73%
