In [1]:
import os
import sys

import cupy as cp
import cuml

# Directory that holds the local `gentleboost` package. The original author's
# checkout is kept as the default; set the GENTLEBOOST_PATH environment
# variable to point at a different location without editing the notebook.
GENTLEBOOST_DIR = (
    os.environ.get('GENTLEBOOST_PATH')
    or '/home/jhovan/Documents/CustomImplementations/gentleboost'
)
# Re-running this cell must not pile up duplicate sys.path entries.
if GENTLEBOOST_DIR not in sys.path:
    sys.path.append(GENTLEBOOST_DIR)

from gentleboost import GentleBoost


In [2]:
from cuml.datasets import make_classification

# Synthetic two-class problem: 10000 samples, 20 features (15 informative),
# 3 clusters per class, fixed seed. Both returned arrays are passed through
# cp.array so downstream cells work with cupy arrays.
X, y = (
    cp.array(part)
    for part in make_classification(
        n_samples=10000,
        n_features=20,
        n_informative=15,
        n_classes=2,
        n_clusters_per_class=3,
        class_sep=1.0,  # NOTE(review): looks like the library default, not an increase - confirm
        random_state=42,
    )
)
# Relabelled copy (2*y - 1) used as the GentleBoost target further down;
# presumably maps {0, 1} labels to {-1, +1} - confirm the label encoding.
y_shifted = y * 2 - 1

In [5]:
# Hyper-parameter grid for the GentleBoost sweep.
learning_rates = [1.0]
n_estimators_list = [100, 200, 300, 400, 500]

# Running record of the highest training accuracy and the settings behind it.
best_accuracy, best_params = 0, {}

# Visit every (learning_rate, n_estimators) pair, learning rate outermost.
for lr, n_est in (
    (rate, count) for rate in learning_rates for count in n_estimators_list
):
    # patience=20 is fixed for every run (presumably the early-stopping
    # window of the custom GentleBoost implementation - confirm).
    gb_classifier = GentleBoost(n_estimators=n_est, learning_rate=lr, patience=20)
    gb_classifier.fit(X, y_shifted)

    # Scored on the same samples the model was fitted on.
    y_pred = gb_classifier.predict(X)
    accuracy = (y_shifted == y_pred).mean()

    print(f"\nParameters: learning_rate={lr}, n_estimators={n_est}")
    print(f"Training Accuracy: {accuracy:.4f}")

    # Strict comparison: on a tie the earlier configuration is kept.
    if accuracy > best_accuracy:
        best_accuracy, best_params = accuracy, {'learning_rate': lr, 'n_estimators': n_est}

print(f"\nBest parameters: {best_params}")
print(f"Best accuracy: {best_accuracy:.4f}")

Iteration 0, Train Error: 0.3751, Val Error: 0.3810
Iteration 10, Train Error: 0.2492, Val Error: 0.2620
Iteration 20, Train Error: 0.2147, Val Error: 0.2265
Iteration 30, Train Error: 0.1904, Val Error: 0.2060
Iteration 40, Train Error: 0.1746, Val Error: 0.1940
Iteration 50, Train Error: 0.1659, Val Error: 0.1880
Iteration 60, Train Error: 0.1623, Val Error: 0.1820
Iteration 70, Train Error: 0.1599, Val Error: 0.1825
Iteration 80, Train Error: 0.1594, Val Error: 0.1815
Early stopping at iteration 87. Best val error: 0.1790

Parameters: learning_rate=1.0, n_estimators=100
Training Accuracy: 0.8354
Iteration 0, Train Error: 0.3757, Val Error: 0.3755
Iteration 10, Train Error: 0.2647, Val Error: 0.2675
Iteration 20, Train Error: 0.2151, Val Error: 0.2060
Iteration 30, Train Error: 0.1924, Val Error: 0.1910
Iteration 40, Train Error: 0.1760, Val Error: 0.1755
Iteration 50, Train Error: 0.1689, Val Error: 0.1720
Iteration 60, Train Error: 0.1666, Val Error: 0.1660
Iteration 70, Train Erro

In [9]:
from cuml.linear_model import LogisticRegression

# Baseline: logistic regression with default settings, fitted on the
# original labels `y` (not the shifted ones used for GentleBoost).
lr_classifier = LogisticRegression()
lr_classifier.fit(X, y)

# Accuracy is measured on the same samples used for fitting.
y_pred = lr_classifier.predict(X)
accuracy = (y == y_pred).mean()
print(f"Logistic Regression Accuracy: {accuracy:.4f}")

[W] [17:04:52.468233] L-BFGS stopped, because the line search failed to advance (step delta = 0.000000)
Logistic Regression Accuracy: 0.9605


In [10]:
# Compare against a KNN classifier.
from cuml.neighbors import KNeighborsClassifier

# Start a fresh record for this sweep. Without this reset the cell depends on
# `best_accuracy` / `best_params` left behind by an earlier cell: it fails
# with NameError on a fresh kernel if that cell was skipped, and it would
# report another model's parameters as "best" if no k beat the old score.
best_accuracy = 0
best_params = {}

# Try KNN with different numbers of neighbors.
k_values = [3, 5, 7, 9]
for k in k_values:
    knn_classifier = KNeighborsClassifier(n_neighbors=k)
    knn_classifier.fit(X, y)
    # NOTE: predictions are made on the same samples the model was fitted on,
    # so the figure below is a training accuracy.
    y_pred = knn_classifier.predict(X)
    accuracy = cp.mean(y == y_pred)
    print(f"KNN with {k} neighbors Accuracy: {accuracy:.4f}")

    # Strict comparison: on a tie the smaller k seen first is kept.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = {'n_neighbors': k}

print(f"\nBest parameters: {best_params}")
print(f"Best accuracy: {best_accuracy:.4f}")

KNN with 3 neighbors Accuracy: 0.9842
KNN with 5 neighbors Accuracy: 0.9821
KNN with 7 neighbors Accuracy: 0.9797
KNN with 9 neighbors Accuracy: 0.9794

Best parameters: {'n_neighbors': 3}
Best accuracy: 0.9842
