In [1]:
import cupy as cp
import cuml
import sys
sys.path.append('/home/jhovan/Documents/CustomImplementations/gentleboost')
from gentleboost import GentleBoost


In [2]:
from cuml.datasets import make_classification

# Synthetic two-class problem, generated with a fixed seed so reruns match.
X, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_informative=15,
    n_classes=2,
    n_clusters_per_class=3,
    class_sep=1.0,  # spacing between the class clusters
    random_state=42,
)

# Copy both outputs into CuPy arrays.
X, y = cp.array(X), cp.array(y)

# Remap the labels via 2*y - 1 (0 -> -1, 1 -> +1 when y holds 0/1 labels);
# this is the target passed to GentleBoost.
y_shifted = y * 2 - 1

In [4]:
# Grid search over GentleBoost settings. Every fit is scored on the same data
# it was trained on, so the reported figure is a training accuracy.
learning_rates = [1.0]
n_estimators_list = [100, 200, 300, 400, 500]

# Best score seen so far and the settings that produced it.
best_accuracy, best_params = 0, {}

# Walk the grid with the learning rate as the slow axis.
for lr, n_est in ((rate, count) for rate in learning_rates for count in n_estimators_list):
    gb_classifier = GentleBoost(n_estimators=n_est, learning_rate=lr, patience=20)
    gb_classifier.fit(X, y_shifted)
    y_pred = gb_classifier.predict(X)
    # Fraction of samples whose predicted label matches the shifted target.
    accuracy = (y_shifted == y_pred).mean()

    print(f"\nParameters: learning_rate={lr}, n_estimators={n_est}")
    print(f"Training Accuracy: {accuracy:.4f}")

    # Strict comparison: on a tie the earlier configuration is kept.
    if accuracy > best_accuracy:
        best_accuracy, best_params = accuracy, {'learning_rate': lr, 'n_estimators': n_est}

print(f"\nBest parameters: {best_params}")
print(f"Best accuracy: {best_accuracy:.4f}")

Iteration 0, Train Error: 0.3772, Val Error: 0.3705
Iteration 10, Train Error: 0.2552, Val Error: 0.2405
Iteration 20, Train Error: 0.2124, Val Error: 0.2170
Iteration 30, Train Error: 0.1900, Val Error: 0.1915
Iteration 40, Train Error: 0.1815, Val Error: 0.1865
Iteration 50, Train Error: 0.1705, Val Error: 0.1740
Iteration 60, Train Error: 0.1666, Val Error: 0.1715
Iteration 70, Train Error: 0.1659, Val Error: 0.1675
Iteration 80, Train Error: 0.1653, Val Error: 0.1670
Iteration 90, Train Error: 0.1641, Val Error: 0.1670
Early stopping at iteration 94. Best val error: 0.1665

Parameters: learning_rate=1.0, n_estimators=100
Training Accuracy: 0.8348
Iteration 0, Train Error: 0.3706, Val Error: 0.3985
Iteration 10, Train Error: 0.2559, Val Error: 0.2635
Iteration 20, Train Error: 0.2137, Val Error: 0.2305
Iteration 30, Train Error: 0.1903, Val Error: 0.2105
Iteration 40, Train Error: 0.1762, Val Error: 0.1950
Iteration 50, Train Error: 0.1700, Val Error: 0.1830
Iteration 60, Train Erro

In [9]:
from cuml.linear_model import LogisticRegression

# Baseline: logistic regression with default settings, fitted on the
# unshifted labels and scored on the same data it was trained on.
lr_classifier = LogisticRegression()
lr_classifier.fit(X, y)

y_pred = lr_classifier.predict(X)
# Fraction of samples whose predicted label matches the target.
accuracy = (y == y_pred).mean()
print(f"Logistic Regression Accuracy: {accuracy:.4f}")

[W] [17:04:52.468233] L-BFGS stopped, because the line search failed to advance (step delta = 0.000000)
Logistic Regression Accuracy: 0.9605


In [10]:
# comparing to KNN classifier
from cuml.neighbors import KNeighborsClassifier

# Try KNN with different numbers of neighbors.
# NOTE: each model is scored on the same data it was fitted on, so every query
# point is also present in the set it is matched against.
k_values = [3, 5, 7, 9]

# Start the best-so-far trackers afresh. Without this the cell silently reuses
# whatever an earlier cell left in `best_accuracy` / `best_params`: it fails
# with NameError when run on its own, and could report another model's
# parameters as the KNN "best" if that model scored higher.
best_accuracy = 0
best_params = {}

for k in k_values:
    knn_classifier = KNeighborsClassifier(n_neighbors=k)
    knn_classifier.fit(X, y)
    y_pred = knn_classifier.predict(X)
    # Fraction of samples whose predicted label matches the target.
    accuracy = cp.mean(y == y_pred)
    print(f"KNN with {k} neighbors Accuracy: {accuracy:.4f}")

    # Strict comparison: on a tie the smaller k (tried first) is kept.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = {'n_neighbors': k}

print(f"\nBest parameters: {best_params}")
print(f"Best accuracy: {best_accuracy:.4f}")

KNN with 3 neighbors Accuracy: 0.9842
KNN with 5 neighbors Accuracy: 0.9821
KNN with 7 neighbors Accuracy: 0.9797
KNN with 9 neighbors Accuracy: 0.9794

Best parameters: {'n_neighbors': 3}
Best accuracy: 0.9842
