## cuML

In [1]:
import time
from cuml.datasets import make_classification
from cuml.svm import LinearSVC, SVC
from cuml.model_selection import train_test_split
from cuml.metrics import accuracy_score

In [2]:
# Generate synthetic classification data on the GPU
# Note: n_informative is set lower than n_features to simulate a realistic sparse scenario.
X, y = make_classification(n_samples=5000,
                           n_features=65000,
                           n_informative=500,
                           n_classes=2,
                           random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
# ---------------------------
# Train a linear classifier using cuML's LinearSVC
# ---------------------------
linear_svc = LinearSVC(max_iter=1000, tol=1e-4)
start = time.time()
linear_svc.fit(X_train, y_train)
end = time.time()
print("cuML LinearSVC training time: {:.4f} seconds".format(end - start))

# Predict on the test set and compute accuracy
pred_linear = linear_svc.predict(X_test)
acc_linear = accuracy_score(y_test, pred_linear)
print("cuML LinearSVC test accuracy: {:.4f}".format(acc_linear))

cuML LinearSVC training time: 0.3481 seconds
cuML LinearSVC test accuracy: 0.8260


In [4]:
# ---------------------------
# Train an SVM with a linear kernel using cuML's SVC
# ---------------------------
svc = SVC(kernel="linear", max_iter=1000)
start = time.time()
svc.fit(X_train, y_train)
end = time.time()
print("cuML SVC (linear kernel) training time: {:.4f} seconds".format(end - start))

# Predict on the test set and compute accuracy
pred_svc = svc.predict(X_test)
acc_svc = accuracy_score(y_test, pred_svc)
print("cuML SVC (linear kernel) test accuracy: {:.4f}".format(acc_svc))

cuML SVC (linear kernel) training time: 0.7493 seconds
cuML SVC (linear kernel) test accuracy: 0.8340


In [5]:
# ---------------------------
# Train an SVM with a poly kernel using cuML's SVC
# ---------------------------
svc = SVC(kernel="poly", max_iter=1000)
start = time.time()
svc.fit(X_train, y_train)
end = time.time()
print("cuML SVC (linear kernel) training time: {:.4f} seconds".format(end - start))

# Predict on the test set and compute accuracy
pred_svc = svc.predict(X_test)
acc_svc = accuracy_score(y_test, pred_svc)
print("cuML SVC (linear kernel) test accuracy: {:.4f}".format(acc_svc))

cuML SVC (linear kernel) training time: 0.5779 seconds
cuML SVC (linear kernel) test accuracy: 0.8020


In [6]:
# ---------------------------
# Train an SVM with a rbf kernel using cuML's SVC
# ---------------------------
svc = SVC(kernel="rbf", max_iter=1000)
start = time.time()
svc.fit(X_train, y_train)
end = time.time()
print("cuML SVC (linear kernel) training time: {:.4f} seconds".format(end - start))

# Predict on the test set and compute accuracy
pred_svc = svc.predict(X_test)
acc_svc = accuracy_score(y_test, pred_svc)
print("cuML SVC (linear kernel) test accuracy: {:.4f}".format(acc_svc))

cuML SVC (linear kernel) training time: 0.6028 seconds
cuML SVC (linear kernel) test accuracy: 0.8840


In [7]:
# ---------------------------
# Train an SVM with a sigmoid kernel using cuML's SVC
# ---------------------------
svc = SVC(kernel="sigmoid", max_iter=1000)
start = time.time()
svc.fit(X_train, y_train)
end = time.time()
print("cuML SVC (linear kernel) training time: {:.4f} seconds".format(end - start))

# Predict on the test set and compute accuracy
pred_svc = svc.predict(X_test)
acc_svc = accuracy_score(y_test, pred_svc)
print("cuML SVC (linear kernel) test accuracy: {:.4f}".format(acc_svc))

cuML SVC (linear kernel) training time: 0.5351 seconds
cuML SVC (linear kernel) test accuracy: 0.6910


## Sklearn

In [4]:
import time
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
# Generate synthetic classification data on the CPU
# Here, n_informative is set lower than n_features to simulate a scenario with sparse signal.
X, y = make_classification(n_samples=5000,
                           n_features=65000,
                           n_informative=500,
                           n_classes=2,
                           random_state=42)

# Split the data into training and testing sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# ---------------------------
# Train a linear classifier using sklearn's LinearSVC
# ---------------------------
linear_svc = LinearSVC(max_iter=1000, tol=1e-4)
start = time.time()
linear_svc.fit(X_train, y_train)
end = time.time()
print("sklearn LinearSVC training time: {:.4f} seconds".format(end - start))

# Predict on the test set and compute accuracy
pred_linear = linear_svc.predict(X_test)
acc_linear = accuracy_score(y_test, pred_linear)
print("sklearn LinearSVC test accuracy: {:.4f}".format(acc_linear))

TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly.

In [None]:
# ---------------------------
# Train an SVM with a linear kernel using sklearn's SVC
# ---------------------------
# Note: SVC uses a dual formulation, which can be less efficient on large datasets.
svc = SVC(kernel="linear")
start = time.time()
svc.fit(X_train, y_train)
end = time.time()
print("sklearn SVC (linear kernel) training time: {:.4f} seconds".format(end - start))

# Predict on the test set and compute accuracy
pred_svc = svc.predict(X_test)
acc_svc = accuracy_score(y_test, pred_svc)
print("sklearn SVC (linear kernel) test accuracy: {:.4f}".format(acc_svc))