<a href="https://colab.research.google.com/github/ImanolSupo/DP-MDP-RL-Algorithms/blob/main/scikit_learn_acceleration_with_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Accelerating scikit-learn with GPU: Random Forest classification

Add `%load_ext cuml.accel` before importing sklearn to speed up operations using GPU.

In [None]:
%load_ext cuml.accel
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=100000, n_features=100, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

clf = RandomForestClassifier(n_estimators=100, max_depth=5, max_features=1.0, n_jobs=-1)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Accelerating scikit-learn with GPU: K-Nearest Neighbors classifier

Add `%load_ext cuml.accel` before importing sklearn to speed up operations using GPU


In [None]:
%load_ext cuml.accel
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=100000, n_features=100, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print(classification_report(y_test, y_pred))

# Accelerating scikit-learn with GPU: Logistic Regression classifier


Add `%load_ext cuml.accel` before importing sklearn to speed up operations using GPU


In [None]:
%load_ext cuml.accel
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=1000000, n_features=200, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# Accelerating scikit-learn with GPU: K-Means Segmentation

Add `%load_ext cuml.accel` before importing sklearn to speed up operations using GPU


In [None]:
%load_ext cuml.accel
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

n_samples = 1000
n_features = 2
n_clusters = 3
X, _ = make_blobs(n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=42)

kmeans = KMeans(n_clusters=n_clusters, max_iter=100)
kmeans.fit(X)

labels = kmeans.labels_
print(silhouette_score(X, labels))

# Accelerating scikit-learn with GPU: HDBSCAN Segmentation

Add `%load_ext cuml.accel` before importing `hdbscan` (and sklearn) to speed up operations using the GPU.


In [None]:
%load_ext cuml.accel
import hdbscan
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

n_samples = 1000
n_features = 2
n_clusters = 3
X, _ = make_blobs(n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=42)

clus = hdbscan.HDBSCAN()
clus.fit(X)

print(silhouette_score(X, clus.labels_))

# Accelerating scikit-learn with GPU: PCA Dimensionality Reduction

Add `%load_ext cuml.accel` before importing sklearn to speed up operations using GPU


In [None]:
%load_ext cuml.accel
import numpy as np
from sklearn.decomposition import PCA

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
pca.fit(X)

print(pca.explained_variance_ratio_)
print(pca.singular_values_)

# Accelerating scikit-learn with GPU: UMAP Dimensionality Reduction

Add `%load_ext cuml.accel` before importing `umap` (and sklearn) to speed up operations using the GPU.


In [None]:
%load_ext cuml.accel
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import umap

X, y = make_classification(n_samples=100000, n_features=20, n_classes=5, n_informative=5, random_state=0)
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)

umap_model = umap.UMAP(n_neighbors=15, n_components=2, random_state=42, min_dist=0.0)
X_train_umap = umap_model.fit_transform(X_train)
y_train
# Plot the UMAP result
plt.figure(figsize=(10, 8))
plt.scatter(X_train_umap[:, 0], X_train_umap[:, 1], c=y_train, cmap='Spectral', s=10)
plt.colorbar(label="Activity")
plt.title("UMAP projection")
plt.xlabel("UMAP Component 1")
plt.ylabel("UMAP Component 2")
plt.show()