# DBSCAN performance with scikit-learn (Python)

In [None]:
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np
import time

In [None]:
# DBSCAN parameters. They are passed to scikit-learn's DBSCAN as `eps` and
# `min_samples`, and are also written into the header of data.txt below.
eps = .05
minpts = 4

# Size of the synthetic dataset: n points with d features each.
n = 150000
d = 2

# random_state=None means no fixed seed is requested for the generator.
X, y = make_blobs(
    n_samples=n,
    n_features=d,
    centers=20,
    shuffle=True,
    random_state=None,
)
print(X.shape)

# Write the points one value per line in column-major order (all values of
# feature 0 first, then feature 1, ...). The header carries
# "<n_points> <n_features> <eps> <minpts>"; np.savetxt prefixes it with its
# default comment marker "# ".
np.savetxt(
    'data.txt',
    X.ravel(order='F'),
    fmt='%f',
    header=f'{X.shape[0]} {X.shape[1]} {eps} {minpts}',
)

In [None]:
# Scatter plot of the first two features, coloured by the labels returned
# by make_blobs.
plt.scatter(x=X[:, 0], y=X[:, 1], c=y)
plt.show()

In [None]:
import multiprocessing

from sklearn.cluster import DBSCAN

# Time the scikit-learn DBSCAN run (estimator construction + fit) with a
# monotonic high-resolution clock. n_jobs=-1 asks scikit-learn to use all
# available processors.
t0 = time.perf_counter()
clustering = DBSCAN(eps=eps, min_samples=minpts, n_jobs=-1)
clustering.fit(X)
python_time = time.perf_counter() - t0
print(f'Elapsed time {python_time}s with {multiprocessing.cpu_count()} cpu')

In [None]:
# Same scatter plot as before, now coloured by the cluster labels that
# scikit-learn's DBSCAN assigned.
plt.scatter(x=X[:, 0], y=X[:, 1], c=clustering.labels_)
plt.show()

In [None]:
from sklearn import metrics

# Displayed as a tuple:
#   1. adjusted Rand index of the scikit-learn DBSCAN labels against the
#      make_blobs labels;
#   2. adjusted Rand index of the make_blobs labels against themselves,
#      shown next to it as a reference value.
(
    metrics.adjusted_rand_score(y, clustering.labels_),
    metrics.adjusted_rand_score(y, y),
)

# Testing and profiling the CUDA implementation

In [None]:
# Report the GPU that is available to this runtime.
!nvidia-smi

In [None]:
# Switch to the project folder; the relative paths used by the following
# commands (gdbscan.cu, ./a.out, ./perf.out) are resolved from here.
%cd /content/drive/MyDrive/G-DBSCAN
# Build a.out with source line information and, if compilation succeeds,
# run it once on the dataset. From this folder ../../../data.txt resolves to
# /content/data.txt (presumably the file written by the data-generation
# cell; confirm the notebook's working directory at that point).
!nvcc --generate-line-info -arch=sm_75 gdbscan.cu && ./a.out ../../../data.txt
# Second build of the same source without line information, written to
# perf.out.
!nvcc -arch=sm_75 gdbscan.cu -o perf.out
# Alternative build/run for a K80 GPU (uses -arch=sm_37 instead of sm_75):
#!nvcc --generate-line-info -arch=sm_37 gdbscan.cu && ./a.out ../../../data.txt

In [None]:
# For debugging only: run a.out under cuda-memcheck and show the first 100 lines of its output
#!cuda-memcheck ./a.out | head -n 100

In [None]:
# To inspect the run in the NVIDIA Nsight graphical interface: remove old .qdrep reports, then record a new profile with nsys
#!rm *.qdrep
#!nsys profile ./a.out ../../../data.txt

In [None]:
# Profile the compute_degrees kernel with the Nsight Compute CLI: skip 0
# launches and profile 1 launch. No --set/--section is given, so ncu's
# default selection applies.
# NOTE(review): this path spells the folder "My Drive" while the %cd cell
# uses "MyDrive" - confirm both resolve to the same directory.
!ncu --kernel-name compute_degrees --launch-skip 0 --launch-count 1 "/content/drive/My Drive/G-DBSCAN/./a.out" ../../../data.txt

In [None]:
# Profile the compute_adjacency_list kernel with the Nsight Compute CLI:
# skip 0 launches and profile 1 launch, using ncu's default selection.
!ncu --kernel-name compute_adjacency_list --launch-skip 0 --launch-count 1 "/content/drive/My Drive/G-DBSCAN/./a.out" ../../../data.txt

In [None]:
# Load the labels from out.txt (presumably written by the CUDA program into
# the current working directory - confirm) as 32-bit integers.
cuda_labels = np.loadtxt("out.txt", dtype=np.int32)

# Displayed as a tuple:
#   1. adjusted Rand index of the CUDA labels against the make_blobs labels;
#   2. adjusted Rand index of the CUDA labels against the scikit-learn
#      DBSCAN labels.
(
    metrics.adjusted_rand_score(y, cuda_labels),
    metrics.adjusted_rand_score(clustering.labels_, cuda_labels),
)

In [None]:
# Scatter plot of the first two features, coloured by the labels loaded
# from out.txt.
plt.scatter(x=X[:, 0], y=X[:, 1], c=cuda_labels)
plt.show()

In [None]:
# Time one complete run of perf.out (the build without line information)
# on the dataset.
!time ./perf.out ../../../data.txt

In [None]:
# Profile one launch of compute_degrees again, this time requesting only
# the ComputeWorkloadAnalysis section.
!ncu --kernel-name compute_degrees --launch-skip 0 --launch-count 1 --section ComputeWorkloadAnalysis "/content/drive/My Drive/G-DBSCAN/./a.out" ../../../data.txt

In [None]:
# Profile one launch of compute_degrees with the "full" section set.
!ncu --kernel-name compute_degrees --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" ../../../data.txt

In [None]:
# Profile one launch of compute_adjacency_list with the "full" section set.
!ncu --kernel-name compute_adjacency_list --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" ../../../data.txt

In [None]:
# Print the current date and time (presumably to record when this notebook
# run finished).
!date