In [None]:
# Print the name of the GPU attached to this session (CSV output, header row
# suppressed) followed by the current date/time -- useful context for the
# timings recorded further down.
!nvidia-smi --query-gpu=name --format=csv,noheader
!date

In [None]:
# Mount Google Drive at /content/drive; later cells read and write files under
# /content/drive/MyDrive. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

# DBSCAN performance with Python (scikit-learn)

In [None]:
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np
import time
from sklearn import metrics
from sklearn.cluster import DBSCAN
import multiprocessing

In [None]:
# DBSCAN hyper-parameters. They are used by the scikit-learn run below and are
# also written into the header of the exported data file.
# NOTE(review): make_blobs defaults to cluster_std=1.0, so in 10-D an eps of
# 0.05 likely leaves most points without neighbours (all noise) -- confirm this
# is the intended benchmark regime.
eps = .05
minpts = 5

# Dataset size: n samples with d features each.
n, d = 200000, 10

# Fixed seed so that the generated blobs -- and therefore every timing and ARI
# value computed from them -- are identical after "Restart & Run All".
seed = 42

X, y = make_blobs(n_samples=n, n_features=d, shuffle=True, random_state=seed, centers=20)
print(X.shape)

# Export the points where the CUDA cells look for them: those cells %cd into
# /content/drive/MyDrive/G-DBSCAN and pass `data.txt` to the binaries.
# The matrix is flattened column-major (order='F') and written one value per
# line; np.savetxt prefixes the header line ("<rows> <cols> <eps> <minpts>")
# with "# ".
data_path = '/content/drive/MyDrive/G-DBSCAN/data.txt'
np.savetxt(data_path, X.flatten(order='F'), fmt='%f', header=f'{X.shape[0]} {X.shape[1]} {eps} {minpts}')

In [None]:
#plt.scatter(X[:, 0], X[:, 1], c=y)
#plt.show()

In [None]:
# Wall-clock time of scikit-learn's brute-force DBSCAN on X, parallelised over
# all available cores (n_jobs=-1). Estimator construction is kept inside the
# timed region.
t0 = time.perf_counter()
estimator = DBSCAN(algorithm='brute', n_jobs=-1, min_samples=minpts, eps=eps)
clustering = estimator.fit(X)  # fit() returns the fitted estimator itself
t1 = time.perf_counter()
python_time = t1 - t0
print(f'Elapsed time {python_time}s with {multiprocessing.cpu_count()} cpu')

In [None]:
#plt.scatter(X[:, 0], X[:, 1], c=clustering.labels_)
#plt.show()

In [None]:
# Adjusted Rand index of scikit-learn's DBSCAN labels against the generating
# blob ids, shown next to the score of the ground truth against itself
# (identical labelings score 1.0) as a reference point.
ari_sklearn = metrics.adjusted_rand_score(y, clustering.labels_)
ari_reference = metrics.adjusted_rand_score(y, y)
ari_sklearn, ari_reference

# Testing and profiling the CUDA implementation

Compiles and profiles standard implementation

In [None]:
%cd /content/drive/MyDrive/G-DBSCAN
!nvcc --generate-line-info -arch=sm_75 gdbscan.cu
!nvcc -arch=sm_75 gdbscan.cu -o perf.out

!wc -l data.txt
!time ./perf.out data.txt > standard_perf_time
!cat standard_perf_time
!ncu --kernel-name compute_degrees --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > standard_computedegrees.csv
!ncu --kernel-name compute_adjacency_list --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > standard_computeadjlist.csv
!ncu --kernel-name cluster_assignment --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > standard_clusterassignment.csv
!ncu --kernel-name kernel_bfs --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > standard_kernelbfs.csv

In [None]:
# Read the integer labels stored in out.txt (path relative to the directory
# selected by the preceding %cd; presumably produced by the CUDA runs above --
# confirm) and score them with the adjusted Rand index against the generating
# blob ids and against scikit-learn's DBSCAN labels.
cuda_labels = np.loadtxt("out.txt", dtype=np.int32)
ari_vs_truth = metrics.adjusted_rand_score(y, cuda_labels)
ari_vs_sklearn = metrics.adjusted_rand_score(clustering.labels_, cuda_labels)
ari_vs_truth, ari_vs_sklearn
#plt.scatter(X[:, 0], X[:, 1], c=cuda_labels)
#plt.show()

Compiles and profiles shifted memory access implementation

In [None]:
%cd /content/drive/MyDrive/G-DBSCAN
!nvcc --generate-line-info -arch=sm_75 gdbscan_shifted.cu
!nvcc -arch=sm_75 gdbscan_shifted.cu -o perf.out

!time ./perf.out ../../../data.txt > shifted_perf_time
!cat shifted_perf_time
!ncu --kernel-name compute_degrees --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shifted_computedegrees.csv
!ncu --kernel-name compute_adjacency_list --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shifted_computeadjlist.csv
!ncu --kernel-name cluster_assignment --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shifted_clusterassignment.csv
!ncu --kernel-name kernel_bfs --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shifted_kernelbfs.csv

In [None]:
# Read the integer labels stored in out_shifted.txt (path relative to the
# directory selected by the preceding %cd; presumably produced by the
# shifted-access CUDA runs above -- confirm) and score them with the adjusted
# Rand index against the generating blob ids and against scikit-learn's labels.
cuda_labels = np.loadtxt("out_shifted.txt", dtype=np.int32)
ari_vs_truth = metrics.adjusted_rand_score(y, cuda_labels)
ari_vs_sklearn = metrics.adjusted_rand_score(clustering.labels_, cuda_labels)
ari_vs_truth, ari_vs_sklearn
#plt.scatter(X[:, 0], X[:, 1], c=cuda_labels)
#plt.show()

Compiles and profiles shared memory implementation

In [None]:
%cd /content/drive/MyDrive/G-DBSCAN
!nvcc --generate-line-info -arch=sm_75 gdbscan_shared.cu
!nvcc -arch=sm_75 gdbscan_shared.cu -o perf.out

!time ./perf.out ../../../data.txt > shared_perf_time
!cat shared_perf_time
!ncu --kernel-name compute_degrees --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shared_computedegrees.csv
!ncu --kernel-name compute_adjacency_list --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shared_computeadjlist.csv
!ncu --kernel-name cluster_assignment --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shared_clusterassignment.csv
!ncu --kernel-name kernel_bfs --csv --launch-skip 0 --launch-count 1 --set full "/content/drive/My Drive/G-DBSCAN/./a.out" data.txt > shared_kernelbfs.csv

In [None]:
# Read the integer labels stored in out_shared.txt (path relative to the
# directory selected by the preceding %cd; presumably produced by the
# shared-memory CUDA runs above -- confirm) and score them with the adjusted
# Rand index against the generating blob ids and against scikit-learn's labels.
cuda_labels = np.loadtxt("out_shared.txt", dtype=np.int32)
ari_vs_truth = metrics.adjusted_rand_score(y, cuda_labels)
ari_vs_sklearn = metrics.adjusted_rand_score(clustering.labels_, cuda_labels)
ari_vs_truth, ari_vs_sklearn
#plt.scatter(X[:, 0], X[:, 1], c=cuda_labels)
#plt.show()

Compiles and profiles different grid invocation

In [None]:
# Print the current date/time; together with the timestamp printed in the first
# cell this brackets the duration of the whole run.
!date

# Useful code to keep around 

In [None]:
#!ncu --kernel-name compute_degrees --launch-skip 0 --launch-count 1 --section ComputeWorkloadAnalysis "/content/drive/My Drive/G-DBSCAN/./a.out" ../../../data.txt

In [None]:
#!ncu --kernel-name compute_degrees --launch-skip 0 --launch-count 1 "/content/drive/My Drive/G-DBSCAN/./a.out" ../../../data.txt

In [None]:
# To visualize with Nvidia Nsight graphic interface
#!rm *.qdrep
#!nsys profile ./a.out ../../../data.txt

In [None]:
# Only for debug purposes
#!cuda-memcheck ./a.out data.txt | head -n 100