*Step* 1: Data Preparation


In [1]:
import pandas as pd
import cupy as cp
import numpy as np
import time
from sklearn.model_selection import train_test_split

# Load the dataset
data_path = '/content/MLoGPU_data1_train.csv'
df = pd.read_csv(data_path)

# Convert the dataset to CuPy arrays
X_gpu = cp.asarray(df.iloc[:, :-1].values)
y_gpu = cp.asarray(df.iloc[:, -1].values)

# Calculate the mean and standard deviation for each feature using CuPy
mean_gpu = cp.mean(X_gpu, axis=0)
std_dev_gpu = cp.std(X_gpu, axis=0)

# Standardize the features manually using CuPy
X_scaled_gpu = (X_gpu - mean_gpu) / std_dev_gpu

# Convert scaled features and labels back to NumPy arrays for train_test_split
X_scaled = cp.asnumpy(X_scaled_gpu)
y = cp.asnumpy(y_gpu)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Convert the split data back to CuPy arrays for further GPU computation
X_train_gpu = cp.asarray(X_train)
X_test_gpu = cp.asarray(X_test)
y_train_gpu = cp.asarray(y_train, dtype=cp.int32)
y_test_gpu = cp.asarray(y_test, dtype=cp.int32)


Step 2:Kernel

In [2]:
# Define CUDA kernel for computing distances
from cupy import RawKernel
kernel_code = '''
extern "C" __global__
void compute_distances(const float* X, const float* Y, float* D, int num_rows_x, int num_rows_y, int num_cols) {
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    int j = blockDim.y * blockIdx.y + threadIdx.y;

    if (i < num_rows_x && j < num_rows_y) {
        float distance = 0;
        for (int k = 0; k < num_cols; ++k) {
            float diff = X[i * num_cols + k] - Y[j * num_cols + k];
            distance += diff * diff;
        }
        D[i * num_rows_y + j] = sqrt(distance);
    }
}
'''

Step 3: Compute distances

In [3]:
compute_distances = RawKernel(kernel_code, 'compute_distances')

# Function to compute distances on GPU
def compute_distances_gpu(X, Y):
    num_rows_x, num_cols_x = X.shape
    num_rows_y, num_cols_y = Y.shape
    D = cp.zeros((num_rows_x, num_rows_y), dtype=cp.float32)
    block_dim = (16, 16)
    grid_dim = (int((num_rows_x + block_dim[0] - 1) / block_dim[0]), int((num_rows_y + block_dim[1] - 1) / block_dim[1]))
    compute_distances(grid_dim, block_dim, (X, Y, D, num_rows_x, num_rows_y, num_cols_x))
    return D

# Compute distances from test set to training set
start_time = time.time()
D_test_gpu = compute_distances_gpu(X_test_gpu, X_train_gpu)
elapsed_time_compute = time.time() - start_time

Step 4: KNN Classification

In [4]:
# Define the number of neighbors
k = 15

# Function for kNN classification on GPU
def knn_classification_gpu(D, y, k):
    # Get indices of k smallest distances for each row
    indices_k_nearest = cp.argsort(D, axis=1)[:, :k]
    # Gather the labels of these nearest neighbors
    nearest_labels = cp.array([y[i] for i in indices_k_nearest])
    # Vote for the most common label
    votes = cp.array([cp.bincount(labels, minlength=int(y.max()) + 1).argmax() for labels in nearest_labels])
    return votes

# Use the function for classification on GPU
start_time_knn_gpu = time.time()
predicted_labels_test_gpu = knn_classification_gpu(D_test_gpu, y_train_gpu, k)
elapsed_time_knn_gpu = time.time() - start_time_knn_gpu
print(f"GPU kNN Classification Time: {elapsed_time_knn_gpu:.4f} seconds")

# Calculate accuracy for GPU predictions
accuracy_gpu = cp.mean(predicted_labels_test_gpu == y_test_gpu).get() * 100
print(f"GPU Accuracy: {accuracy_gpu:.2f}%")

GPU kNN Classification Time: 7.6447 seconds
GPU Accuracy: 43.50%
