In [None]:
# Mount Google Drive inside the Colab VM at /content/drive; the dataset is
# read from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import time

from cuml.neighbors import KNeighborsRegressor as cuKNN
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the raw taxi-trip CSV from the mounted Drive folder.
# header=None tells pandas not to interpret any line as column names, so the
# file's header line arrives as an ordinary first row and the columns receive
# integer labels. low_memory=False disables pandas' chunked type inference.
df = pd.read_csv('/content/drive/MyDrive/Machine Learning Project - 605/KMeans/yellow_tripdata.csv', header=None, low_memory=False)

In [None]:
# The CSV is read with header=None, so the real header line sits in row 0 and
# the columns carry the default integer labels 0..n-1. Promote that row to the
# header and drop it from the data.
#
# The guard makes this cell safe to re-run: once the columns have been renamed
# they no longer equal 0..n-1, so a second execution is a no-op. Without it,
# every extra run would throw away one real trip record and install that
# record's values as the column names.
if list(df.columns) == list(range(df.shape[1])):
    df.columns = df.iloc[0]
    df = df[1:].reset_index(drop=True)

In [None]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,pickup_longitude,pickup_latitude,RatecodeID,store_and_fwd_flag,dropoff_longitude,dropoff_latitude,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount
0,1,2016-03-01 00:00:00,2016-03-01 00:07:55,1,2.5,-73.97674560546875,40.765151977539055,1,N,-74.00426483154298,40.74612808227539,1,9.0,0.5,0.5,2.05,0.0,0.3,12.35
1,1,2016-03-01 00:00:00,2016-03-01 00:11:06,1,2.9,-73.98348236083984,40.767925262451165,1,N,-74.00594329833984,40.7331657409668,1,11.0,0.5,0.5,3.05,0.0,0.3,15.35
2,2,2016-03-01 00:00:00,2016-03-01 00:31:06,2,19.98,-73.78202056884764,40.64480972290039,1,N,-73.97454071044923,40.6757698059082,1,54.5,0.5,0.5,8.0,0.0,0.3,63.8
3,2,2016-03-01 00:00:00,2016-03-01 00:00:00,3,10.78,-73.86341857910156,40.769813537597656,1,N,-73.96965026855469,40.757766723632805,1,31.5,0.0,0.5,3.78,5.54,0.3,41.62
4,2,2016-03-01 00:00:00,2016-03-01 00:00:00,5,30.43,-73.97174072265625,40.79218292236328,3,N,-74.17716979980467,40.69505310058594,1,98.0,0.0,0.0,0.0,15.5,0.3,113.8


In [None]:
# Model inputs, listed one per line in the order they are fed to the scaler.
# NOTE(review): in every row shown by df.head(), fare_amount + extra + mta_tax
# + tip_amount + tolls_amount + improvement_surcharge equals total_amount, so
# the target looks like a deterministic sum of these inputs. That is fine for
# a pure speed benchmark, but confirm before reading anything into accuracy.
features = [
    # trip description
    'passenger_count',
    'trip_distance',
    # pickup / drop-off coordinates
    'pickup_longitude',
    'pickup_latitude',
    'dropoff_longitude',
    'dropoff_latitude',
    # fare components
    'fare_amount',
    'extra',
    'mta_tax',
    'tip_amount',
    'tolls_amount',
    'improvement_surcharge',
]

# Column the regressor is trained to predict.
target = 'total_amount'

In [None]:
# Convert every column to a numeric dtype, one column at a time. Values that
# cannot be parsed as numbers become NaN (errors='coerce'), which includes the
# timestamp strings and the 'N' flag visible in the df.head() preview.
df = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [None]:
X = df[features]
y = df[target]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows influence the training features (data
# leakage). random_state and test_size are unchanged, so the same rows land in
# each split as before; only the scaling statistics differ.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling parameters from the training rows only, then apply the
# same transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept under its original name in case another cell still
# references it. It now uses the train-only statistics.
X_scaled = scaler.transform(X)

# The unscaled split copies are no longer needed; release them to save RAM.
del X_train_raw, X_test_raw


In [None]:
import cuml
import cupy as cp

# Copy the training split into device memory. The target is cast to float32
# on the host first, then transferred.
X_train_gpu = cp.asarray(X_train)
y_train_gpu = cp.asarray(y_train.values.astype(np.float32))

# GPU work can be queued asynchronously. Wait for the transfers above to
# finish so they are not billed to the fit, and use perf_counter(), which is
# monotonic and intended for measuring intervals (time.time() can jump if the
# system clock is adjusted).
cp.cuda.Device().synchronize()
start_gpu = time.perf_counter()

knn_gpu = cuKNN(n_neighbors=5)
knn_gpu.fit(X_train_gpu, y_train_gpu)

# Make sure all work launched by fit() has completed before stopping the clock.
cp.cuda.Device().synchronize()
end_gpu = time.perf_counter()
print(f"GPU Training Time: {end_gpu - start_gpu:.4f} seconds")


GPU Training Time: 2.1612 seconds


In [None]:
# Copy the held-out split into device memory before the timer starts.
X_test_gpu = cp.asarray(X_test)

# GPU work can be queued asynchronously. Wait for the transfer above to finish
# so it is not counted as prediction time, and time the interval with the
# monotonic perf_counter() rather than the wall clock.
cp.cuda.Device().synchronize()
start_gpu_pred = time.perf_counter()

y_pred_gpu = knn_gpu.predict(X_test_gpu)

# Make sure all work launched by predict() has completed before stopping the
# clock.
cp.cuda.Device().synchronize()
end_gpu_pred = time.perf_counter()
print(f"GPU Prediction Time: {end_gpu_pred - start_gpu_pred:.4f} seconds")


GPU Prediction Time: 712.0004 seconds


In [None]:
import cupy as cp
def print_memory_usage_gb(device_id=0):
    """Print how much memory is currently in use on a CUDA device.

    The free and total byte counts are read from
    ``cp.cuda.Device(device_id).mem_info``. Usage is derived as total minus
    free, so it reflects everything occupying the device at call time.
    Values are divided by 1024**3 before printing.

    Args:
        device_id: Ordinal of the CUDA device to query. Defaults to 0, the
            device this helper previously had hard-coded, so existing
            zero-argument calls behave exactly as before.

    Returns:
        None. A single summary line is written to stdout.
    """
    bytes_per_unit = 1024 ** 3
    free_bytes, total_bytes = cp.cuda.Device(device_id).mem_info
    total_gb = total_bytes / bytes_per_unit
    used_gb = total_gb - free_bytes / bytes_per_unit
    print(f"GPU VRAM Usage: {used_gb:.4f} GB out of {total_gb:.4f} GB")

# Print the current VRAM usage of GPU 0.
print_memory_usage_gb()

GPU VRAM Usage: 1.6826 GB out of 14.7413 GB
