In [1]:
# https://rapids.ai/cudf-pandas/

!pip install \
    --extra-index-url=https://pypi.nvidia.com \
    "cudf-cu12==25.8.*" "dask-cudf-cu12==25.8.*" "cuml-cu12==25.8.*" \
    "cugraph-cu12==25.8.*" "nx-cugraph-cu12==25.8.*" "cuxfilter-cu12==25.8.*" \
    "cucim-cu12==25.8.*" "pylibraft-cu12==25.8.*" "raft-dask-cu12==25.8.*" \
    "cuvs-cu12==25.8.*" "nx-cugraph-cu12==25.8.*"

!pip install cupy-cuda12x cutensor-cu12 nvidia-nccl-cu12

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com, https://pypi.nvidia.com
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [19]:
import cudf as pd
import sklearn
import cuml
from sklearn import config_context
from cuml.ensemble import RandomForestRegressor
# from sklearn.ensemble import RandomForestRegressor
from cuml.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.datasets import fetch_california_housing
import cupy as cp


In [10]:
# Seed CuPy's global random number generator.
cp.random.seed(0)

# Fetch the California housing dataset and wrap it in a DataFrame (`pd` is the
# alias this notebook gives to cudf). The regression target is stored next to
# the features in a 'target' column.
housing = fetch_california_housing()
housing_df = pd.DataFrame(
    data=housing['data'],
    columns=housing['feature_names'],
)
housing_df['target'] = housing['target']
# X = cp.asarray(housing['data'])
# y = cp.asarray(housing['target'])

In [11]:
# Separate the label column from the feature columns.
y = housing_df['target']
X = housing_df.drop(columns='target')

In [12]:
# Hold out 20% of the rows for evaluation.
# cuML's train_test_split is unseeded unless random_state is given, so the seed
# is passed explicitly to make the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [13]:
%%time
reg = RandomForestRegressor(n_estimators=1000)
fitted_reg = reg.fit(X_train, y_train)
# Took 800 s on CPU, 48.5 s on GPU

CPU times: user 7.71 s, sys: 6.03 s, total: 13.7 s
Wall time: 4.79 s


In [14]:
%%time
# Sanity check: score the fitted model on the same data it was trained on.
# The author's expectation is that a sufficiently powerful model reaches the
# maximum score (1.0) on already-seen data, so anything lower is reported.
# NOTE(review): an ensemble can legitimately score below 1.0 on its own
# training data, in which case this warning fires on every run -- confirm the
# intended threshold.
train_score = fitted_reg.score(X_train, y_train)
if train_score < 1:
    print(f'Warning: LOW SCORE on training set: {train_score}')

CPU times: user 3.27 s, sys: 1.67 s, total: 4.94 s
Wall time: 2.91 s


In [15]:
%%time
# Run the fitted forest on the held-out features; the result is compared
# against y_test in the following cells.
prediction = fitted_reg.predict(X_test)

CPU times: user 1.33 s, sys: 629 ms, total: 1.96 s
Wall time: 1.95 s


In [22]:
%%time
sci_r2_score = sklearn.metrics.r2_score(cp.asnumpy(y_test), cp.asnumpy(prediction))
sci_r2_score

CPU times: user 1.6 ms, sys: 8.74 ms, total: 10.3 ms
Wall time: 9.71 ms


0.8083165807359329

In [23]:
%%time
# Same R^2 metric, this time computed with cuML's implementation directly on
# y_test and prediction (no cp.asnumpy conversion), for comparison with the
# scikit-learn value.
cuml_r2_score = cuml.metrics.r2_score(y_test, prediction)
cuml_r2_score

CPU times: user 2.71 ms, sys: 0 ns, total: 2.71 ms
Wall time: 2.44 ms


0.8083165807359329