In [1]:
# https://rapids.ai/cudf-pandas/
import sys
# !conda update --yes -n base -c conda-forge conda

# !conda install --prefix {sys.prefix} --yes -c conda-forge cudf=25.10 cuda-version=12.9


!pip install cupy-cuda12x cutensor-cu12 nvidia-nccl-cu12 nvidia-cudnn-cu12

!pip install \
    --extra-index-url=https://pypi.nvidia.com \
    "cudf-cu12==25.8.*" "dask-cudf-cu12==25.8.*" "cuml-cu12==25.8.*" \
    "cugraph-cu12==25.8.*" "nx-cugraph-cu12==25.8.*" "cuxfilter-cu12==25.8.*" \
    "cucim-cu12==25.8.*" "pylibraft-cu12==25.8.*" "raft-dask-cu12==25.8.*" \
    "cuvs-cu12==25.8.*" "nx-cugraph-cu12==25.8.*"

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting cutensor-cu12
  Downloading cutensor_cu12-2.3.0-py3-none-manylinux2014_x86_64.whl.metadata (2.2 kB)
Downloading cutensor_cu12-2.3.0-py3-none-manylinux2014_x86_64.whl (237.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m237.1/237.1 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: cutensor-cu12
Successfully installed cutensor-cu12-2.3.0
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com, https://pypi.nvidia.com
Collecting cudf-cu12==25.8.*
  Downloading https://pypi.nvidia.com/cudf-cu12/cudf_cu12-25.8.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (2.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
Collecting dask-cudf-cu12==25.8.*
  Downloading https://pypi.nvidia.com/dask-cudf-cu12/dask_cudf_cu12-25.8.0-py3-none-any.whl (50 kB

In [2]:
import cudf as pd
import sklearn
import cuml
from cuml.ensemble import RandomForestRegressor
# from sklearn.ensemble import RandomForestRegressor
from cuml.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.datasets import fetch_california_housing
import cupy as cp


In [2]:
# Seed CuPy's global random number generator.
# NOTE(review): confirm whether cuML's train_test_split and
# RandomForestRegressor actually draw from this global state, or whether they
# need their own `random_state` argument to be reproducible.
cp.random.seed(0)

# Fetch the California housing dataset and put features plus target into one
# frame. `pd` is cuDF in this notebook (see the import cell), not pandas.
housing = fetch_california_housing()
housing_df = pd.DataFrame(housing.data, columns=housing.feature_names)
housing_df['target'] = housing.target
# Disabled alternative: work on raw CuPy arrays instead of a DataFrame.
# X = cp.asarray(housing['data'])
# y = cp.asarray(housing['target'])

In [3]:
# Feature matrix: every column of the housing frame except the target.
X = housing_df.drop(columns=['target'])
# Regression target: the 'target' column on its own.
y = housing_df['target']

In [4]:
# Hold out 20% of the rows for testing. `random_state` fixes the shuffle so the
# same rows land in the test set on every run; without it the hold-out set,
# and every score computed from it, changes between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [5]:
%%time
# Build a cuML random forest regressor with 1000 trees and fit it on the
# training split; the cell magic above reports how long this takes.
reg = RandomForestRegressor(n_estimators=1000)
# The value returned by fit() is kept under its own name; the scoring and
# prediction cells below use `fitted_reg`, while cross-validation uses `reg`.
fitted_reg = reg.fit(X_train, y_train)
# Took 800 s on CPU, 48.5 s on GPU with 10000 estimators
# NOTE(review): the timings above are quoted for 10000 estimators, but this
# cell builds 1000 — confirm which figure is current.
# NOTE(review): no random_state is passed to the estimator; confirm whether
# run-to-run reproducibility of the forest is required.

CPU times: user 8.05 s, sys: 5.9 s, total: 13.9 s
Wall time: 4.68 s


In [6]:
%%time
train_score = fitted_reg.score(X_train, y_train) # should return 1.0 since training data is already known!
# So if the model is powerful enough it'll score the max value.
if train_score < 1:
    print(f'Warning: LOW SCORE on training set: {train_score}')

CPU times: user 3.55 s, sys: 2.49 s, total: 6.04 s
Wall time: 3.9 s


In [7]:
%%time
# Predict targets for the held-out test features with the fitted forest;
# the result is compared against y_test in the metric cells below.
prediction = fitted_reg.predict(X_test)

CPU times: user 1.41 s, sys: 456 ms, total: 1.87 s
Wall time: 1.86 s


In [8]:
# scikit-learn's metric is fed NumPy arrays, so convert the true targets and
# the predictions with cp.asnumpy before scoring.
y_test_host = cp.asnumpy(y_test)
prediction_host = cp.asnumpy(prediction)

# R^2 of the predictions on the hold-out set, via scikit-learn.
sci_r2_score = sklearn.metrics.r2_score(y_test_host, prediction_host)
sci_r2_score

0.8114814023715806

In [9]:
# The same R^2 on the hold-out set, this time via cuML's metric, which is
# given y_test and prediction directly (no cp.asnumpy conversion).
cuml_r2_score = cuml.metrics.r2_score(y_test, prediction)
cuml_r2_score

0.8114814023715806

In [10]:
# 10-fold cross-validation of the forest over the full dataset using
# scikit-learn's helper; the result holds one score per fold.
# NOTE(review): with an integer `cv`, scikit-learn documents that the folds are
# built without shuffling. If the rows of X are ordered in some meaningful way,
# fold scores can differ noticeably from the shuffled hold-out score — confirm
# this is the comparison intended.
cvs = cross_val_score(estimator=reg, X=X, y=y, cv=10)
cvs

array([0.21683381, 0.70993299, 0.7210524 , 0.69173359, 0.77122026,
       0.69804836, 0.48513976, 0.6111571 , 0.49903383, 0.69465389])

In [12]:
# Average the per-fold scores into a single cross-validation figure.
# `cvs` is a host-side array returned by scikit-learn, so use the array's own
# mean() rather than routing a non-GPU array through CuPy's `cp.mean`.
reg_cross_val_score = cvs.mean()
reg_cross_val_score

np.float64(0.6098805989638068)

In [13]:
# Show the hold-out R^2 next to the mean cross-validation score for comparison.
cuml_r2_score, reg_cross_val_score

(0.8114814023715806, np.float64(0.6098805989638068))