# CPU vs. GPU Scikit-Learn Benchmarks

## Imports

In [6]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures
import time
import pandas as pd
import subprocess
from IPython import get_ipython
import pynvml

## Load a Dataset of Your Choice

In [7]:
covertype = datasets.fetch_covtype()
x = covertype.data
y = covertype.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=432
)

print("columns:", x.shape[1])
print("rows:", x.shape[0])

columns: 54
rows: 581012


## Benchmarking Functions

In [8]:
def benchmark_one(
    x_train, x_test, y_train, y_test,
    random_state=8, 
    n_jobs=-1, 
    n_estimators=100, 
    max_features="sqrt", 
    poly=False
):
    """
    training benchmark function for CPU and GPU
    """
    if poly == True:
        poly = PolynomialFeatures()
        x_train = poly.fit_transform(x_train)
        x_test = poly.fit_transform(x_test)

    # CPU 1540 features
    model = RandomForestClassifier(
        random_state=random_state,
        n_jobs=n_jobs,
        n_estimators=n_estimators,
        max_features=max_features,     
        )

    start_time = time.time()
    
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    n_features = x_train.shape[1]
    end_time = time.time() - start_time

    return [
        n_estimators, 
        max_features, 
        random_state, 
        n_features, 
        accuracy, 
        end_time
    ]

def benchmark_many(
    estimators_many, max_features_many, file_name
):
    df = pd.DataFrame(
        columns=[
            "n_estimators", 
            "max_features", 
            "random_state", 
            "n_features", 
            "accuracy", 
            "time"
        ]
    )

    idx = 0
    for e in estimators_many:
        for m in max_features_many:
            for p in [True, False]:
                df.loc[idx] = benchmark_one(
                    x_train, 
                    x_test, 
                    y_train, 
                    y_test, 
                    poly=p, 
                    n_estimators=e, 
                    max_features=m
                )
                print(
                    idx, 
                    "| poly:", p, 
                    "| max_features:", m, 
                    "| trees:",e , 
                    "| time:", df.loc[idx]["time"]
                )
                idx += 1
                
                # cool down for 5 seconds
                time.sleep(5)       
                # save progress
                df.to_csv(file_name)
                
    print("benchmarking complete, saved as", file_name)
    return df

## CPU Benchmarking

In [6]:
device_make = subprocess.check_output("cat /proc/cpuinfo | grep 'model name' | head -n 1", shell=True, text=True).split(":")[1].strip()
print("wait while benchmarking CPU:", device_make, "...")

df = benchmark_many([100, 250, 500], ["sqrt", 1.0], "cpu_results.csv")

wait while benchmarking CPU: 13th Gen Intel(R) Core(TM) i9-13980HX ...
0 | poly: True | max_features: sqrt | trees: 100 | time: 69.61824464797974
1 | poly: False | max_features: sqrt | trees: 100 | time: 18.689647912979126
2 | poly: True | max_features: 1.0 | trees: 100 | time: 732.8227369785309
3 | poly: False | max_features: 1.0 | trees: 100 | time: 31.87282967567444
4 | poly: True | max_features: sqrt | trees: 250 | time: 80.40768790245056
5 | poly: False | max_features: sqrt | trees: 250 | time: 17.332719564437866
6 | poly: True | max_features: 1.0 | trees: 250 | time: 1622.349330663681
7 | poly: False | max_features: 1.0 | trees: 250 | time: 75.00583076477051
8 | poly: True | max_features: sqrt | trees: 500 | time: 164.63670802116394
9 | poly: False | max_features: sqrt | trees: 500 | time: 34.95435070991516
10 | poly: True | max_features: 1.0 | trees: 500 | time: 3199.3351464271545
11 | poly: False | max_features: 1.0 | trees: 500 | time: 152.20085954666138
benchmarking complete,

### CPU Results

In [7]:
df

Unnamed: 0,n_estimators,max_features,random_state,n_features,accuracy,time
0,100.0,sqrt,8.0,1540.0,0.956378,69.618245
1,100.0,sqrt,8.0,54.0,0.953796,18.689648
2,100.0,1.0,8.0,1540.0,0.967006,732.822737
3,100.0,1.0,8.0,54.0,0.968219,31.87283
4,250.0,sqrt,8.0,1540.0,0.957092,80.407688
5,250.0,sqrt,8.0,54.0,0.955173,17.33272
6,250.0,1.0,8.0,1540.0,0.967462,1622.349331
7,250.0,1.0,8.0,54.0,0.968469,75.005831
8,500.0,sqrt,8.0,1540.0,0.95723,164.636708
9,500.0,sqrt,8.0,54.0,0.955423,34.954351


## GPU Benchmarking

### Restart Kernel

In [4]:
get_ipython().kernel.do_shutdown(restart=True)

{'status': 'ok', 'restart': True}

### Load cuML Scikit-Learn GPU Extenssion

In [3]:
import warnings
warnings.filterwarnings('ignore')

%load_ext cuml.accel

The cuml.accel extension is already loaded. To reload it, use:
  %reload_ext cuml.accel


### 4.2 Re-Run Sections 1-3

Please make sure you:
- import required modules
- load dataset
- define nescessary functions

### GPU Results

In [9]:
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
device_make = pynvml.nvmlDeviceGetName(handle) 
pynvml.nvmlShutdown()

print("wait while benchmarking GPU:", device_make, "...")

df = benchmark_many([100, 250, 500], ["sqrt", 1.0], "gpu_results.csv")
df

wait while benchmarking GPU: NVIDIA GeForce RTX 4090 Laptop GPU ...
0 | poly: True | max_features: sqrt | trees: 100 | time: 10.326878547668457
1 | poly: False | max_features: sqrt | trees: 100 | time: 5.022055149078369
2 | poly: True | max_features: 1.0 | trees: 100 | time: 56.20686411857605
3 | poly: False | max_features: 1.0 | trees: 100 | time: 6.612630844116211
4 | poly: True | max_features: sqrt | trees: 250 | time: 15.046459674835205
5 | poly: False | max_features: sqrt | trees: 250 | time: 16.597933292388916
6 | poly: True | max_features: 1.0 | trees: 250 | time: 137.88352823257446
7 | poly: False | max_features: 1.0 | trees: 250 | time: 14.746327638626099
8 | poly: True | max_features: sqrt | trees: 500 | time: 55.09551548957825
9 | poly: False | max_features: sqrt | trees: 500 | time: 20.17998743057251
10 | poly: True | max_features: 1.0 | trees: 500 | time: 278.7545804977417
11 | poly: False | max_features: 1.0 | trees: 500 | time: 26.76724362373352
benchmarking complete, sa

Unnamed: 0,n_estimators,max_features,random_state,n_features,accuracy,time
0,100.0,sqrt,8.0,1540.0,0.815263,10.326879
1,100.0,sqrt,8.0,54.0,0.712297,5.022055
2,100.0,1.0,8.0,1540.0,0.889986,56.206864
3,100.0,1.0,8.0,54.0,0.896775,6.612631
4,250.0,sqrt,8.0,1540.0,0.814351,15.04646
5,250.0,sqrt,8.0,54.0,0.714577,16.597933
6,250.0,1.0,8.0,1540.0,0.890485,137.883528
7,250.0,1.0,8.0,54.0,0.897094,14.746328
8,500.0,sqrt,8.0,1540.0,0.81571,55.095515
9,500.0,sqrt,8.0,54.0,0.711711,20.179987
