# CPU vs. GPU Scikit-Learn Benchmarks

## Imports

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures
import time
import pandas as pd
import subprocess
from IPython import get_ipython
import pynvml

## Load a Dataset of Your Choice

In [3]:
covertype = datasets.fetch_covtype()
x = covertype.data
y = covertype.target

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=432
)

print("columns:", x.shape[1])
print("rows:", x.shape[0])

columns: 54
rows: 581012


## Benchmarking Functions

In [4]:
def benchmark_one(
    x_train, x_test, y_train, y_test,
    random_state=8, 
    n_jobs=-1, 
    n_estimators=100, 
    max_features="sqrt", 
    poly=False
):
    """
    training benchmark function for CPU and GPU
    """
    if poly == True:
        poly = PolynomialFeatures()
        x_train = poly.fit_transform(x_train)
        x_test = poly.fit_transform(x_test)

    # CPU 1540 features
    model = RandomForestClassifier(
        random_state=random_state,
        n_jobs=n_jobs,
        n_estimators=n_estimators,
        max_features=max_features,     
        )

    start_time = time.time()
    
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    n_features = x_train.shape[1]
    end_time = time.time() - start_time

    return [
        n_estimators, 
        max_features, 
        random_state, 
        n_features, 
        accuracy, 
        end_time
    ]

def benchmark_many(
    estimators_many, max_features_many, file_name
):
    df = pd.DataFrame(
        columns=[
            "n_estimators", 
            "max_features", 
            "random_state", 
            "n_features", 
            "accuracy", 
            "time"
        ]
    )

    idx = 0
    for e in estimators_many:
        for m in max_features_many:
            for p in [True, False]:
                df.loc[idx] = benchmark_one(
                    x_train, 
                    x_test, 
                    y_train, 
                    y_test, 
                    poly=p, 
                    n_estimators=e, 
                    max_features=m
                )
                print(
                    idx, 
                    "| poly:", p, 
                    "| max_features:", m, 
                    "| trees:",e , 
                    "| time:", df.loc[idx]["time"]
                )
                idx += 1
                
                # cool down for 5 seconds
                time.sleep(5)
                # save results to file in case of crash
                df.to_csv(file_name)
    print("benchmarking complete, saved as", file_name)
    return df

## CPU Benchmarking

In [4]:
device_make = subprocess.check_output("cat /proc/cpuinfo | grep 'model name' | head -n 1", shell=True, text=True).split(":")[1].strip()
print("wait while benchmarking CPU:", device_make, "...")

df = benchmark_many([100, 250, 500], ["sqrt", 1.0], "cpu_results.csv")

wait while benchmarking CPU: 12th Gen Intel(R) Core(TM) i9-12900K ...
0 | poly: True | max_features: sqrt | trees: 100 | time: 57.99017858505249
1 | poly: False | max_features: sqrt | trees: 100 | time: 11.688645839691162
2 | poly: True | max_features: 1.0 | trees: 100 | time: 925.05837059021
3 | poly: False | max_features: 1.0 | trees: 100 | time: 39.949706077575684
4 | poly: True | max_features: sqrt | trees: 250 | time: 143.8857319355011
5 | poly: False | max_features: sqrt | trees: 250 | time: 27.840553283691406
6 | poly: True | max_features: 1.0 | trees: 250 | time: 2296.461593389511
7 | poly: False | max_features: 1.0 | trees: 250 | time: 98.58275628089905
8 | poly: True | max_features: sqrt | trees: 500 | time: 283.33987498283386
9 | poly: False | max_features: sqrt | trees: 500 | time: 55.06251049041748
10 | poly: True | max_features: 1.0 | trees: 500 | time: 4581.99442076683
11 | poly: False | max_features: 1.0 | trees: 500 | time: 210.05930376052856
benchmarking complete, sav

### CPU Results

In [5]:
df

Unnamed: 0,n_estimators,max_features,random_state,n_features,accuracy,time
0,100.0,sqrt,8.0,1540.0,0.956378,57.990179
1,100.0,sqrt,8.0,54.0,0.953796,11.688646
2,100.0,1.0,8.0,1540.0,0.967006,925.058371
3,100.0,1.0,8.0,54.0,0.968219,39.949706
4,250.0,sqrt,8.0,1540.0,0.957092,143.885732
5,250.0,sqrt,8.0,54.0,0.955173,27.840553
6,250.0,1.0,8.0,1540.0,0.967462,2296.461593
7,250.0,1.0,8.0,54.0,0.968469,98.582756
8,500.0,sqrt,8.0,1540.0,0.95723,283.339875
9,500.0,sqrt,8.0,54.0,0.955423,55.06251


## GPU Benchmarking

### Restart Kernel

In [6]:
get_ipython().kernel.do_shutdown(restart=True)

{'status': 'ok', 'restart': True}

### Load cuML Scikit-Learn GPU Extenssion

In [1]:
import warnings
warnings.filterwarnings('ignore')

%load_ext cuml.accel

cuML: Could not enable managed memory.
cuML: Installed accelerator for sklearn.
cuML: Successfully initialized accelerator.


### 4.2 Re-Run Sections 1-3

Please make sure you:
- import required modules
- load dataset
- define nescessary functions

### GPU Results

In [5]:
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
device_make = pynvml.nvmlDeviceGetName(handle) 
pynvml.nvmlShutdown()

print("wait while benchmarking GPU:", device_make, "...")

df = benchmark_many([100, 250, 500], ["sqrt", 1.0], "gpu_results.csv")
df

wait while benchmarking GPU: NVIDIA GeForce RTX 4080 ...
0 | poly: True | max_features: sqrt | trees: 100 | time: 9.37942624092102
1 | poly: False | max_features: sqrt | trees: 100 | time: 3.4707376956939697
2 | poly: True | max_features: 1.0 | trees: 100 | time: 47.65140509605408
3 | poly: False | max_features: 1.0 | trees: 100 | time: 20.10451579093933
4 | poly: True | max_features: sqrt | trees: 250 | time: 13.472508907318115
5 | poly: False | max_features: sqrt | trees: 250 | time: 8.598607540130615
6 | poly: True | max_features: 1.0 | trees: 250 | time: 129.8329882621765
7 | poly: False | max_features: 1.0 | trees: 250 | time: 645.0701539516449
8 | poly: True | max_features: sqrt | trees: 500 | time: 23.3800687789917
9 | poly: False | max_features: sqrt | trees: 500 | time: 15.749321222305298
10 | poly: True | max_features: 1.0 | trees: 500 | time: 226.81419229507446
11 | poly: False | max_features: 1.0 | trees: 500 | time: 21.39484977722168
benchmarking complete, saved as gpu_res

Unnamed: 0,n_estimators,max_features,random_state,n_features,accuracy,time
0,100.0,sqrt,8.0,1540.0,0.815263,9.379426
1,100.0,sqrt,8.0,54.0,0.712297,3.470738
2,100.0,1.0,8.0,1540.0,0.889986,47.651405
3,100.0,1.0,8.0,54.0,0.896775,20.104516
4,250.0,sqrt,8.0,1540.0,0.814351,13.472509
5,250.0,sqrt,8.0,54.0,0.714577,8.598608
6,250.0,1.0,8.0,1540.0,0.890485,129.832988
7,250.0,1.0,8.0,54.0,0.897094,645.070154
8,500.0,sqrt,8.0,1540.0,0.81571,23.380069
9,500.0,sqrt,8.0,54.0,0.711711,15.749321


In [6]:
df

Unnamed: 0,n_estimators,max_features,random_state,n_features,accuracy,time
0,100.0,sqrt,8.0,1540.0,0.815263,9.379426
1,100.0,sqrt,8.0,54.0,0.712297,3.470738
2,100.0,1.0,8.0,1540.0,0.889986,47.651405
3,100.0,1.0,8.0,54.0,0.896775,20.104516
4,250.0,sqrt,8.0,1540.0,0.814351,13.472509
5,250.0,sqrt,8.0,54.0,0.714577,8.598608
6,250.0,1.0,8.0,1540.0,0.890485,129.832988
7,250.0,1.0,8.0,54.0,0.897094,645.070154
8,500.0,sqrt,8.0,1540.0,0.81571,23.380069
9,500.0,sqrt,8.0,54.0,0.711711,15.749321
