# Import Modules
---

Import both the CPU and the GPU version of each library so they can be compared.

If you want to see how much of your GPU is being utilized, run the command below in a terminal:

```bash
watch -n 0.1 nvidia-smi
```

In [1]:
import time

import cupy as cp
import numpy as np

import cudf
import pandas as pd

from sklearn.ensemble import RandomForestClassifier as RF_cpu
from cuml.ensemble    import RandomForestClassifier as RF_gpu

# Make Timer Class
---

Always measure the execution time.

CPU-based code vs. GPU-based code

In [2]:
class Timer:
    """Context manager that measures and prints the wall-clock time of a block.

    Parameters
    ----------
    name : str, default "cpu"
        Label printed in front of the elapsed time.

    After the ``with`` block finishes, the instance exposes:

    * ``start`` / ``end`` -- the ``time.perf_counter()`` readings taken on
      entry and exit.
    * ``execute_time`` -- ``end - start``, in seconds.
    """

    def __init__(self, name="cpu"):
        self.name = name

    def __enter__(self):
        self.start = time.perf_counter()
        # Return the instance so that `with Timer(...) as t:` binds the timer
        # itself; without this the `as` target is None and the measured time
        # cannot be read back after the block.
        return self

    def __exit__(self, type, value, trackback):
        # The exception arguments are intentionally unused: the elapsed time is
        # reported either way, and returning None lets any exception propagate.
        self.end = time.perf_counter()
        self.execute_time = self.end - self.start
        print(f"{self.name} execute time : {self.execute_time:.4f} seconds")

# Load Data

In [3]:
import glob
import os

# List every entry that sits directly inside the local "data" directory.
data_pattern = os.path.join("data", "*")
path = glob.glob(data_pattern)
print(path)

['data/gender_submission.csv', 'data/train.csv', 'data/train.parquet', 'data/test.csv', 'data/test.parquet']


In [4]:
# Read the train/test CSV files with pandas and report the elapsed time.
with Timer(name="cpu") as cpu_time:
    train_data_cpu, test_data_cpu = (
        pd.read_csv("./data/train.csv"),
        pd.read_csv("./data/test.csv"),
    )

# Read the same two files with cuDF and report the elapsed time.
with Timer(name="gpu") as gpu_time:
    train_data_gpu, test_data_gpu = (
        cudf.read_csv("./data/train.csv"),
        cudf.read_csv("./data/test.csv"),
    )

cpu execute time : 0.0077 seconds
gpu execute time : 0.3630 seconds


In [5]:
# Preview the first five rows of the pandas training frame.
train_data_cpu.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [6]:
# Preview the first five rows of the cuDF training frame.
train_data_gpu.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [7]:
# Survival rate per sex on the pandas frame, timed for comparison with the GPU cell.
with Timer(name="cpu") as cpu_time:
    # Select rows and column in one .loc call instead of chained indexing.
    women = train_data_cpu.loc[train_data_cpu.Sex == 'female', "Survived"]
    # Series.sum() is vectorized; the builtin sum() would iterate the Series
    # element by element in Python, which is not what the GPU cell measures.
    rate_women = women.sum() / len(women)
    print(f"% of women who survived: {rate_women}")

    men = train_data_cpu.loc[train_data_cpu.Sex == 'male', "Survived"]
    rate_men = men.sum() / len(men)
    print(f"% of men who survived: {rate_men}")

% of women who survived: 0.7420382165605095
% of men who survived: 0.18890814558058924
cpu execute time : 0.0013 seconds


In [8]:
# Survival rate per sex on the cuDF frame, timed as a whole.
with Timer(name="gpu") as gpu_time:
    # Female passengers: share of rows whose "Survived" flag is set.
    women = train_data_gpu.loc[train_data_gpu["Sex"] == "female"]["Survived"]
    rate_women = women.sum() / len(women)
    print(f"% of women who survived: {rate_women}")

    # Male passengers: same computation.
    men = train_data_gpu.loc[train_data_gpu["Sex"] == "male"]["Survived"]
    rate_men = men.sum() / len(men)
    print(f"% of men who survived: {rate_men}")

% of women who survived: 0.7420382165605095
% of men who survived: 0.18890814558058924
gpu execute time : 0.0224 seconds


In [9]:
# Train a 100-tree random forest with scikit-learn and predict on the test set.
with Timer(name="cpu run") as cpu_time:
    y = train_data_cpu["Survived"]

    features = ["Pclass", "Sex", "SibSp", "Parch"]
    X = pd.get_dummies(train_data_cpu[features]).astype(np.float32)
    # get_dummies is applied to train and test independently, so a category
    # missing from one of them would yield different columns. Align the test
    # matrix to the training columns; dummies absent from test become 0.
    X_test = (
        pd.get_dummies(test_data_cpu[features])
        .reindex(columns=X.columns, fill_value=0)
        .astype(np.float32)
    )

    model = RF_cpu(n_estimators=100, max_depth=5, random_state=1)
    model.fit(X, y)
    predictions = model.predict(X_test)

    output = pd.DataFrame({'PassengerId': test_data_cpu.PassengerId, 'Survived': predictions})

# Writing the submission file is timed separately from training/prediction.
with Timer(name="cpu save") as cpu_time:
    output.to_csv('submission_RF_cpu.csv', index=False)
    print("Your submission was successfully saved!")

cpu run execute time : 0.0905 seconds
Your submission was successfully saved!
cpu save execute time : 0.0014 seconds


In [10]:
# Train a 100-tree random forest with cuML and predict on the test set.
with Timer(name="gpu run") as gpu_time:
    features = ["Pclass", "Sex", "SibSp", "Parch"]
    y = train_data_gpu["Survived"]

    # One-hot encode the selected columns and cast everything to float32.
    X = cudf.get_dummies(train_data_gpu[features]).astype(cp.float32)
    X_test = cudf.get_dummies(test_data_gpu[features]).astype(cp.float32)

    model = RF_gpu(
        n_estimators=100,
        max_depth=5,
        random_state=1,
        n_streams=1,
    )
    model.fit(X, y)
    predictions = model.predict(X_test)

    output = cudf.DataFrame(
        {
            "PassengerId": test_data_gpu["PassengerId"],
            "Survived": predictions,
        }
    )

# Writing the submission file is timed separately from training/prediction.
with Timer(name="gpu save") as gpu_time:
    output.to_csv("submission_RF_gpu.csv", index=False)
    print("Your submission was successfully saved!")



gpu run execute time : 0.3367 seconds
Your submission was successfully saved!
gpu save execute time : 0.0029 seconds


# A More Complex Example with More Iterations

In [11]:
# Train a 10000-tree random forest with scikit-learn and predict on the test set.
with Timer(name="cpu run") as cpu_time:
    y = train_data_cpu["Survived"]

    features = ["Pclass", "Sex", "SibSp", "Parch"]
    X = pd.get_dummies(train_data_cpu[features]).astype(np.float32)
    # get_dummies is applied to train and test independently, so a category
    # missing from one of them would yield different columns. Align the test
    # matrix to the training columns; dummies absent from test become 0.
    X_test = (
        pd.get_dummies(test_data_cpu[features])
        .reindex(columns=X.columns, fill_value=0)
        .astype(np.float32)
    )

    model = RF_cpu(n_estimators=10000, max_depth=100, random_state=1)
    model.fit(X, y)
    predictions = model.predict(X_test)

    output = pd.DataFrame({'PassengerId': test_data_cpu.PassengerId, 'Survived': predictions})

# Writing the submission file is timed separately from training/prediction.
with Timer(name="cpu save") as cpu_time:
    output.to_csv('submission_RF_cpu.csv', index=False)
    print("Your submission was successfully saved!")

cpu run execute time : 8.5194 seconds
Your submission was successfully saved!
cpu save execute time : 0.0010 seconds


In [12]:
# Train a 10000-tree random forest with cuML and predict on the test set.
# The timers are bound to `gpu_time` (not `cpu_time`) to match the other GPU cells.
with Timer(name="gpu run") as gpu_time:
    y = train_data_gpu["Survived"]

    features = ["Pclass", "Sex", "SibSp", "Parch"]
    # One-hot encode the selected columns and cast everything to float32.
    X = cudf.get_dummies(train_data_gpu[features]).astype(cp.float32)
    X_test = cudf.get_dummies(test_data_gpu[features]).astype(cp.float32)

    model = RF_gpu(n_estimators=10000, max_depth=100, random_state=1, n_streams=1)
    model.fit(X, y)
    predictions = model.predict(X_test)

    output = cudf.DataFrame({'PassengerId': test_data_gpu.PassengerId, 'Survived': predictions})

# Writing the submission file is timed separately from training/prediction.
with Timer(name="gpu save") as gpu_time:
    output.to_csv('submission_RF_gpu.csv', index=False)
    print("Your submission was successfully saved!")



gpu run execute time : 32.7713 seconds
Your submission was successfully saved!
gpu save execute time : 0.0009 seconds
