In [1]:
!nvidia-smi

Sat Jan 25 05:43:40 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.44       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
import pynvml
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
device_name = pynvml.nvmlDeviceGetName(handle)
if (device_name != b'Tesla T4') and (device_name != b'Tesla P100-PCIE-16GB'):
  raise Exception("""
    Unfortunately this instance does not have a T4 or P100 GPU.
    
    Please make sure you've configured Colab to request a GPU instance type.
    
    Sometimes Colab allocates a Tesla K80 instead of a T4 or P100. Resetting the instance.
If you get a K80 GPU, try Runtime -> Reset all runtimes...
  """)
else:
  print('Yes, you got the right kind of GPU to work and it is a Tesla T4 GPU.')

Yes, you got the right kind of GPU to work and it is a Tesla T4 GPU.


In [4]:
!wget -nc https://github.com/rapidsai/notebooks-extended/raw/master/utils/rapids-colab.sh
!bash rapids-colab.sh

import sys, os

sys.path.append('/usr/local/lib/python3.6/site-packages/')
os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'
os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'

--2020-01-25 05:49:11--  https://github.com/rapidsai/notebooks-extended/raw/master/utils/rapids-colab.sh
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://github.com/rapidsai/notebooks-contrib/raw/master/utils/rapids-colab.sh [following]
--2020-01-25 05:49:11--  https://github.com/rapidsai/notebooks-contrib/raw/master/utils/rapids-colab.sh
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh [following]
--2020-01-25 05:49:11--  https://raw.githubusercontent.com/rapidsai/notebooks-contrib/master/utils/rapids-colab.sh
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.co

# **Stochastic Gradient Descent (SGD)** 
SGD is an  incremental gradient descent algorithm which modifies its weights, in an effort to reach a local minimum. The cuML implementation takes only numpy arrays and cuDF datasets as inputs. In order to convert your dataset into a cuDF dataframe format please refer the documentation on https://rapidsai.github.io/projects/cudf/en/latest/. The SGD algorithm implemented in cuML can accept the following parameters:
1. loss : 'hinge', 'log', 'squared_loss' (default = 'squared_loss')
2. penalty: 'none', 'l1', 'l2', 'elasticnet' (default = 'none')
3. alpha: float (default = 0.0001)
4. fit_intercept : boolean (default = True)
5. epochs : int (default = 1000)
6. tol : float (default = 1e-3)
7. shuffle : boolean (default = True)
8. eta0 : float (default = 0.0)
9. power_t : float (default = 0.5)
10. learning_rate : 'optimal', 'constant', 'invscaling', 'adaptive' (default = 'constant')
11. n_iter_no_change : int (default = 5)

For additional information on the SGD model please refer to the documentation on https://rapidsai.github.io/projects/cuml/en/latest/index.html


In [0]:
import os

import numpy as np
import pandas as pd

import cudf
from cuml.solvers import SGD as cumlSGD

from sklearn.linear_model import SGDRegressor

In [0]:
# check if the mortgage dataset is present and then extract the data from it, else just create a random dataset for sgd 
import gzip
# change the path of the mortgage dataset if you have saved it in a different directory
def load_data(nrows, ncols, cached = 'mortgage.npy.gz'):
    if os.path.exists(cached):
        print('use mortgage data')

        with gzip.open(cached) as f:
            X = np.load(f)
        # the 4th column is 'adj_remaining_months_to_maturity'
        # used as the label
        X = X[:,[i for i in range(X.shape[1]) if i!=4]]
        y = X[:,4:5]
        rindices = np.random.randint(0,X.shape[0]-1,nrows)
        X = X[rindices,:ncols]
        y = y[rindices]

    else:
        # create a random dataset
        print('use random data')
        X = np.random.rand(nrows,ncols)
        y = np.random.randint(0,10,size=(nrows,1))
    train_rows = int(nrows*0.8)
    df_X_train = pd.DataFrame({'fea%d'%i:X[0:train_rows,i] for i in range(X.shape[1])})
    df_X_test = pd.DataFrame({'fea%d'%i:X[train_rows:,i] for i in range(X.shape[1])})
    df_y_train = pd.DataFrame({'fea%d'%i:y[0:train_rows,i] for i in range(y.shape[1])})
    df_y_test = pd.DataFrame({'fea%d'%i:y[train_rows:,i] for i in range(y.shape[1])})
    return df_X_train, df_X_test, df_y_train, df_y_test

In [0]:
# this function checks if the results obtained from two different methods (sklearn and cuml) are the same
from sklearn.metrics import mean_squared_error
def array_equal(a,b,threshold=2e-3,with_sign=True):
    a = to_nparray(a).ravel()
    b = to_nparray(b).ravel()
    if with_sign == False:
        a,b = np.abs(a),np.abs(b)
    error = mean_squared_error(a,b)
    res = error<threshold
    return res

# the function converts a variable from ndarray or dataframe format to numpy array
def to_nparray(x):
    if isinstance(x,np.ndarray) or isinstance(x,pd.DataFrame):
        return np.array(x)
    elif isinstance(x,np.float64):
        return np.array([x])
    elif isinstance(x,cudf.DataFrame) or isinstance(x,cudf.Series):
        return x.to_pandas().values
    return x

## **Loading Data**

In [10]:
from google.colab import files
uploaded = files.upload()

Saving mortgage.npy.gz to mortgage.npy.gz


In [13]:
%%time
# nrows = number of samples
# ncols = number of features of each sample
nrows = 2**20
ncols = 399

# dataset is split into a ratio of 80:20, 
# 80% is used as the training data and the remaining 20% is used as the test data
X_train, X_test, y_train, y_test = load_data(nrows,ncols)
y_train_ser = y_train['fea0']
print('training data',X_train.shape)
print('training label',y_train.shape)
print('testing data',X_test.shape)
print('testing label',y_test.shape)

use mortgage data
training data (838860, 399)
training label (838860, 1)
testing data (209716, 399)
testing label (209716, 1)
CPU times: user 14.4 s, sys: 436 ms, total: 14.8 s
Wall time: 14.9 s


In [14]:
X_train

Unnamed: 0,fea0,fea1,fea2,fea3,fea4,fea5,fea6,fea7,fea8,fea9,fea10,fea11,fea12,fea13,fea14,fea15,fea16,fea17,fea18,fea19,fea20,fea21,fea22,fea23,fea24,fea25,fea26,fea27,fea28,fea29,fea30,fea31,fea32,fea33,fea34,fea35,fea36,fea37,fea38,fea39,...,fea359,fea360,fea361,fea362,fea363,fea364,fea365,fea366,fea367,fea368,fea369,fea370,fea371,fea372,fea373,fea374,fea375,fea376,fea377,fea378,fea379,fea380,fea381,fea382,fea383,fea384,fea385,fea386,fea387,fea388,fea389,fea390,fea391,fea392,fea393,fea394,fea395,fea396,fea397,fea398
0,0.694444,0.223818,0.040541,0.513672,0.383997,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.666667,0.196900,0.013514,0.408203,0.763571,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.611111,0.110660,0.103604,0.720703,0.786088,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.500000,0.207516,0.180180,0.687500,0.499799,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.611111,0.106792,0.265766,0.650391,0.565742,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838855,0.722222,0.196900,0.013514,0.759766,0.841576,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
838856,0.805556,0.196900,0.018018,0.757812,0.548050,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
838857,0.500000,0.158911,0.067568,0.384766,0.503820,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
838858,0.541667,0.215097,0.099099,0.722656,0.531162,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
X_test

Unnamed: 0,fea0,fea1,fea2,fea3,fea4,fea5,fea6,fea7,fea8,fea9,fea10,fea11,fea12,fea13,fea14,fea15,fea16,fea17,fea18,fea19,fea20,fea21,fea22,fea23,fea24,fea25,fea26,fea27,fea28,fea29,fea30,fea31,fea32,fea33,fea34,fea35,fea36,fea37,fea38,fea39,...,fea359,fea360,fea361,fea362,fea363,fea364,fea365,fea366,fea367,fea368,fea369,fea370,fea371,fea372,fea373,fea374,fea375,fea376,fea377,fea378,fea379,fea380,fea381,fea382,fea383,fea384,fea385,fea386,fea387,fea388,fea389,fea390,fea391,fea392,fea393,fea394,fea395,fea396,fea397,fea398
0,0.569444,0.196900,0.027027,0.753906,0.670285,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.444444,0.137133,0.243243,0.308594,0.531162,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.555556,0.096479,0.441441,0.574219,0.321271,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.611111,0.200430,0.153153,0.699219,0.531162,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.680556,0.196900,0.009009,0.761719,0.341375,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209711,0.500000,0.265585,0.054054,0.742188,0.782067,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
209712,0.736111,0.361153,0.234234,0.664062,0.396864,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
209713,0.638889,0.318892,0.090090,0.726562,0.242461,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
209714,0.583333,0.207611,0.072072,0.382812,0.255328,0.0,0.044486,0.059951,0.061698,0.716319,0.083032,0.231624,0.113612,0.212412,0.00678,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
y_train

Unnamed: 0,fea0
0,0.383997
1,0.763571
2,0.786088
3,0.499799
4,0.565742
...,...
838855,0.841576
838856,0.548050
838857,0.503820
838858,0.531162


In [17]:
y_test

Unnamed: 0,fea0
0,0.670285
1,0.531162
2,0.321271
3,0.531162
4,0.341375
...,...
209711,0.782067
209712,0.396864
209713,0.242461
209714,0.255328


## **Model Parameter**

In [0]:
#set parameters 
learning_rate = 'adaptive'
datatype = np.float32
penalty = 'elasticnet'
loss = 'squared_loss'
iterations = 10 

## **Scikit-Learn Implementation**

In [19]:
%%time
# use the sklearn SGD Regressor model to fit the dataset 
sk_sgd = SGDRegressor(learning_rate = learning_rate, 
                      eta0 = 0.07,
                      max_iter = iterations, 
                      tol = 0.0, 
                      fit_intercept = True,
                      penalty = penalty, 
                      loss = loss)

sk_sgd.fit(X_train, y_train_ser)

CPU times: user 22.6 s, sys: 931 ms, total: 23.5 s
Wall time: 23.6 s




In [24]:
%%time
# test the model by predicting its results for the unseen test set
y_pred_sk_sgd = sk_sgd.predict(X_test)

# calculate the Mean Squared Error for the model's predictions
error_sk_sgd = mean_squared_error(y_test, y_pred_sk_sgd)

CPU times: user 422 ms, sys: 54 ms, total: 476 ms
Wall time: 394 ms


## **cuML Implementation**

In [22]:
%%time
# convert the pandas dataframe to cuDF dataframe and series
X_cudf = cudf.DataFrame.from_pandas(X_train)
X_cudf_test = cudf.DataFrame.from_pandas(X_test)
y_cudf = cudf.Series(y_train_ser)

CPU times: user 1.68 s, sys: 363 ms, total: 2.04 s
Wall time: 4.72 s


In [23]:
%%time
# fit the training data on cuML's implementation of SGD
cuml_sgd = cumlSGD(learning_rate = learning_rate, 
                   eta0 = 0.07, 
                   epochs = iterations, #epochs == n_iter
                   batch_size = 512,
                   tol = 0.0, 
                   penalty = penalty, 
                   loss = loss)
cuml_sgd.fit(X_cudf, y_cudf)

CPU times: user 4.22 s, sys: 922 ms, total: 5.14 s
Wall time: 9.58 s


In [25]:
%%time
# test the model by predicting its values for the test set
y_pred_cuml_sgd = cuml_sgd.predict(X_cudf_test)
y_pred_cuml_sgd = to_nparray(y_pred_cuml_sgd).ravel()
# calculate the Mean Squared Error for the model's predictions
error_cuml_sgd = mean_squared_error(y_test, y_pred_cuml_sgd)

CPU times: user 204 ms, sys: 11.1 ms, total: 215 ms
Wall time: 218 ms


In [26]:
# print the MSE of the sklearn and cuML models to compare them
print("SK MSE(y):", error_sk_sgd)
print("CUML MSE(y):", error_cuml_sgd)

SK MSE(y): 1.203114542098755e-07
CUML MSE(y): 1.0342661e-07
