# Comparing Machine Learning Libraries

For this overview example, we will create a classification model using:

1. scikit-learn
2. Keras
3. PyTorch

## scikit-learn style

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFECV
from sklearn.model_selection import train_test_split

In [2]:
%%time
cancer = load_breast_cancer()
X_scaled = StandardScaler().fit_transform(cancer.data)
print(X_scaled.shape)

(569, 30)
CPU times: user 15.7 ms, sys: 3.2 ms, total: 18.9 ms
Wall time: 33.6 ms


In [3]:
%%time
poly = PolynomialFeatures(2)
X_poly = poly.fit_transform(X_scaled)
print(X_poly.shape)

(569, 496)
CPU times: user 14.7 ms, sys: 3.64 ms, total: 18.3 ms
Wall time: 43.1 ms


In [4]:
%%time
rfc = RandomForestClassifier(max_depth=7, n_estimators=10, random_state=1)
rfecv = RFECV(estimator=rfc, cv=5, n_jobs=-1)
X_poly_top = rfecv.fit_transform(X_poly, cancer.target)
print(X_poly_top.shape)

(569, 278)
CPU times: user 11.1 s, sys: 138 ms, total: 11.2 s
Wall time: 1min 7s


In [5]:
%%time
X_train, X_test, y_train, y_test = train_test_split(
    X_poly_top, cancer.target, random_state=42)

rfc = RandomForestClassifier(max_depth=7, n_estimators=10, random_state=1)
print(rfc.fit(X_train, y_train).score(X_test, y_test))

0.937062937063
CPU times: user 63.2 ms, sys: 5.19 ms, total: 68.4 ms
Wall time: 71.3 ms


## Keras style

In [6]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=42)

Using TensorFlow backend.


In [43]:
batch_size = 32
in_dim = cancer.data.shape[1]
hidden1 = X_poly_top.shape[1]
hidden2 = 20
out_dim = 1
batches_in_data = X_train.shape[0]/batch_size
epochs = int(5000/batches_in_data)
learning_rate = 1e-4


model_k = keras.models.Sequential([
    keras.layers.Dense(hidden1, activation='relu', input_shape=(in_dim,)),
    keras.layers.Dense(hidden2),
    keras.layers.LeakyReLU(),
    keras.layers.Dropout(rate=0.25),
    keras.layers.Dense(out_dim, activation='sigmoid')
])

In [44]:
model_k.summary()        

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 278)               8618      
_________________________________________________________________
dense_29 (Dense)             (None, 20)                5580      
_________________________________________________________________
leaky_re_lu_10 (LeakyReLU)   (None, 20)                0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 20)                0         
_________________________________________________________________
dense_30 (Dense)             (None, 1)                 21        
Total params: 14,219
Trainable params: 14,219
Non-trainable params: 0
_________________________________________________________________


In [45]:
model_k.compile(loss='mean_squared_error',
                optimizer=keras.optimizers.RMSprop(lr=learning_rate),
                metrics=['accuracy'])

history = model_k.fit(X_train, y_train,
                      batch_size=batch_size,
                      epochs=epochs,
                      verbose=True,
                      validation_data=(X_test, y_test))
score = model_k.evaluate(X_test, y_test, verbose=True)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Train on 426 samples, validate on 143 samples
Epoch 1/375
Epoch 2/375
Epoch 3/375
Epoch 4/375
Epoch 5/375
Epoch 6/375
Epoch 7/375
Epoch 8/375
Epoch 9/375
Epoch 10/375
Epoch 11/375
Epoch 12/375
Epoch 13/375
Epoch 14/375
Epoch 15/375
Epoch 16/375
Epoch 17/375
Epoch 18/375
Epoch 19/375
Epoch 20/375
Epoch 21/375
Epoch 22/375
Epoch 23/375
Epoch 24/375
Epoch 25/375
Epoch 26/375
Epoch 27/375
Epoch 28/375
Epoch 29/375
Epoch 30/375
Epoch 31/375
Epoch 32/375
Epoch 33/375
Epoch 34/375
Epoch 35/375
Epoch 36/375
Epoch 37/375
Epoch 38/375
Epoch 39/375
Epoch 40/375
Epoch 41/375
Epoch 42/375
Epoch 43/375
Epoch 44/375
Epoch 45/375
Epoch 46/375
Epoch 47/375
Epoch 48/375
Epoch 49/375
Epoch 50/375
Epoch 51/375
Epoch 52/375
Epoch 53/375
Epoch 54/375
Epoch 55/375
Epoch 56/375
Epoch 57/375
Epoch 58/375
Epoch 59/375
Epoch 60/375
Epoch 61/375
Epoch 62/375
Epoch 63/375
Epoch 64/375
Epoch 65/375
Epoch 66/375
Epoch 67/375
Epoch 68/375
Epoch 69/375
Epoch 70/375
Epoch 71/375
Epoch 72/375
Epoch 73/375
Epoch 74/375
E

Epoch 132/375
Epoch 133/375
Epoch 134/375
Epoch 135/375
Epoch 136/375
Epoch 137/375
Epoch 138/375
Epoch 139/375
Epoch 140/375
Epoch 141/375
Epoch 142/375
Epoch 143/375
Epoch 144/375
Epoch 145/375
Epoch 146/375
Epoch 147/375
Epoch 148/375
Epoch 149/375
Epoch 150/375
Epoch 151/375
Epoch 152/375
Epoch 153/375
Epoch 154/375
Epoch 155/375
Epoch 156/375
Epoch 157/375
Epoch 158/375
Epoch 159/375
Epoch 160/375
Epoch 161/375
Epoch 162/375
Epoch 163/375
Epoch 164/375
Epoch 165/375
Epoch 166/375
Epoch 167/375
Epoch 168/375
Epoch 169/375
Epoch 170/375
Epoch 171/375
Epoch 172/375
Epoch 173/375
Epoch 174/375
Epoch 175/375
Epoch 176/375
Epoch 177/375
Epoch 178/375
Epoch 179/375
Epoch 180/375
Epoch 181/375
Epoch 182/375
Epoch 183/375
Epoch 184/375
Epoch 185/375
Epoch 186/375
Epoch 187/375
Epoch 188/375
Epoch 189/375
Epoch 190/375
Epoch 191/375
Epoch 192/375
Epoch 193/375
Epoch 194/375
Epoch 195/375
Epoch 196/375
Epoch 197/375
Epoch 198/375
Epoch 199/375
Epoch 200/375
Epoch 201/375
Epoch 202/375
Epoch 

Epoch 261/375
Epoch 262/375
Epoch 263/375
Epoch 264/375
Epoch 265/375
Epoch 266/375
Epoch 267/375
Epoch 268/375
Epoch 269/375
Epoch 270/375
Epoch 271/375
Epoch 272/375
Epoch 273/375
Epoch 274/375
Epoch 275/375
Epoch 276/375
Epoch 277/375
Epoch 278/375
Epoch 279/375
Epoch 280/375
Epoch 281/375
Epoch 282/375
Epoch 283/375
Epoch 284/375
Epoch 285/375
Epoch 286/375
Epoch 287/375
Epoch 288/375
Epoch 289/375
Epoch 290/375
Epoch 291/375
Epoch 292/375
Epoch 293/375
Epoch 294/375
Epoch 295/375
Epoch 296/375
Epoch 297/375
Epoch 298/375
Epoch 299/375
Epoch 300/375
Epoch 301/375
Epoch 302/375
Epoch 303/375
Epoch 304/375
Epoch 305/375
Epoch 306/375
Epoch 307/375
Epoch 308/375
Epoch 309/375
Epoch 310/375
Epoch 311/375
Epoch 312/375
Epoch 313/375
Epoch 314/375
Epoch 315/375
Epoch 316/375
Epoch 317/375
Epoch 318/375
Epoch 319/375
Epoch 320/375
Epoch 321/375
Epoch 322/375
Epoch 323/375
Epoch 324/375
Epoch 325/375
Epoch 326/375
Epoch 327/375
Epoch 328/375
Epoch 329/375
Epoch 330/375
Epoch 331/375
Epoch 

## PyTorch style

In [10]:
import numpy as np
import torch
from torch.autograd import Variable

X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=42)
cancer.data.shape

(569, 30)

In [11]:
# Create a sequential NN
batch_size = 32
in_dim = cancer.data.shape[1]
hidden1 = X_poly_top.shape[1]
hidden2 = 20
out_dim = 1
learning_rate = 1e-4

model_t = torch.nn.Sequential(
    torch.nn.Linear(in_dim, hidden1),
    torch.nn.ReLU(),
    # Add a hidden layer that loosely represents the poly features
    torch.nn.Linear(hidden1, hidden2),
    torch.nn.LeakyReLU(),  ## Use LeakyReLU might limit dead neurons
    # A Dropout layer sometimes reduces co-adaptation of neurons
    torch.nn.Dropout(p=0.25),
    # Add a second hidden layer for further abstraction
    torch.nn.Linear(hidden2, out_dim),  
    # Add an output layer
    torch.nn.Sigmoid()
)

loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.RMSprop(model_t.parameters(), lr=learning_rate)

In [12]:
from torchsummary import summary
summary(model_t, input_size=(1,in_dim))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1               [-1, 1, 278]           8,618
              ReLU-2               [-1, 1, 278]               0
            Linear-3                [-1, 1, 20]           5,580
         LeakyReLU-4                [-1, 1, 20]               0
           Dropout-5                [-1, 1, 20]               0
            Linear-6                 [-1, 1, 1]              21
           Sigmoid-7                 [-1, 1, 1]               0
Total params: 14,219
Trainable params: 14,219
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.05
Estimated Total Size (MB): 0.06
----------------------------------------------------------------


In [33]:
model_k.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_13 (Dense)             (None, 278)               8618      
_________________________________________________________________
dense_14 (Dense)             (None, 20)                5580      
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 20)                0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 20)                0         
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 21        
Total params: 14,219
Trainable params: 14,219
Non-trainable params: 0
_________________________________________________________________


In [13]:
## Now run model
X = torch.from_numpy(X_train).float()
y = torch.from_numpy(y_train[:, np.newaxis]).float()
X_test_T = torch.from_numpy(X_test).float()
y_test_T = torch.from_numpy(y_test[:, np.newaxis]).float()

show_every = 75
for t in range(5000):
    # Forward pass: compute predicted y by passing x to the model.
    y_pred = model_t(X)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    if not t % show_every:
        y_test_pred = model_t(Variable(X_test_T))
        prediction = [int(x > 0.5) for x in y_test_pred.data.numpy()]
        test_accuracy = (prediction == y_test).sum() / len(y_test)
        train_pred = [int(x > 0.5) for x in y_pred.data.numpy()]
        train_accuracy = (train_pred == y_train).sum() / len(y_train)
        print("Batch: %04d | Training Loss: %6.2f | Train accuracy: %.4f | Test accuracy: %.4f" % (
                      t, loss.item(), train_accuracy, test_accuracy))

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (which are the learnable
    # weights of the model). This is because by default, gradients are
    # accumulated in buffers( i.e, not overwritten) whenever .backward()
    # is called. Checkout docs of torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters
    optimizer.step()

Batch: 0000 | Training Loss: 246.87 | Train accuracy: 0.3967 | Test accuracy: 0.4266
Batch: 0075 | Training Loss: 148.80 | Train accuracy: 0.6479 | Test accuracy: 0.6364
Batch: 0150 | Training Loss: 150.97 | Train accuracy: 0.6432 | Test accuracy: 0.6434
Batch: 0225 | Training Loss: 149.40 | Train accuracy: 0.6502 | Test accuracy: 0.6503
Batch: 0300 | Training Loss: 143.48 | Train accuracy: 0.6620 | Test accuracy: 0.6224
Batch: 0375 | Training Loss:  33.58 | Train accuracy: 0.9108 | Test accuracy: 0.9580
Batch: 0450 | Training Loss:  28.26 | Train accuracy: 0.9202 | Test accuracy: 0.9510
Batch: 0525 | Training Loss:  27.62 | Train accuracy: 0.9202 | Test accuracy: 0.9441
Batch: 0600 | Training Loss:  29.57 | Train accuracy: 0.9108 | Test accuracy: 0.9510
Batch: 0675 | Training Loss:  25.97 | Train accuracy: 0.9296 | Test accuracy: 0.9371
Batch: 0750 | Training Loss:  24.36 | Train accuracy: 0.9272 | Test accuracy: 0.9510
Batch: 0825 | Training Loss:  24.89 | Train accuracy: 0.9202 | Te