In [1]:
from numpy.random import rand
import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from sklearn import linear_model
from sklearn.neural_network import MLPRegressor
import tensorflow as tf
from scipy import stats
import torch
from gekko import GEKKO
from torch.autograd import Variable
from keras.models import Sequential
from keras.layers import *

In [2]:
test_time = 20.0

In [3]:
w = rand()
b = rand()

In [4]:
cols = ['scipy','statsmodel','numpy','sklearn','nn','keras nn','gekko','keras','tf','pytorch']
n = 100
index = []
for i in range(25):
    index.append(n)
    n *= 2
    
results = pd.DataFrame(columns=cols,index = index)

In [5]:
# Statsmodels OLS
n = 100
start = time.time()
while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n); y += noise
    
    # scipy
    xc = sm.add_constant(x)
    t0 = time.time()
    model = sm.OLS(y,xc).fit()
    predictions = model.predict(xc)
    t1 = time.time()
    print(n,t1-t0)
    results['statsmodel'].loc[n] = t1 - t0
    
    n = n * 2 # Double number of data points

100 0.05449867248535156
200 0.0009975433349609375
400 0.0
800 0.0009970664978027344
1600 0.001010894775390625
3200 0.0
6400 0.001995086669921875
12800 0.0029938220977783203
25600 0.003023862838745117
51200 0.008975744247436523
102400 0.012932538986206055
204800 0.035903215408325195
409600 0.07878923416137695
819200 0.12167549133300781
1638400 0.2622988224029541
3276800 0.48171377182006836
6553600 0.9982964992523193
13107200 1.9228532314300537
26214400 3.8522789478302
52428800 16.984108209609985


In [6]:
# Numpy
n = 100
start = time.time()
while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # numpy
    t0 = time.time()
    np.polyfit(x,y,1)
    t1 = time.time()
    print(n,t1-t0)
    results['numpy'].loc[n] = t1 - t0
    
    n = n * 2 # Double number of data points

100 0.03291201591491699
200 0.0019948482513427734
400 0.0009968280792236328
800 0.0009975433349609375
1600 0.0009975433349609375
3200 0.0009970664978027344
6400 0.0019948482513427734
12800 0.0029916763305664062
25600 0.0029938220977783203
51200 0.0069811344146728516
102400 0.011969566345214844
204800 0.02393651008605957
409600 0.04986715316772461
819200 0.08776521682739258
1638400 0.21342730522155762
3276800 0.3311135768890381
6553600 0.7051131725311279
13107200 1.2586326599121094
26214400 2.405526876449585
52428800 9.97437572479248


In [7]:
# sklearn
n = 100
start = time.time()
while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # numpy
    t0 = time.time()
    lm = linear_model.LinearRegression()
    lm.fit(x.reshape((n,1)),y)
    t1 = time.time()
    print(n,t1-t0)
    results['sklearn'].loc[n] = t1 - t0
    
    n = n * 2 # Double number of data points

100 0.0857701301574707
200 0.001993417739868164
400 0.0019943714141845703
800 0.0019958019256591797
1600 0.000997781753540039
3200 0.000997781753540039
6400 0.001993417739868164
12800 0.001995563507080078
25600 0.002996206283569336
51200 0.00498652458190918
102400 0.0109710693359375
204800 0.013962268829345703
409600 0.028921127319335938
819200 0.05186009407043457
1638400 0.10671472549438477
3276800 0.19148778915405273
6553600 0.38496994972229004
13107200 0.8377599716186523
26214400 1.5927367210388184
52428800 2.8324201107025146
104857600 17.778105974197388


In [8]:
# Scikit-learn neural network
n = 100
start = time.time()
while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # Neural network
    t0 = time.time()
    nn = MLPRegressor(hidden_layer_sizes=((10,10)),activation='tanh',\
                      solver='lbfgs',max_iter=5000)
    nn.fit(x.reshape((n,1)),y)
    t1 = time.time()
    print(n,t1-t0)
    results['nn'].loc[n] = t1 - t0
    
    n = n * 2 # Double number of data points

100 1.2606847286224365
200 0.662193775177002
400 1.8112378120422363
800 0.8757038116455078
1600 1.7268800735473633
3200 7.130922079086304
6400 2.7506439685821533
12800 12.814718008041382


In [9]:
# Keras nn
n_inputs = 1
nodes = 10
skip = 2

# create neural network model
model = Sequential()
model.add(Dense(n_inputs, input_dim=n_inputs, activation='linear'))
model.add(Dense(nodes, activation='linear'))
model.add(Dense(nodes, activation='tanh'))
model.add(Dense(nodes, activation='tanh'))
model.add(Dense(nodes, activation='linear'))
model.add(Dense(1, activation='linear'))
model.compile(loss="mean_squared_error", optimizer="adam")

n = 100
start = time.time()
while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # Keras 
    t0 = time.time()
    model.fit(x,y,epochs=100,batch_size=int(n/10),verbose=0,shuffle=True)
    t1 = time.time()
    print(n,t1-t0)
    results['keras nn'].loc[n] = t1 - t0
    
    n = n * 2 * skip # Quadruple number of data points

100 2.0555145740509033
400 1.3630356788635254
1600 1.8589675426483154
6400 1.2915656566619873
25600 1.7812342643737793
102400 3.6777689456939697
409600 15.184376955032349


In [10]:
# Keras linear regression
model = Sequential()
model.add(Dense(1, input_dim=1, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

n = 100
start = time.time()
while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # Keras 
    t0 = time.time()
    model.fit(x,y,epochs=100,batch_size=int(n/10),verbose=0,shuffle=True)
    t1 = time.time()
    print(n,t1-t0)
    results['keras'].loc[n] = t1 - t0
    
    n = n * 2 * skip # Quadruple number of data points

100 1.0112953186035156
400 1.0672059059143066
1600 1.1352770328521729
6400 1.024613380432129
25600 1.2698099613189697
102400 1.7482900619506836
409600 3.5664334297180176
1638400 21.902881860733032


For Keras, TF, change input parameters such as epochs, batch size, nodes, number of layers, etc, to see how it affects time

In [None]:
# Tensorflow
learning_rate = 0.01
epochs = 100
tf.get_logger().setLevel('ERROR')

# Weight and Bias variables
m = tf.Variable(tf.zeros(1), name='weight')
a = tf.Variable(tf.zeros(1), name='bias')

def predict(x):
    return x * m + a

def mse(y_true, y_pred):
    return tf.losses.mean_squared_error(y_true,y_pred)

def fit_TF(x,y):
    for epoch in range(1, epochs + 1):
    # Begin GradientTape and optimise
        with tf.GradientTape() as g:
            pred = predict(x)
            loss = mse(y, pred)

        # Compute dw, db
        gradients = g.gradient(loss, [m,a])

        # Update w and b
        optimizer.apply_gradients(zip(gradients, [m,a]))
    
    return (m.numpy(),a.numpy())

# SGD Optimizer
optimizer = tf.optimizers.SGD(learning_rate=learning_rate)

n = 100
start = time.time()

while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # TF
    t0 = time.time()
    fit_TF(x,y)
    t1 = time.time()
    print(n,t1-t0)
    results['tf'].loc[n] = t1 - t0
    
    n = n * 2 * skip

100 0.23536896705627441
400 0.28923797607421875
1600 0.4348611831665039
6400 0.2254025936126709
25600 0.23637771606445312
102400 0.628319263458252
409600 1.3364105224609375


In [None]:
# Scipy
n = 100
start = time.time()
while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # numpy
    t0 = time.time()
    stats.linregress(x,y)
    t1 = time.time()
    print(n,t1-t0)
    results['scipy'].loc[n] = t1 - t0
    
    n = n * 2 # Double number of data points

In [None]:
# gekko
n = 100
start = time.time()
while time.time() <= start + test_time*3:
    x = np.linspace(0,100,n)
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n); y += noise
    
    # gekko
    m = GEKKO(remote=True,server='http://apmonitor.com')
    yg = m.Var(y); xg = m.Param(x); ym = m.Param(y)
    wg = m.FV(); wg.STATUS = 1; bg = m.FV(); bg.STATUS = 1
    m.Equation(yg==wg*xg+bg); m.options.IMODE=2
    m.Minimize((yg-ym)**2)
    
    t0 = time.time()
    try:
        m.solve(disp=False)
        t1 = time.time()
        print(n,t1-t0)
        results['gekko'].loc[n] = t1 - t0
        # get solve time from remote server
        results['gekko'].loc[n] = m.options.SOLVETIME
    except:
        print('Server not available or time-out')
    
    n = n * 2 # Double number of data points

In [None]:
# Pytorch
class linearRegression(torch.nn.Module):
    def __init__(self, inputSize, outputSize):
        super(linearRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        out = self.linear(x)
        return out
    
inputDim = 1        # takes variable 'x' 
outputDim = 1       # takes variable 'y'
learningRate = 0.01 
epochs = 100

model = linearRegression(inputDim, outputDim)
criterion = torch.nn.MSELoss() 
optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)

def fit_pytorch(x,y):
    x = x.reshape(len(x),1)
    y = y.reshape(len(y),1)
    
    for epoch in range(epochs):
        # Converting inputs and labels to Variable
        inputs = Variable(torch.from_numpy(x).float())
        labels = Variable(torch.from_numpy(y).float())

        # Clear gradient buffers because we don't want any gradient
        #  from previous epoch to carry forward, dont want to 
        #  cummulate gradients
        optimizer.zero_grad()

        # get output from the model, given the inputs
        outputs = model(inputs)

        # get loss for the predicted output
        loss = criterion(outputs, labels)
        print(loss)
        # get gradients w.r.t to parameters
        loss.backward()

        # update parameters
        optimizer.step()
        
n = 100
start = time.time()

while time.time() <= start + test_time:
    x = np.linspace(0,100,n)
    w = 0; b = 0
    y = w * x + b
    
    # Add Gaussian noise
    noise = np.random.normal(0,1.0,n)
    y += noise
    
    # TF
    t0 = time.time()
    fit_pytorch(x,y)
    t1 = time.time()
    print(n,t1-t0)
    results['pytorch'].loc[n] = t1 - t0
    
    n = n * 2 * skip

In [None]:
plt.figure(figsize=(8,6))
plt.subplot(2,1,1)
plt.plot(results['keras'][::skip],'r-',label='keras linear')
plt.plot(results['gekko'],'k:',lw=2,label='gekko')
plt.plot(results['tf'][::skip],'g-',label='tensorflow linear')
plt.plot(results['pytorch'][::skip],'b.-',label='pytorch linear')
plt.plot(results['statsmodel'],'r--',lw=2,label='statsmodel ols')
plt.plot(results['numpy'],'k-',lw=2,label='numpy polyfit')
plt.plot(results['sklearn'],'g:',lw=2,label='sklearn linear')
plt.plot(results['scipy'],'b-',lw=2,label='scipy linregress')
plt.xscale('log')
plt.ylabel('Train Time (sec)')
plt.legend()
xlim = plt.gca().get_xlim()
plt.ylim([0,test_time])
plt.grid()

plt.subplot(2,1,2)
plt.plot(results['nn'],'b-',label='sklearn nn')
plt.plot(results['keras nn'][::skip],'r:',label='keras nn')
plt.xscale('log')
plt.ylabel('Train Time (sec)')
plt.xlabel('Problem Size (samples)')
plt.legend()
plt.xlim(xlim)
plt.ylim([0,test_time])
plt.grid()
plt.savefig('timing_results.png',dpi=600)