# Online learning to predict performance 

In [32]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# incremental training models

# Classif
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import Perceptron, SGDClassifier, PassiveAggressiveClassifier

# Regression
from sklearn.linear_model import SGDRegressor, PassiveAggressiveRegressor
from sklearn.model_selection import train_test_split


from sklearn.preprocessing import StandardScaler

## Read the data

In [55]:
df = pd.read_pickle('100experiments.pkl')


# Set the acc to the final accuracy
df['acc'] = df.accuracy.map(lambda a: a[-1])

# Set the time to the sum of the epoch durations
df['time'] = df.epoch_duration.map(sum)

# Set the parallelism to the first since it is constant
df.parallelism = df.parallelism.map(lambda l:l[0])

# change -1 to inf so the order is right in the plot


# ks = []
# for p, batch, k in zip(df.parallelism, df.batch_size, df.k):
#     ks.append(int((60000/p)/batch) if k == -1 else k)
    


In [8]:
df.columns

Index(['id', 'model_type', 'batch_size', 'epochs', 'dataset', 'lr',
       'function_name', 'default_parallelism', 'static_parallelism',
       'validate_every', 'k', 'goal_accuracy', 'validation_loss', 'accuracy',
       'train_loss', 'parallelism', 'epoch_duration', 'acc', 'time'],
      dtype='object')

## Try to fit the classifiers and compare performance

In [150]:
# divide into train and test
x_train, x_test = train_test_split(df, test_size=0.2, random_state=42)

y_train_acc, y_train_time = x_train['acc'], x_train['time']
y_test_acc, y_test_time = x_test['acc'], x_test['time']

x_train = x_train[['batch_size', 'lr', 'default_parallelism', 'k']]
x_test = x_test[['batch_size', 'lr', 'default_parallelism', 'k']]

In [106]:
x.k.value_counts()

 64    20
 16    20
 8     20
 2     20
-1     20
Name: k, dtype: int64

In [158]:
df.loc[(df.batch_size==64) & (df.parallelism==8)]

Unnamed: 0,id,model_type,batch_size,epochs,dataset,lr,function_name,default_parallelism,static_parallelism,validate_every,k,goal_accuracy,validation_loss,accuracy,train_loss,parallelism,epoch_duration,acc,time
15,2158db31,example,64,5,mnist,0.01,lenet,8,True,1,8,100,"[1.9118105036437891, 0.7927961034379947, 0.676...","[52.29, 77.91, 82.99, 84.98, 85.95, 85.95]","[2.227595477534665, 1.1232733184181982, 0.7684...",8,"[11.900906525, 8.572293294, 6.686199387, 7.045...",85.95,41.072249
26,3da16263,example,64,5,mnist,0.01,lenet,8,True,1,16,100,"[1.2495136712766757, 0.5140166210520799, 0.424...","[57.75, 81.66, 84.05, 85.39, 85.89, 85.89]","[1.9450298844448601, 0.8569566598123249, 0.521...",8,"[10.331783773, 7.830437498, 5.816976932, 6.043...",85.89,38.855955
27,402f1390,example,64,5,mnist,0.01,lenet,8,True,1,64,100,"[1.1337818252812526, 0.21182848930738535, 0.13...","[69.28, 93.46, 95.72, 96.63, 97.27, 97.27]","[2.0933615577811993, 0.548363882081697, 0.2325...",8,"[11.314345539, 8.384212163, 6.322548479, 6.772...",97.27,39.356012
68,9b2e6e25,example,64,5,mnist,0.01,lenet,8,True,1,2,100,"[2.277480520260562, 2.165740127016784, 1.27580...","[35.08, 43.05, 64.61, 82.2, 87.79, 87.79]","[2.2910737003815376, 2.2388537331657896, 1.825...",8,"[16.18150107, 13.192074094, 11.117174694, 21.0...",87.79,76.804375
95,f7733184,example,64,5,mnist,0.01,lenet,8,True,1,-1,100,"[1.4588187522948928, 0.7854528687182506, 0.453...","[45.1, 73.47, 83.86, 85.64, 86.25, 86.25]","[2.0253294189056503, 1.097543101261815, 0.7000...",8,"[13.80060439, 9.216515436, 7.289033032, 9.1800...",86.25,48.214698


In [58]:
# fit the passive aggressive
from sklearn.metrics import mean_squared_error

pa = PassiveAggressiveRegressor()
sg = SGDRegressor()

for clf in [pa, sg]:
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(mean_squared_error(y_pred, y_test))

594.164592719591
459.39315124290204


In [104]:
# online training
sg = PassiveAggressiveRegressor()
for feature, label in zip(x_train, y_train):
#     print(feature.reshape(-1,), np.array([label]))
    sg.partial_fit(feature.reshape(1, -1), np.array([label]))

In [100]:
y_pred = sg.predict(x_test)
print(mean_squared_error(y_pred, y_test))

818.1059058685958


In [105]:
d = s.transform(np.array([128, 0.01, 8, 16]).reshape(1,-1))
d
sg.predict(d)

array([82.50197023])

In [126]:
[[samples.batch_size, samples.lr, samples.default_parallelism, k] for k in Ks]

[[256.0, 0.01, 1.0, 2],
 [256.0, 0.01, 1.0, 8],
 [256.0, 0.01, 1.0, 16],
 [256.0, 0.01, 1.0, 64],
 [256.0, 0.01, 1.0, -1]]

In [123]:
Ks = [2,8, 16, 64, -1]

samples = x.iloc[2]
# [[[s.batch_size, s.lr, s.default_parallelism, K] for s in] for K in Ks]

## Create the class for evaluating possible K levels

In [167]:
class KOptimizer:
    
    Ks = [2,8, 16, 64, -1]
    
    def __init__(self, X, y_acc, y_time):
        self.scaler = StandardScaler()
        data = self.scaler.fit_transform(X)
        self.time_reg = PassiveAggressiveRegressor(random_state=42)
        self.acc_reg = PassiveAggressiveRegressor(random_state=42)
        
        # fit both regressors
        self.time_reg.fit(data, y_time)
        self.acc_reg.fit(data, y_acc)
        
    def __call__(self, X, y):
        _x = np.array([[X.batch_size, X.lr, X.default_parallelism, k] for k in self.Ks])
        stdata = self.scaler.transform(_x)
        preds_acc, preds_time = self.acc_reg.predict(stdata), self.time_reg.predict(stdata)
        print(_x, preds_acc, preds_time)
        
    def update(self, x: np.ndarray, time: float, acc: float):
        _x = self.scaler.transform(x.reshape(1, -1))
        self.time_reg.partial_fit(_x, np.array([time]))
        self.acc_reg.partial_fit(_x, np.array([acc]))

In [173]:
# op = KOptimizer(x_train, y_train_acc, y_train_time)
preds = op(df.iloc[15], df.iloc[15])
preds

[[ 6.4e+01  1.0e-02  8.0e+00  2.0e+00]
 [ 6.4e+01  1.0e-02  8.0e+00  8.0e+00]
 [ 6.4e+01  1.0e-02  8.0e+00  1.6e+01]
 [ 6.4e+01  1.0e-02  8.0e+00  6.4e+01]
 [ 6.4e+01  1.0e-02  8.0e+00 -1.0e+00]] [46.42098938 46.29519273 46.12746386 45.12109066 46.4838877 ] [ 0.38979026 -0.17957509 -0.93872889 -5.49365172  0.67447294]


In [174]:
for f, t, a in zip(s.transform(x_test), y_test_time, y_test_acc):
    print(t)
    op.update(f, t, a)

42.964120499
43.45463995
48.608870431
49.348767040000006
125.67063405100001
157.845865048
146.150401646
77.11826426100001
111.183158916
91.873677077
193.47928402899998
30.571016984000003
74.792759029
127.452963605
33.157986481
25.206731129
49.644195835000005
129.857463547
44.282958637
242.788765065
