In [1]:
#!conda install pandas numpy statsmodels openpyxl

In [2]:
from itertools import product
from typing import List, Tuple

import pandas as pd
import numpy as np

import statsmodels.api as sm
from statsmodels.formula.api import ols


def gen_plan(*factors: Tuple[str, List], res_str: str, replications=3, save_path: str):
    """Write an empty full-factorial experiment plan to an Excel file.

    Every combination of the given factor levels appears ``replications``
    times on consecutive rows. The first column is named ``res_str`` and is
    filled with empty strings so the measured results can be typed in later;
    the remaining columns are the factors, in the order they were passed.

    Args:
        *factors: One ``(name, levels)`` pair per factor.
        res_str: Header of the (empty) result column.
        replications: Number of rows per level combination; must be > 1.
        save_path: Destination path handed to ``DataFrame.to_excel``.

    Raises:
        AssertionError: If no factor is given, if ``replications`` is not
            greater than 1, or if ``res_str`` is also used as a factor name.
    """
    assert len(factors) > 0, "at least one (name, levels) factor is required"
    assert replications > 1, "replications must be greater than 1"

    factor_names, factor_levels = zip(*factors)
    # A clash would leave the plan without a usable result column.
    assert res_str not in factor_names, "res_str must differ from every factor name"

    # Build plain row tuples instead of a single NumPy array: an array forces
    # one common dtype on all factors, which turns numeric levels into strings
    # as soon as any other factor has string levels.
    rows = [
        combo
        for combo in product(*factor_levels)
        for _ in range(replications)
    ]
    df = pd.DataFrame(rows, columns=list(factor_names))
    # The result column goes first so the analysis step can treat the first
    # column as the response.
    df.insert(0, res_str, "")
    df.to_excel(save_path, index=False)

def _quote_term(name: str) -> str:
    """Return ``name`` in a form that is safe to embed in a patsy formula."""
    import keyword  # local import keeps this helper self-contained

    # Plain identifiers are passed through unchanged so the term labels in the
    # resulting ANOVA table stay exactly as before (e.g. ``C(batchSize)``).
    if name.isidentifier() and not keyword.iskeyword(name):
        return name
    # Anything else (spaces, leading digits, keywords, ...) must go through
    # patsy's Q() so it is looked up as a column instead of parsed as code.
    return f"Q({name!r})"


def run_plan(save_path: str, sheet_name=0) -> pd.DataFrame:
    """Run a full-factorial ANOVA on a filled-in experiment plan.

    The first column of the sheet is used as the response; every remaining
    column is treated as a categorical factor. The fitted OLS model contains
    all factors joined with ``*``, i.e. main effects plus all interactions.

    Args:
        save_path: Path of the Excel workbook to read.
        sheet_name: Sheet to analyse, forwarded to ``pandas.read_excel``.

    Returns:
        The table produced by ``sm.stats.anova_lm`` for the fitted model.

    Raises:
        ValueError: If the sheet has fewer than two columns (a response and
            at least one factor are required).
    """
    df = pd.read_excel(save_path, sheet_name=sheet_name)
    # Headers that are not strings (e.g. numeric ones) cannot be referenced
    # from a formula, so normalise them first.
    df = df.rename(columns=str)

    columns = list(df.columns)
    if len(columns) < 2:
        raise ValueError(
            "plan needs a response column followed by at least one factor column, "
            f"got {columns!r}"
        )

    response, *factor_cols = columns
    rhs = "*".join(f"C({_quote_term(col)})" for col in factor_cols)
    model_string = f"{_quote_term(response)} ~ {rhs}"
    model = ols(model_string, df).fit()

    return sm.stats.anova_lm(model)

# example usage:
# gen_plan(('Processors', ['A', 'B']), ('Workloads', ['C', 'D']), res_str="Code_Size", replications=2, save_path='test.xlsx')
# then fill in the experiment results in the generated spreadsheet
# run_plan('test.xlsx', 0)

# 2^k Factorial
### "executorMemory" fixed at 8Gi for all experiments

## FASHIONMNIST, FASHIONMNISTCNN

In [3]:
# ANOVA table for the 'AvgProcessorUtil' sheet of the FASHIONMNIST 2^k workbook.
run_plan(save_path='fashion-2k.xlsx', sheet_name='AvgProcessorUtil')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(dataParallelism),1.0,19.859178,19.859178,15.828473,0.001079753
C(executorCores),1.0,3693.421918,3693.421918,2943.789068,1.43555e-19
C(batchSize),1.0,27.046174,27.046174,21.556766,0.0002707718
C(maxEpoch),1.0,0.00468,0.00468,0.00373,0.9520546
C(dataParallelism):C(executorCores),1.0,1.365791,1.365791,1.088584,0.3122938
C(dataParallelism):C(batchSize),1.0,2.498171,2.498171,1.991132,0.1773711
C(executorCores):C(batchSize),1.0,18.955864,18.955864,15.1085,0.001309238
C(dataParallelism):C(maxEpoch),1.0,0.010549,0.010549,0.008408,0.9280795
C(executorCores):C(maxEpoch),1.0,0.00468,0.00468,0.00373,0.9520546
C(batchSize):C(maxEpoch),1.0,0.792855,0.792855,0.631934,0.4382806


In [4]:
# ANOVA table for the 'AvgAccuracy' sheet of the FASHIONMNIST 2^k workbook.
run_plan(save_path='fashion-2k.xlsx', sheet_name='AvgAccuracy')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(dataParallelism),1.0,124.1415,124.1415,1.828161e+27,6.759949e-210
C(executorCores),1.0,8.077936e-28,8.077936e-28,0.01189591,0.9145044
C(batchSize),1.0,1.386113,1.386113,2.041248e+25,2.798286e-194
C(maxEpoch),1.0,4.437221,4.437221,6.534439e+25,2.537417e-198
C(dataParallelism):C(executorCores),1.0,1.8175360000000002e-27,1.8175360000000002e-27,0.0267658,0.8720927
C(dataParallelism):C(batchSize),1.0,1.522513,1.522513,2.242117e+25,1.3206660000000001e-194
C(executorCores):C(batchSize),1.0,5.04871e-29,5.04871e-29,0.0007434944,0.9785839
C(dataParallelism):C(maxEpoch),1.0,0.4970045,0.4970045,7.319099e+24,1.0242300000000001e-190
C(executorCores):C(maxEpoch),1.0,1.029476e-27,1.029476e-27,0.0151605,0.9035392
C(batchSize):C(maxEpoch),1.0,0.0142805,0.0142805,2.103007e+23,2.204615e-178


In [5]:
# ANOVA table for the 'AvgTrainTime' sheet of the FASHIONMNIST 2^k workbook.
run_plan(save_path='fashion-2k.xlsx', sheet_name='AvgTrainTime')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(dataParallelism),1.0,46774090000.0,46774090000.0,14815.444678,3.604119e-25
C(executorCores),1.0,18024330000.0,18024330000.0,5709.110115,7.316912000000001e-22
C(batchSize),1.0,586712800.0,586712800.0,185.838161,3.17622e-10
C(maxEpoch),1.0,7906012000.0,7906012000.0,2504.187248,5.197865e-19
C(dataParallelism):C(executorCores),1.0,4735585000.0,4735585000.0,1499.971336,3.037928e-17
C(dataParallelism):C(batchSize),1.0,2967698.0,2967698.0,0.940003,0.3467062
C(executorCores):C(batchSize),1.0,132200900.0,132200900.0,41.873919,7.720474e-06
C(dataParallelism):C(maxEpoch),1.0,2263073000.0,2263073000.0,716.81626,1.024128e-14
C(executorCores):C(maxEpoch),1.0,780151300.0,780151300.0,247.108798,3.776148e-11
C(batchSize):C(maxEpoch),1.0,10074660.0,10074660.0,3.191096,0.0930018


## CIFAR10, CIFAR10CNN

In [6]:
# ANOVA table for the 'AvgProcessorUtil' sheet of the CIFAR10 2^k workbook.
run_plan(save_path='cifar10-2k.xlsx', sheet_name='AvgProcessorUtil')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(dataParallelism),1.0,16.940565,16.940565,21.680473,0.0002634734
C(executorCores),1.0,4167.000518,4167.000518,5332.911877,1.260414e-21
C(batchSize),1.0,17.244596,17.244596,22.069571,0.0002419248
C(maxEpoch),1.0,0.250809,0.250809,0.320984,0.5788781
C(dataParallelism):C(executorCores),1.0,2.702231,2.702231,3.458306,0.08142194
C(dataParallelism):C(batchSize),1.0,5.879878,5.879878,7.525046,0.01443494
C(executorCores):C(batchSize),1.0,4.359366,4.359366,5.579101,0.0311852
C(dataParallelism):C(maxEpoch),1.0,0.183467,0.183467,0.2348,0.6345541
C(executorCores):C(maxEpoch),1.0,0.001785,0.001785,0.002284,0.9624702
C(batchSize):C(maxEpoch),1.0,0.010476,0.010476,0.013408,0.9092592


In [7]:
# ANOVA table for the 'AvgAccuracy' sheet of the CIFAR10 2^k workbook.
run_plan(save_path='cifar10-2k.xlsx', sheet_name='AvgAccuracy')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(dataParallelism),1.0,1279.371,1279.371,4.893178e+28,2.566437e-221
C(executorCores),1.0,3.1554440000000003e-28,3.1554440000000003e-28,0.01206855,0.9138889
C(batchSize),1.0,89.72641,89.72641,3.431744e+27,4.3846919999999995e-212
C(maxEpoch),1.0,117.7499,117.7499,4.503551e+27,4.984461e-213
C(dataParallelism):C(executorCores),1.0,2.4738680000000002e-27,2.4738680000000002e-27,0.09461743,0.762355
C(dataParallelism):C(batchSize),1.0,129.4762,129.4762,4.952046e+27,2.332281e-213
C(executorCores):C(batchSize),1.0,0.0,0.0,0.0,1.0
C(dataParallelism):C(maxEpoch),1.0,0.246402,0.246402,9.424078e+24,1.3556500000000001e-191
C(executorCores):C(maxEpoch),1.0,5.129568e-28,5.129568e-28,0.01961894,0.8903547
C(batchSize):C(maxEpoch),1.0,0.00245,0.00245,9.370456e+22,1.418968e-175


In [8]:
# ANOVA table for the 'AvgTrainTime' sheet of the CIFAR10 2^k workbook.
run_plan(save_path='cifar10-2k.xlsx', sheet_name='AvgTrainTime')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(dataParallelism),1.0,2298950000000.0,2298950000000.0,23105.58316,1.032884e-26
C(executorCores),1.0,824917100000.0,824917100000.0,8290.825188,3.723488e-23
C(batchSize),1.0,3729589.0,3729589.0,0.037484,0.848919
C(maxEpoch),1.0,288103000000.0,288103000000.0,2895.577187,1.6371759999999998e-19
C(dataParallelism):C(executorCores),1.0,278052900000.0,278052900000.0,2794.569394,2.171729e-19
C(dataParallelism):C(batchSize),1.0,109274700.0,109274700.0,1.098265,0.3102208
C(executorCores):C(batchSize),1.0,87601800.0,87601800.0,0.880441,0.3620305
C(dataParallelism):C(maxEpoch),1.0,98404550000.0,98404550000.0,989.01439,8.161604e-16
C(executorCores):C(maxEpoch),1.0,33848570000.0,33848570000.0,340.194891,3.322069e-12
C(batchSize):C(maxEpoch),1.0,28171150.0,28171150.0,0.283134,0.6019657


# 2-Factor Full Factorial
### "executorMemory" fixed at 8Gi for all experiments
### "dataParallelism" fixed at 4 for all experiments
### "maxEpoch" fixed at 4 for all experiments

## FASHIONMNIST, FASHIONMNISTCNN

In [9]:
# ANOVA table for the 'AvgProcessorUtil' sheet of the FASHIONMNIST 2-factor full-factorial workbook.
run_plan(save_path='fashion-full.xlsx', sheet_name='AvgProcessorUtil')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(executorCores),2.0,3620.011449,1810.005724,311.735779,6.728009e-18
C(batchSize),3.0,28.444028,9.481343,1.632964,0.2080621
C(executorCores):C(batchSize),6.0,22.067002,3.677834,0.63343,0.702245
Residual,24.0,139.349219,5.806217,,


In [10]:
# ANOVA table for the 'AvgAccuracy' sheet of the FASHIONMNIST 2-factor full-factorial workbook.
run_plan(save_path='fashion-full.xlsx', sheet_name='AvgAccuracy')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(executorCores),2.0,3.685558e-27,1.842779e-27,0.06186441,0.9401597
C(batchSize),3.0,26.16355,8.721183,2.927811e+26,6.979365e-307
C(executorCores):C(batchSize),6.0,4.723699e-27,7.872832000000001e-28,0.02643008,0.9999025
Residual,24.0,7.148973e-25,2.9787389999999996e-26,,


In [11]:
# ANOVA table for the 'AvgTrainTime' sheet of the FASHIONMNIST 2-factor full-factorial workbook.
run_plan(save_path='fashion-full.xlsx', sheet_name='AvgTrainTime')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(executorCores),2.0,5093723000.0,2546861000.0,16310.952061,2.4922439999999995e-38
C(batchSize),3.0,1000924000.0,333641300.0,2136.750261,2.917843e-29
C(executorCores):C(batchSize),6.0,176020600.0,29336770.0,187.88249,5.894695e-19
Residual,24.0,3747462.0,156144.2,,


## CIFAR10, CIFAR10CNN

In [12]:
# ANOVA table for the 'AvgProcessorUtil' sheet of the CIFAR10 2-factor full-factorial workbook.
run_plan(save_path='cifar10-full.xlsx', sheet_name='AvgProcessorUtil')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(executorCores),2.0,3702.217611,1851.108806,603.133865,3.037661e-21
C(batchSize),3.0,25.091479,8.363826,2.725127,0.06649304
C(executorCores):C(batchSize),6.0,28.850295,4.808383,1.566682,0.2000053
Residual,24.0,73.65962,3.069151,,


In [13]:
# ANOVA table for the 'AvgAccuracy' sheet of the CIFAR10 2-factor full-factorial workbook.
run_plan(save_path='cifar10-full.xlsx', sheet_name='AvgAccuracy')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(executorCores),2.0,1.009742e-28,5.04871e-29,0.005852231,0.994166
C(batchSize),3.0,275.3125,91.77083,1.063765e+28,0.0
C(executorCores):C(batchSize),6.0,8.361926000000001e-28,1.3936540000000001e-28,0.0161546,0.999977
Residual,24.0,2.070476e-25,8.626983000000001e-27,,


In [14]:
# ANOVA table for the 'AvgTrainTime' sheet of the CIFAR10 2-factor full-factorial workbook.
run_plan(save_path='cifar10-full.xlsx', sheet_name='AvgTrainTime')

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(executorCores),2.0,130433000000.0,65216480000.0,7158.868486,4.822962e-34
C(batchSize),3.0,2781781000.0,927260400.0,101.786167,8.72244e-14
C(executorCores):C(batchSize),6.0,823300500.0,137216700.0,15.0624,4.312979e-07
Residual,24.0,218637300.0,9109886.0,,
