In [1]:
#!conda install pandas numpy statsmodels openpyxl

In [2]:
from itertools import product
from typing import List, Tuple

import pandas as pd
import numpy as np

import statsmodels.api as sm
from statsmodels.formula.api import ols


def gen_plan(*factors: Tuple[str, List], res_str: str, replications: int = 3, save_path: str) -> None:
    """Write a replicated full-factorial experiment plan to an Excel file.

    The sheet has one row per run. Its first column is the (blank) response
    column named ``res_str``; the remaining columns are the factors, in the
    order given. Every combination of factor levels appears ``replications``
    times on consecutive rows.

    Args:
        *factors: ``(name, levels)`` pairs, one per factor.
        res_str: Header of the response column that is left blank.
        replications: Number of repeated runs per level combination; must be
            greater than 1.
        save_path: Destination passed to ``DataFrame.to_excel``.

    Raises:
        ValueError: If no factor is given, ``replications`` is not greater
            than 1, a factor has no levels, or a column name is repeated.
    """
    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    if not factors:
        raise ValueError("at least one factor is required")
    if replications <= 1:
        raise ValueError("replications must be greater than 1")

    factor_names, factor_levels = zip(*factors)
    levels = [list(level_values) for level_values in factor_levels]

    for name, level_values in zip(factor_names, levels):
        if not level_values:
            raise ValueError(f"factor {name!r} has no levels")

    # A repeated header would silently drop a column (or the response).
    headers = [res_str, *factor_names]
    if len(set(headers)) != len(headers):
        raise ValueError("response and factor names must be unique")

    # Build the rows directly instead of going through a NumPy array: an
    # array forces one common dtype, which turns numeric levels into strings
    # as soon as any other factor uses string levels.
    rows = [combo for combo in product(*levels) for _ in range(replications)]
    df = pd.DataFrame(rows, columns=list(factor_names))
    df.insert(0, res_str, "")
    df.to_excel(save_path, index=False)

def run_plan(save_path: str, sheet_name=0) -> pd.DataFrame:
    """Fit a full-factorial OLS model to a filled-in plan and return its ANOVA table.

    The first column of the sheet is used as the response; every remaining
    column is treated as a categorical factor, and all factors are crossed
    with ``*`` in the model formula.

    Args:
        save_path: Path of the Excel file to read.
        sheet_name: Sheet selector forwarded to ``pandas.read_excel``.

    Returns:
        The result of ``sm.stats.anova_lm`` for the fitted model.

    Raises:
        ValueError: If the sheet has fewer than two columns (a response and
            at least one factor are needed).
    """
    from keyword import iskeyword

    df = pd.read_excel(save_path, sheet_name=sheet_name)
    if len(df.columns) < 2:
        raise ValueError(
            "the sheet must contain a response column and at least one factor column"
        )

    def as_term(column) -> str:
        # Plain identifiers go into the formula unchanged; any other header
        # (spaces, punctuation, keywords, ...) is wrapped in patsy's Q() so
        # it is looked up by name instead of being parsed as an expression.
        if isinstance(column, str) and column.isidentifier() and not iskeyword(column):
            return column
        return f"Q({column!r})"

    response, *factor_columns = df.columns
    crossed_factors = "*".join(f"C({as_term(column)})" for column in factor_columns)
    model_string = f"{as_term(response)} ~ {crossed_factors}"
    model = ols(model_string, df).fit()

    return sm.stats.anova_lm(model)

# example usage:
# gen_plan(('Processors', ['A', 'B']), ('Workloads', ['C', 'D']), res_str="Code_Size", replications=2, save_path='test.xlsx')
# then fill in the experiment results in the first column (here "Code_Size") of the spreadsheet
# run_plan('test.xlsx', 0)

# 2^k Factorial
### "executorMemory" fixed at 8Gi for all experiments