forked from HBPMedical/algorithm-repository
-
Notifications
You must be signed in to change notification settings - Fork 1
/
anova.py
executable file
·85 lines (61 loc) · 2.38 KB
/
anova.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3
from io_helper import io_helper
import logging
import json
from pandas import DataFrame
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
# Name of the entry looked up in the input parameters to select the design.
DESIGN_PARAM = "design"
# Value returned by get_parameter when no entry with the requested name exists.
DEFAULT_DESIGN = "factorial"
# NOTE(review): not referenced in the visible code; presumably the name of the
# Docker image this algorithm is packaged in -- confirm before relying on it.
DEFAULT_DOCKER_IMAGE = "python-anova"
def main():
    """Fetch the inputs, run the ANOVA and store the JSON-encoded result.

    The inputs come from ``io_helper.fetch_data()`` and are read through their
    "data" and "parameters" entries. Nothing is computed or stored when the
    type name of the first dependent variable is neither "integer" nor "real";
    a warning is logged instead.

    Returns:
        None in every case.
    """
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Read inputs
    inputs = io_helper.fetch_data()
    input_data = inputs["data"]
    dependent = input_data["dependent"][0]
    independents = input_data["independent"]
    design = get_parameter(inputs["parameters"], DESIGN_PARAM)

    # The dependent variable has to be continuous
    is_continuous = dependent["type"]["name"] in ("integer", "real")
    if not is_continuous:
        logging.warning("Dependent variable should be continuous !")
        return None

    # Gather the series of every variable, keyed by variable name
    data = format_data(input_data)

    # Compute the ANOVA table and serialise it
    anova_table = compute_anova(dependent, independents, data, design)
    anova_results = format_output(anova_table.to_dict())

    # Store results
    io_helper.save_results(anova_results, '', 'application/json')
def format_data(input_data):
    """Map every input variable name to its series.

    Args:
        input_data: Mapping whose "dependent" and "independent" entries are
            sequences (concatenated with ``+``) of variable descriptions,
            each carrying a "name" and a "series" entry.

    Returns:
        A dict from variable name to series. When a name occurs more than
        once, the last occurrence wins; independent variables come after
        dependent ones.
    """
    series_by_name = {}
    for variable in input_data["dependent"] + input_data["independent"]:
        series_by_name[variable["name"]] = variable["series"]
    return series_by_name
def format_output(statsmodels_dict):
    """Serialise a column-oriented result dict as JSON keyed the other way.

    The dict is loaded into a DataFrame, transposed, and every missing value
    is replaced by the string "NaN" before the frame is dumped to JSON.

    Args:
        statsmodels_dict: Nested dict accepted by ``DataFrame.from_dict``.

    Returns:
        A JSON string of the transposed table.
    """
    table = DataFrame.from_dict(statsmodels_dict)
    transposed = table.transpose()
    without_missing = transposed.fillna("NaN")
    return json.dumps(without_missing.to_dict())
def get_parameter(params_list, param_name):
    """Return the value of the first parameter entry named ``param_name``.

    Args:
        params_list: Iterable of mappings with "name" and "value" entries.
        param_name: Name of the parameter to look up.

    Returns:
        The "value" of the first matching entry, or ``DEFAULT_DESIGN`` when
        no entry matches.
    """
    try:
        # The generator is consumed lazily, so the scan stops at the first hit.
        return next(
            entry["value"] for entry in params_list
            if entry["name"] == param_name
        )
    except StopIteration:
        return DEFAULT_DESIGN
def compute_anova(dep_var, indep_vars, data, design='factorial'):
    """Fit an OLS model for the requested design and return its ANOVA table.

    Args:
        dep_var: Description of the dependent variable, forwarded to
            ``generate_formula``.
        indep_vars: Descriptions of the independent variables, forwarded to
            ``generate_formula``.
        data: Data passed to ``ols`` as its ``data`` argument.
        design: Design name forwarded to ``generate_formula``.

    Returns:
        The result of ``anova_lm`` applied to the fitted model.

    Raises:
        ValueError: If no formula could be generated for ``design``.
    """
    formula = generate_formula(dep_var, indep_vars, design)
    if formula is None:
        # generate_formula signals an unsupported design with None; fail here
        # with a clear message rather than deep inside the formula parser.
        raise ValueError("Invalid design parameter : %s" % design)
    logging.info("Formula: %s", formula)
    lm = ols(data=data, formula=formula).fit()
    logging.info(lm.summary())
    return anova_lm(lm)
def generate_formula(dep_var, indep_vars, design):
    """Build the model formula string for the requested design.

    Independent variables whose type name is "integer" or "real" appear by
    name; every other variable is wrapped as ``C(name)``.

    Args:
        dep_var: Mapping with a "name" entry, used as the left-hand side.
        indep_vars: Iterable of mappings, each with a "name" entry and a
            "type" mapping that itself has a "name" entry.
        design: 'additive' joins the terms with " + ", 'factorial' joins
            them with " * ".

    Returns:
        The formula as "dep ~ term op term ...", or None (after logging an
        error) when ``design`` is neither 'additive' nor 'factorial'.
    """
    if design == 'additive':
        op = " + "
    elif design == 'factorial':
        op = " * "
    else:
        logging.error("Invalid design parameter : %s", design)
        return None

    # Plain %-formatting only: passing the result through str.format as well
    # would treat braces in a variable name as replacement fields.
    terms = [
        v["name"] if v["type"]["name"] in ("integer", "real")
        else "C(%s)" % v["name"]
        for v in indep_vars
    ]
    # join never emits a leading or trailing separator, so nothing needs to be
    # stripped afterwards (str.strip would remove characters, not the separator).
    return "%s ~ %s" % (dep_var["name"], op.join(terms))
if __name__ == "__main__":
    # Run the analysis only when executed as a script, not when imported.
    main()