forked from HBPMedical/algorithm-repository
-
Notifications
You must be signed in to change notification settings - Fork 1
/
linear_regression.py
executable file
·72 lines (51 loc) · 2.04 KB
/
linear_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
from mip_helper import io_helper
import logging
import json
from pandas import DataFrame
from statsmodels.formula.api import ols
# NOTE(review): presumably the name of the Docker image this algorithm ships as.
# Nothing in the visible code reads this constant, so it is likely consumed by
# external tooling -- confirm before renaming or removing it.
DEFAULT_DOCKER_IMAGE = "python-linear-regression"
def main():
    """Fetch the job inputs, fit a linear regression and store the result.

    The first dependent variable is the regression target and must have a
    type name of "integer" or "real". If it does not, a warning is logged
    and the function returns ``None`` without saving anything.

    NOTE(review): the input schema (``inputs["data"]`` holding "dependent"
    and "independent" lists of variable dicts) is inferred from the keys
    accessed here; ``io_helper`` itself is not visible in this file.
    """
    logging.basicConfig(level=logging.INFO)

    # Pull the job description from the platform helper.
    inputs = io_helper.fetch_data()
    dataset = inputs["data"]
    target = dataset["dependent"][0]
    predictors = dataset["independent"]

    # Guard clause: only continuous targets are accepted.
    target_is_continuous = target["type"]["name"] in ("integer", "real")
    if not target_is_continuous:
        logging.warning("Dependent variable should be continuous !")
        return None

    # Turn the variable descriptions into a {name: series} mapping.
    series_by_name = format_data(dataset)

    # Fit the model, then serialise its statistics to a JSON string.
    fit_statistics = compute_linear_regression(target, predictors, series_by_name)
    payload = format_output(fit_statistics)

    # Hand the JSON document back to the platform.
    io_helper.save_results(payload, '', 'application/json')
def format_data(input_data):
    """Map every variable name to its series of values.

    Args:
        input_data: mapping with "dependent" and "independent" entries, each
            a list of variable dicts carrying a "name" and a "series".

    Returns:
        A dict ``{name: series}`` with the dependent variables first, then
        the independent ones. A later variable with the same name replaces
        an earlier one.
    """
    variables = input_data["dependent"] + input_data["independent"]

    series_by_name = {}
    for variable in variables:
        name = variable["name"]
        series = variable["series"]
        series_by_name[name] = series
    return series_by_name
def format_output(statsmodels_dict):
    """Serialise the regression statistics to a JSON string.

    Args:
        statsmodels_dict: mapping ``{statistic: {term: value}}``.

    Returns:
        A JSON document with the nesting flipped to
        ``{term: {statistic: value}}``. Missing values are replaced by the
        string "NaN" before serialisation.
    """
    # Columns are the statistics, rows are the model terms.
    by_statistic = DataFrame.from_dict(statsmodels_dict)

    # Flip so that to_dict() nests by term first, then by statistic.
    by_term = by_statistic.transpose()
    without_gaps = by_term.fillna("NaN")

    return json.dumps(without_gaps.to_dict())
def compute_linear_regression(dep_var, indep_vars, data):
    """Fit an ordinary-least-squares model and collect its per-term statistics.

    Args:
        dep_var: variable dict describing the regression target.
        indep_vars: list of variable dicts describing the predictors.
        data: the data passed to ``ols`` alongside the generated formula.

    Returns:
        A dict with the keys "coef", "std_err", "t_values" and "p_values",
        each built with ``dict(...)`` from the matching attribute of the
        fitted model (``params``, ``bse``, ``tvalues``, ``pvalues``).
    """
    formula = generate_formula(dep_var, indep_vars)
    logging.info("Formula: %s" % formula)

    fitted = ols(formula=formula, data=data).fit()
    logging.info(fitted.summary())

    statistics = {}
    statistics["coef"] = dict(fitted.params)
    statistics["std_err"] = dict(fitted.bse)
    statistics["t_values"] = dict(fitted.tvalues)
    statistics["p_values"] = dict(fitted.pvalues)
    return statistics
def generate_formula(dep_var, indep_vars):
    """Build the model formula ``"<dependent> ~ <term> + <term> + ..."``.

    Independent variables whose type name is "integer" or "real" appear
    under their plain name; every other variable is wrapped in ``C(...)``.

    The previous implementation passed the finished strings through
    ``str.format`` again, which raised ``IndexError``/``KeyError`` (or
    silently collapsed ``{{``) whenever a variable name contained braces.
    The strings are now built in a single formatting step.

    Args:
        dep_var: variable dict with a "name" entry.
        indep_vars: iterable of variable dicts with "name" and
            ``["type"]["name"]`` entries.

    Returns:
        The formula as a string. With no independent variables the
        right-hand side is empty (``"y ~ "``).
    """
    op = " + "
    continuous_types = ("integer", "real")

    target = dep_var["name"]
    terms = [
        v["name"] if v["type"]["name"] in continuous_types else "C(%s)" % v["name"]
        for v in indep_vars
    ]

    # str.strip treats its argument as a *set of characters*, so this drops any
    # leading/trailing spaces or plus signs (e.g. left behind by an empty
    # name). Kept to preserve the formulas the original code produced.
    right_hand_side = op.join(terms).strip(op)

    return "%s ~ %s" % (target, right_hand_side)
# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()