From af6a9d9365d96ec3e5644d65ab7541e02f9536cc Mon Sep 17 00:00:00 2001 From: Daniel Abercrombie Date: Mon, 3 Dec 2018 15:13:26 -0500 Subject: [PATCH 1/6] Show confirmation on submission --- workflowwebtools/__init__.py | 2 +- workflowwebtools/web/static/js/submit.js | 22 +++++++++++++++++-- .../web/templates/workflowtables2.html | 1 + workflowwebtools/workflowtools.py | 8 ++++++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/workflowwebtools/__init__.py b/workflowwebtools/__init__.py index 63bbc16..f4bbbbb 100644 --- a/workflowwebtools/__init__.py +++ b/workflowwebtools/__init__.py @@ -4,6 +4,6 @@ :author: Daniel Abercrombie """ -__version__ = '0.8.2' +__version__ = '0.8.3' __all__ = [] diff --git a/workflowwebtools/web/static/js/submit.js b/workflowwebtools/web/static/js/submit.js index ff07792..b1079ad 100644 --- a/workflowwebtools/web/static/js/submit.js +++ b/workflowwebtools/web/static/js/submit.js @@ -313,6 +313,18 @@ function buildSubmit (workflow, sitesToRun) { return output; } + +function showStatus(workflow) { + $.ajax({ + url: "/getstatus", + data: {"workflow": workflow}, + success: function (status) { + document.getElementById("actionstatus").innerHTML = "Action: " + status.status; + } + }); +} + + function makeForm(workflow) { $.ajax({ @@ -326,14 +338,19 @@ function makeForm(workflow) { form.action = "javascript:;"; form.onsubmit = function () { submission = buildSubmit(workflow, params.sitestorun); - if (confirm("Will submit " + JSON.stringify(submission))) + if (confirm("Will submit " + JSON.stringify(submission))) { $.ajax({ url: "/submit2", type: "POST", dataType: "json", contentType : 'application/json', - data: JSON.stringify({documents: submission}) + data: JSON.stringify({documents: submission}), + success: function () { + showStatus(workflow); + alert('Action Submitted'); + } }); + } }; addOptions(form, params); @@ -361,4 +378,5 @@ function makeForm(workflow) { function prepareSubmit (workflow) { makeForm(workflow); setReasons(); + showStatus(workflow); }; diff --git a/workflowwebtools/web/templates/workflowtables2.html b/workflowwebtools/web/templates/workflowtables2.html index c3964d5..50557b1 100644 --- a/workflowwebtools/web/templates/workflowtables2.html +++ b/workflowwebtools/web/templates/workflowtables2.html @@ -18,6 +18,7 @@

${workflow}

+
Global Errors
diff --git a/workflowwebtools/workflowtools.py b/workflowwebtools/workflowtools.py index c1311a4..887f258 100644 --- a/workflowwebtools/workflowtools.py +++ b/workflowwebtools/workflowtools.py @@ -185,6 +185,10 @@ def get_status(self, workflow): return "none" return "acted" if status else "pending" + @cherrypy.expose + @cherrypy.tools.json_out() + def getstatus(self, workflow): + return {'status': self.get_status(workflow).capitalize()} def get(self, workflow): self.wflock.acquire() @@ -231,10 +235,12 @@ def getworkflows(self, prepid): @cherrypy.expose @cherrypy.tools.json_in() + @cherrypy.tools.json_out() def submit2(self): input_json = cherrypy.request.json manageactions.submit2(input_json['documents']) - return 'Done' + self.update_statuses() + return {'message': 'Done'} @cherrypy.expose From 042812d139a84be2096f882ac4c039177eec1cd2 Mon Sep 17 00:00:00 2001 From: dabercro Date: Wed, 5 Dec 2018 16:13:43 -0500 Subject: [PATCH 2/6] Working ML from Abhishek --- workflowwebtools/predict/evaluate.py | 177 ++++++++++++++++++++++++++- 1 file changed, 174 insertions(+), 3 deletions(-) diff --git a/workflowwebtools/predict/evaluate.py b/workflowwebtools/predict/evaluate.py index 7394b5a..4b7dab9 100644 --- a/workflowwebtools/predict/evaluate.py +++ b/workflowwebtools/predict/evaluate.py @@ -1,12 +1,181 @@ +# pylint: disable=missing-docstring, too-complex, invalid-name, too-many-branches, too-many-locals + """ A module that evaluates a model and returns the prediction """ -#from cmstoolbox import sitereadiness +import os +import random +import itertools + +import numpy as np +import pandas as pd +import keras as K + + +def modified_site_name(site): + site_name = site.split('_')[:-1] + s = '' + for i in site_name: + s = s+i+'_' + s = s.rstrip('_') + return s + +def build_table(df, template_table): + sparse_df = template_table.copy() + + tier0_sites, tier1_sites, tier2_sites, tier3_sites = [], [], [], [] + for i in sparse_df.keys(): + if i != 'NA': + if i[1] == '0': + tier0_sites.append(i) + elif i[1] == '1': + tier1_sites.append(i) + elif i[1] == '2': + tier2_sites.append(i) + elif i[1] == '3': + tier3_sites.append(i) + + n0, n1, n2, n3 = len(tier0_sites), len(tier1_sites), len(tier2_sites), len(tier3_sites) + for exit_code, site_dict in zip(df.keys(), df.values()): + exit_code = int(exit_code) + for site, count in site_dict.items(): + + chosen_site = None + site_present_in_training_data = site in sparse_df.keys() + if not site_present_in_training_data: + + site = modified_site_name(site) + cond = site in sparse_df.keys() + if cond: + chosen_site = site + + print "Detected a site %s which was not present in the training dataset" % site + print "We would use a proxy site for this based on whether it is T1, T2 or T3" + tier = site.split("_")[0][1] + if chosen_site is None: + if tier == '1': + chosen_num = random.randint(0, n1-1) + chosen_site = tier1_sites[chosen_num] + elif tier == '2': + chosen_num = random.randint(0, n2-1) + chosen_site = tier2_sites[chosen_num] + print 'The chosen site is ', chosen_site + elif tier == '3': + chosen_num = random.randint(0, n3-1) + chosen_site = tier3_sites[chosen_num] + print 'The chosen site is ', chosen_site + elif tier == '0': + chosen_num = random.randint(0, n0-1) + chosen_site = tier0_sites[chosen_num] + print 'The chosen site is ', chosen_site + else: + continue + if chosen_site is None: + chosen_site = site + + if np.isnan(count) or np.isnan(sparse_df.loc[exit_code, chosen_site]): + sparse_df.loc[exit_code, chosen_site] = 0 + else: + sparse_df.loc[exit_code, chosen_site] = count + + return sparse_df + + +def list_of_sites(x): + return [item.keys() for item in x] or ['NA'] + +def build_table_flatten(x): + d_outer = [] + + for column in x: + + for item in x[column]: + d_outer.append(item) + + return d_outer + + +def pred(errors): + # Needs all of these files to be local + for filename in ['sparse_table.csv', 'actionfile.txt', 'my_model.h5']: + if not os.path.exists(filename): + return ['TBD'] + + df = pd.DataFrame(columns=('workflow', 'errors')) + base_data = [] + for i in errors: + it = i.items() + base_data.extend(it) + + for i, dat in enumerate(base_data): + workflow, error_dict = dat[0], dat[1] + + if 'NotReported' in error_dict: + error_dict[-1] = error_dict.pop('NotReported') + + df.loc[i] = [workflow, error_dict] + + template_table = pd.read_csv("sparse_table.csv").set_index("Unnamed: 0") + template_table[:] = 0 + df['errors_sites_exit_codes'] = df['errors'].apply(lambda x: x.keys() if x else ['0']) + + df['errors_sites_dict'] = df['errors'].apply(lambda x: x.values() if x else [{'NA': 0}]) + + df['errors_sites_list'] = df['errors_sites_dict'].apply(list_of_sites) + + list2d = df['errors_sites_exit_codes'].tolist() + + sites_exit_codes = sorted(set(list(itertools.chain.from_iterable(list2d))), key=int) + sites_exit_codes = [str(x) for x in sites_exit_codes] + + list2d_step1 = df['errors_sites_list'].tolist() + list2d_step2 = list(itertools.chain.from_iterable(list2d_step1)) + site_names = sorted(set(list(itertools.chain.from_iterable(list2d_step2)))) + site_names = [str(x) for x in site_names] + + df['table_sites'] = df['errors'].apply( + lambda x: build_table(x, template_table)) + df['table_sites_flatten'] = df['table_sites'].apply(build_table_flatten) + x_dataframe = df.loc[:, "table_sites_flatten"] + x_matrix = x_dataframe.values + feature_size = len(x_matrix[0]) + res = [] + clip_length = len(x_matrix[0]) + for i in x_matrix: + i_clipped = i[:clip_length] + res.extend(i_clipped) + + res = np.asarray(res).reshape(-1, feature_size) + mask = ~np.any(pd.isnull(res), axis=1) + res = res[mask] + + model = K.models.load_model('my_model.h5') + predicted_actions_encoded = model.predict(np.array(np.asfarray(res))) + predicted_actions_encoded = np.round(predicted_actions_encoded) + + action_code_dictionary = {} + a = np.genfromtxt("actionfile.txt", delimiter='\t', dtype=str) + b = list(i.split(' ') for i in a) + for i in b: + + action_code_dictionary[int(i[1])] = i[0] + + predicted_actions = [] + for i in predicted_actions_encoded: + pos = np.argmax(i) + + if pos in action_code_dictionary: + predicted_actions.append(action_code_dictionary[pos]) + else: + predicted_actions.append(-1) + + K.backend.clear_session() + return predicted_actions -def predict(errors): # pylint: disable=unused-argument +def predict(wf_obj): """ Takes the errors for a workflow and makes an action prediction :param workflowwebtool.workflowinfo.WorkflowInfo errors: @@ -15,4 +184,6 @@ def predict(errors): # pylint: disable=unused-argument :rtype: dict """ - return {'Action': 'TBD'} + return { + 'Action': pred([wf_obj.get_errors(True)])[0] + } From ea94b6732ffd554cc2908e136d78739181ceb284 Mon Sep 17 00:00:00 2001 From: dabercro Date: Wed, 5 Dec 2018 16:19:42 -0500 Subject: [PATCH 3/6] More packages to install for the machine learning. --- setup.py | 5 ++++- workflowwebtools/predict/evaluate.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 1ed36f9..b7a8980 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,9 @@ 'validators', 'tabulate', 'pymongo<3.5.0', - 'cx_Oracle' + 'cx_Oracle', + 'pandas', + 'keras', + 'tensorflow' ] ) diff --git a/workflowwebtools/predict/evaluate.py b/workflowwebtools/predict/evaluate.py index 4b7dab9..21a8dc2 100644 --- a/workflowwebtools/predict/evaluate.py +++ b/workflowwebtools/predict/evaluate.py @@ -178,7 +178,7 @@ def pred(errors): def predict(wf_obj): """ Takes the errors for a workflow and makes an action prediction - :param workflowwebtool.workflowinfo.WorkflowInfo errors: + :param workflowwebtool.workflowinfo.WorkflowInfo wf_obj: The WorkflowInfo object that we want to perform a prediction on :returns: Prediction results to be passed back to a browser :rtype: dict From 28aff40a01903ecd97ad28e679cc1008b9ced155 Mon Sep 17 00:00:00 2001 From: Daniel Abercrombie Date: Wed, 5 Dec 2018 16:31:47 -0500 Subject: [PATCH 4/6] Bump version --- workflowwebtools/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflowwebtools/__init__.py b/workflowwebtools/__init__.py index f4bbbbb..35abadf 100644 --- a/workflowwebtools/__init__.py +++ b/workflowwebtools/__init__.py @@ -4,6 +4,6 @@ :author: Daniel Abercrombie """ -__version__ = '0.8.3' +__version__ = '0.9.0' __all__ = [] From e59f48b46273b5ef1d9f4595d911ca91ed5003e5 Mon Sep 17 00:00:00 2001 From: Daniel Abercrombie Date: Thu, 6 Dec 2018 13:58:23 -0500 Subject: [PATCH 5/6] Don't use session DB for the max errors calculation --- workflowwebtools/__init__.py | 2 +- workflowwebtools/classifyerrors.py | 38 +++++++++++++++++------------- workflowwebtools/workflowtools.py | 4 ++-- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/workflowwebtools/__init__.py b/workflowwebtools/__init__.py index 35abadf..28b7412 100644 --- a/workflowwebtools/__init__.py +++ b/workflowwebtools/__init__.py @@ -4,6 +4,6 @@ :author: Daniel Abercrombie """ -__version__ = '0.9.0' +__version__ = '0.9.1' __all__ = [] diff --git a/workflowwebtools/classifyerrors.py b/workflowwebtools/classifyerrors.py index 1673255..5c8e5d3 100644 --- a/workflowwebtools/classifyerrors.py +++ b/workflowwebtools/classifyerrors.py @@ -8,10 +8,12 @@ import re +from collections import defaultdict + from .procedures import PROCEDURES from .globalerrors import check_session -def classifyerror(errorcode, workflow, session=None): +def classifyerror(errorcode, workflow): """ Return the most relevant characteristics of an error code for this session. This will include things like: @@ -24,8 +26,7 @@ def classifyerror(errorcode, workflow, session=None): More error types should be added to this function as needed :param int errorcode: The error code that we want to classify - :param str workflow: the workflow that we want to get the errors from - :param cherrypy.Session session: Is the user's cherrypy session + :param workflowinfo.WorkflowInfo workflow: the workflow that we want to get the errors from :returns: A tuple of strings describing the key characteristics of the errorcode. These strings are good for printing directly in web browsers. The first string is the types of errors reported with this error code. @@ -36,7 +37,7 @@ def classifyerror(errorcode, workflow, session=None): procedure = PROCEDURES.get(errorcode, {}) - logs = check_session(session).get_workflow(workflow).get_explanation(str(errorcode)) + logs = workflow.get_explanation(str(errorcode)) error_re = re.compile(r'[\w\s]+ \(Exit code: (\d+)\)') error_types = {} @@ -79,26 +80,29 @@ def classifyerror(errorcode, workflow, session=None): additional_actions_string.replace(' |br| |br| ', '
')) -def get_max_errorcode(workflow, session=None): +def get_max_errorcode(workflow): """ Get the errorcode with the most errors for a session - :param str workflow: Is the primary name of the workflow - :param cherrypy.Session session: Is the user's cherrypy session + :param workflowinfo.WorkflowInfo workflow: the workflow that we want to get the errors from :returns: The error code that appears most often for this workflow :rtype: int """ + errors = workflow.get_errors(True) + errors_summed = defaultdict(int) - curs, _, allerrors, _ = check_session(session).info - - num_errors = [] - - for errorcode in allerrors: - output = curs.execute("SELECT SUM(numbererrors) FROM workflows WHERE " - "stepname LIKE '/{0}/%' AND errorcode={1}".\ - format(workflow, errorcode)) + for codes in errors.values(): + for errorcode, sites in codes.items(): + numcode = -1 if errorcode == 'NotReported' else int(errorcode) + for num in sites.values(): + errors_summed[numcode] += num - num_errors.append(output[0]) + output = 0 + max_num = 0 + for code, num in errors_summed.items(): + if num > max_num: + max_num = num + output = code - return allerrors[num_errors.index(max(num_errors))] + return output diff --git a/workflowwebtools/workflowtools.py b/workflowwebtools/workflowtools.py index 887f258..0426d54 100644 --- a/workflowwebtools/workflowtools.py +++ b/workflowwebtools/workflowtools.py @@ -603,8 +603,8 @@ def classifyerror(self, workflow): self.seeworkflowlock.acquire() try: - max_error = classifyerrors.get_max_errorcode(workflow, cherrypy.session) - main_error_class = classifyerrors.classifyerror(max_error, workflow, cherrypy.session) + max_error = classifyerrors.get_max_errorcode(self.get(workflow)) + main_error_class = classifyerrors.classifyerror(max_error, self.get(workflow)) output = { 'maxerror': max_error, From b61ed4f7636cda76e42a4baef87b559aa9d93abd Mon Sep 17 00:00:00 2001 From: Daniel Abercrombie Date: Thu, 6 Dec 2018 14:02:13 -0500 Subject: [PATCH 6/6] Take out unused import --- workflowwebtools/classifyerrors.py | 1 - 1 file changed, 1 deletion(-) diff --git a/workflowwebtools/classifyerrors.py b/workflowwebtools/classifyerrors.py index 5c8e5d3..aa9910b 100644 --- a/workflowwebtools/classifyerrors.py +++ b/workflowwebtools/classifyerrors.py @@ -11,7 +11,6 @@ from collections import defaultdict from .procedures import PROCEDURES -from .globalerrors import check_session def classifyerror(errorcode, workflow): """