Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
'validators',
'tabulate',
'pymongo<3.5.0',
'cx_Oracle'
'cx_Oracle',
'pandas',
'keras',
'tensorflow'
]
)
2 changes: 1 addition & 1 deletion workflowwebtools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
:author: Daniel Abercrombie <dabercro@mit.edu>
"""

__version__ = '0.8.2'
__version__ = '0.9.1'

__all__ = []
39 changes: 21 additions & 18 deletions workflowwebtools/classifyerrors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@

import re

from collections import defaultdict

from .procedures import PROCEDURES
from .globalerrors import check_session

def classifyerror(errorcode, workflow, session=None):
def classifyerror(errorcode, workflow):
"""
Return the most relevant characteristics of an error code for this session.
This will include things like:
Expand All @@ -24,8 +25,7 @@ def classifyerror(errorcode, workflow, session=None):
More error types should be added to this function as needed

:param int errorcode: The error code that we want to classify
:param str workflow: the workflow that we want to get the errors from
:param cherrypy.Session session: Is the user's cherrypy session
:param workflowinfo.WorkflowInfo workflow: the workflow that we want to get the errors from
:returns: A tuple of strings describing the key characteristics of the errorcode.
These strings are good for printing directly in web browsers.
The first string is the types of errors reported with this error code.
Expand All @@ -36,7 +36,7 @@ def classifyerror(errorcode, workflow, session=None):

procedure = PROCEDURES.get(errorcode, {})

logs = check_session(session).get_workflow(workflow).get_explanation(str(errorcode))
logs = workflow.get_explanation(str(errorcode))

error_re = re.compile(r'[\w\s]+ \(Exit code: (\d+)\)')
error_types = {}
Expand Down Expand Up @@ -79,26 +79,29 @@ def classifyerror(errorcode, workflow, session=None):
additional_actions_string.replace(' |br| |br| ', '<br>'))


def get_max_errorcode(workflow, session=None):
def get_max_errorcode(workflow):
"""
Get the errorcode with the most errors for a session

:param str workflow: Is the primary name of the workflow
:param cherrypy.Session session: Is the user's cherrypy session
:param workflowinfo.WorkflowInfo workflow: the workflow that we want to get the errors from
:returns: The error code that appears most often for this workflow
:rtype: int
"""

errors = workflow.get_errors(True)
errors_summed = defaultdict(int)

curs, _, allerrors, _ = check_session(session).info

num_errors = []

for errorcode in allerrors:
output = curs.execute("SELECT SUM(numbererrors) FROM workflows WHERE "
"stepname LIKE '/{0}/%' AND errorcode={1}".\
format(workflow, errorcode))
for codes in errors.values():
for errorcode, sites in codes.items():
numcode = -1 if errorcode == 'NotReported' else int(errorcode)
for num in sites.values():
errors_summed[numcode] += num

num_errors.append(output[0])
output = 0
max_num = 0
for code, num in errors_summed.items():
if num > max_num:
max_num = num
output = code

return allerrors[num_errors.index(max(num_errors))]
return output
179 changes: 175 additions & 4 deletions workflowwebtools/predict/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,189 @@
# pylint: disable=missing-docstring, too-complex, invalid-name, too-many-branches, too-many-locals

"""
A module that evaluates a model and returns the prediction
"""

#from cmstoolbox import sitereadiness
import os
import random
import itertools

import numpy as np
import pandas as pd
import keras as K


def modified_site_name(site):
    """
    Strip the last underscore-separated component from a site name,
    e.g. ``'T1_DE_KIT_Disk'`` -> ``'T1_DE_KIT'``.

    :param str site: the full site name
    :returns: the site name without its final component
        (``''`` when the name has no underscore)
    :rtype: str
    """
    # str.join replaces the original quadratic concatenate-then-rstrip loop;
    # the rstrip keeps identical behavior for names with empty components
    # (e.g. 'T2__X' -> 'T2', matching the old code)
    return '_'.join(site.split('_')[:-1]).rstrip('_')

def build_table(df, template_table):
    """
    Fill a zeroed copy of the training-time (exit code x site) template with
    the error counts of one workflow.

    Sites absent from the training data are mapped to a proxy column: first
    by stripping the last name component (``modified_site_name``), and
    failing that by picking a random known site from the same tier.

    :param dict df: maps exit code (str or int) -> {site name: error count}
    :param pandas.DataFrame template_table: training-time template whose
        index is exit codes and whose columns are site names
    :returns: a filled copy of the template; the template itself is untouched
    :rtype: pandas.DataFrame
    """
    sparse_df = template_table.copy()

    # Bucket the known site columns by tier (the digit after the leading 'T')
    tiers = {'0': [], '1': [], '2': [], '3': []}
    for column in sparse_df.keys():
        if column != 'NA' and column[1] in tiers:
            tiers[column[1]].append(column)

    for exit_code, site_dict in df.items():
        exit_code = int(exit_code)
        for site, count in site_dict.items():

            chosen_site = None
            if site not in sparse_df.keys():
                # Unknown site: first try dropping the trailing component
                # (e.g. '..._Disk' buffer suffixes)
                site = modified_site_name(site)
                if site in sparse_df.keys():
                    chosen_site = site

                # print() calls replace the Python-2-only print statements
                # the original used (they were a SyntaxError under Python 3)
                print("Detected a site %s which was not present in the training dataset" % site)
                print("We would use a proxy site for this based on whether it is T1, T2 or T3")
                # NOTE(review): raises IndexError when the stripped name is
                # empty — same failure mode as the original code
                tier = site.split("_")[0][1]
                if chosen_site is None:
                    if tier in tiers:
                        # Fall back to a random known site of the same tier
                        chosen_site = random.choice(tiers[tier])
                        print('The chosen site is %s' % chosen_site)
                    else:
                        # Unrecognized tier: drop this count entirely
                        continue
            if chosen_site is None:
                chosen_site = site

            # Guard against NaNs in either the incoming count or the template
            if np.isnan(count) or np.isnan(sparse_df.loc[exit_code, chosen_site]):
                sparse_df.loc[exit_code, chosen_site] = 0
            else:
                sparse_df.loc[exit_code, chosen_site] = count

    return sparse_df


def list_of_sites(x):
    """Return the site-name key views of each error dict in *x*, or ['NA'] when *x* is empty."""
    per_entry = []
    for entry in x:
        per_entry.append(entry.keys())
    if per_entry:
        return per_entry
    return ['NA']

def build_table_flatten(x):
    """Concatenate the values of every column of *x* into a single flat list, in column order."""
    return [value for column in x for value in x[column]]


def pred(errors):
    """
    Run the trained Keras model over a list of per-workflow error dicts and
    return one predicted action name per workflow.

    :param list errors: list of dicts mapping workflow name ->
        {exit_code: {site name: count}} -- schema inferred from usage in
        ``predict``; verify against WorkflowInfo.get_errors(True)
    :returns: the predicted action names (``-1`` for a model output class
        with no entry in the action file), or ``['TBD']`` when any of the
        model artifact files is missing locally
    :rtype: list
    """
    # Needs all of these files to be local
    for filename in ['sparse_table.csv', 'actionfile.txt', 'my_model.h5']:
        if not os.path.exists(filename):
            return ['TBD']

    # Flatten the list of {workflow: error_dict} inputs into
    # (workflow, error_dict) rows of a two-column frame
    df = pd.DataFrame(columns=('workflow', 'errors'))
    base_data = []
    for i in errors:
        it = i.items()
        base_data.extend(it)

    for i, dat in enumerate(base_data):
        workflow, error_dict = dat[0], dat[1]

        # 'NotReported' is encoded as exit code -1 for the model
        if 'NotReported' in error_dict:
            error_dict[-1] = error_dict.pop('NotReported')

        df.loc[i] = [workflow, error_dict]

    # Template with the exit-code index and site columns seen at training
    # time; zeroed so each workflow fills in only its own counts
    template_table = pd.read_csv("sparse_table.csv").set_index("Unnamed: 0")
    template_table[:] = 0
    df['errors_sites_exit_codes'] = df['errors'].apply(lambda x: x.keys() if x else ['0'])

    df['errors_sites_dict'] = df['errors'].apply(lambda x: x.values() if x else [{'NA': 0}])

    df['errors_sites_list'] = df['errors_sites_dict'].apply(list_of_sites)

    list2d = df['errors_sites_exit_codes'].tolist()

    # All exit codes seen in the input, sorted numerically as strings
    sites_exit_codes = sorted(set(list(itertools.chain.from_iterable(list2d))), key=int)
    sites_exit_codes = [str(x) for x in sites_exit_codes]

    # All site names seen in the input
    list2d_step1 = df['errors_sites_list'].tolist()
    list2d_step2 = list(itertools.chain.from_iterable(list2d_step1))
    site_names = sorted(set(list(itertools.chain.from_iterable(list2d_step2))))
    site_names = [str(x) for x in site_names]

    # One sparse (exit code x site) table per workflow, flattened into a
    # fixed-length feature vector for the network
    df['table_sites'] = df['errors'].apply(
        lambda x: build_table(x, template_table))
    df['table_sites_flatten'] = df['table_sites'].apply(build_table_flatten)
    x_dataframe = df.loc[:, "table_sites_flatten"]
    x_matrix = x_dataframe.values
    feature_size = len(x_matrix[0])
    res = []
    # NOTE(review): clip_length always equals feature_size here, so the
    # clipping below is currently a no-op
    clip_length = len(x_matrix[0])
    for i in x_matrix:
        i_clipped = i[:clip_length]
        res.extend(i_clipped)

    # Stack into (n_workflows, feature_size) and drop rows containing NaN
    res = np.asarray(res).reshape(-1, feature_size)
    mask = ~np.any(pd.isnull(res), axis=1)
    res = res[mask]

    model = K.models.load_model('my_model.h5')
    predicted_actions_encoded = model.predict(np.array(np.asfarray(res)))
    predicted_actions_encoded = np.round(predicted_actions_encoded)

    # actionfile.txt maps '<action_name> <class_index>' per line
    action_code_dictionary = {}
    a = np.genfromtxt("actionfile.txt", delimiter='\t', dtype=str)
    b = list(i.split(' ') for i in a)
    for i in b:

        action_code_dictionary[int(i[1])] = i[0]

    # Decode each model output row back to the action with the highest score
    predicted_actions = []
    for i in predicted_actions_encoded:
        pos = np.argmax(i)

        if pos in action_code_dictionary:
            predicted_actions.append(action_code_dictionary[pos])
        else:
            predicted_actions.append(-1)

    # Free the TF graph/session memory between calls
    K.backend.clear_session()

    return predicted_actions


def predict(errors): # pylint: disable=unused-argument
def predict(wf_obj):
"""
Takes the errors for a workflow and makes an action prediction
:param workflowwebtool.workflowinfo.WorkflowInfo errors:
:param workflowwebtool.workflowinfo.WorkflowInfo wf_obj:
The WorkflowInfo object that we want to perform a prediction on
:returns: Prediction results to be passed back to a browser
:rtype: dict
"""

return {'Action': 'TBD'}
return {
'Action': pred([wf_obj.get_errors(True)])[0]
}
22 changes: 20 additions & 2 deletions workflowwebtools/web/static/js/submit.js
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,18 @@ function buildSubmit (workflow, sitesToRun) {
return output;
}


// Fetch the current action status for a workflow from the /getstatus
// endpoint and render it into the #actionstatus element.
// No error handler: a failed request leaves the element unchanged.
function showStatus(workflow) {
  $.ajax({
      url: "/getstatus",
      data: {"workflow": workflow},
      success: function (status) {
        // Server returns {"status": "..."} (see getstatus in workflowtools.py)
        document.getElementById("actionstatus").innerHTML = "Action: " + status.status;
      }
    });
}


function makeForm(workflow) {

$.ajax({
Expand All @@ -326,14 +338,19 @@ function makeForm(workflow) {
form.action = "javascript:;";
form.onsubmit = function () {
submission = buildSubmit(workflow, params.sitestorun);
if (confirm("Will submit " + JSON.stringify(submission)))
if (confirm("Will submit " + JSON.stringify(submission))) {
$.ajax({
url: "/submit2",
type: "POST",
dataType: "json",
contentType : 'application/json',
data: JSON.stringify({documents: submission})
data: JSON.stringify({documents: submission}),
success: function () {
showStatus(workflow);
alert('Action Submitted');
}
});
}
};

addOptions(form, params);
Expand Down Expand Up @@ -361,4 +378,5 @@ function makeForm(workflow) {
function prepareSubmit (workflow) {
makeForm(workflow);
setReasons();
showStatus(workflow);
};
1 change: 1 addition & 0 deletions workflowwebtools/web/templates/workflowtables2.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

<center id="top">
<h1>${workflow}</h1>
<div id="actionstatus"></div>
<a href="/globalerror2">Global Errors</a> <br>
</center>

Expand Down
12 changes: 9 additions & 3 deletions workflowwebtools/workflowtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ def get_status(self, workflow):
return "none"
return "acted" if status else "pending"

@cherrypy.expose
@cherrypy.tools.json_out()
def getstatus(self, workflow):
    """
    AJAX endpoint returning the action status of a workflow as JSON.

    :param str workflow: the name of the workflow to look up
    :returns: ``{'status': <status>}`` where the status is the capitalized
        result of :py:func:`get_status` ('None', 'Acted', or 'Pending')
    :rtype: dict
    """
    return {'status': self.get_status(workflow).capitalize()}

def get(self, workflow):
self.wflock.acquire()
Expand Down Expand Up @@ -231,10 +235,12 @@ def getworkflows(self, prepid):

@cherrypy.expose
@cherrypy.tools.json_in()
@cherrypy.tools.json_out()
def submit2(self):
input_json = cherrypy.request.json
manageactions.submit2(input_json['documents'])
return 'Done'
self.update_statuses()
return {'message': 'Done'}


@cherrypy.expose
Expand Down Expand Up @@ -597,8 +603,8 @@ def classifyerror(self, workflow):
self.seeworkflowlock.acquire()

try:
max_error = classifyerrors.get_max_errorcode(workflow, cherrypy.session)
main_error_class = classifyerrors.classifyerror(max_error, workflow, cherrypy.session)
max_error = classifyerrors.get_max_errorcode(self.get(workflow))
main_error_class = classifyerrors.classifyerror(max_error, self.get(workflow))

output = {
'maxerror': max_error,
Expand Down