In [None]:
from core.utils import Tibanna
from core import ff_utils
from datetime import datetime

# Set the environment and build the key/connection used by the code below.
# `env` is the environment name handed to Tibanna.
env = 'fourfront-webdev'
tibanna = Tibanna(env=env)
# `ff` is the connection created from the keys exposed by the Tibanna object;
# later code in this file passes it as `connection=ff` to the ff_utils calls.
ff = ff_utils.fdn_connection(key=tibanna.ff_keys)

def get_wfr_report(wfr):
    """Return a short report for a single workflow run item.

    The item's metadata is fetched with ``ff_utils.get_metadata`` through the
    module-level connection ``ff`` and condensed into a flat dictionary.

    Args:
        wfr: Identifier of the workflow run item; passed unchanged to
            ``ff_utils.get_metadata``.

    Returns:
        dict with the keys:
            'wfr_uuid'   -- the item's ``uuid``
            'wfr_status' -- the item's ``run_status``
            'wfr_name'   -- the part of the item's ``title`` before ' run '
            'wfr_date'   -- ``date_created`` parsed into a naive datetime (UTC)
            'run_time'   -- hours elapsed between ``date_created`` and now

    Raises:
        KeyError: if the metadata lacks one of the fields read here.
        ValueError: if ``date_created`` does not match the expected
            '%Y-%m-%dT%H:%M:%S.%f+00:00' layout.
    """
    wfr_data = ff_utils.get_metadata(wfr, connection=ff)
    # The format string requires a literal '+00:00' suffix, so the parsed
    # value is a naive datetime expressed in UTC.
    wfr_time = datetime.strptime(wfr_data['date_created'],
                                 '%Y-%m-%dT%H:%M:%S.%f+00:00')
    # Compare against the current UTC time (not local time); otherwise the
    # elapsed hours are shifted by the machine's UTC offset.
    run_hours = (datetime.utcnow() - wfr_time).total_seconds() / 3600
    return {'wfr_uuid': wfr_data['uuid'],
            'wfr_status': wfr_data['run_status'],
            'wfr_name': wfr_data['title'].split(' run ')[0],
            'wfr_date': wfr_time,
            'run_time': run_hours}

    
def printTable(myDict, colList=None):
    """Pretty print a list of dictionaries as a text table.

    Based on a snippet by Thierry Husson.

    Args:
        myDict: List of dictionaries, one per table row.
        colList: Keys to print, in column order. When omitted (or empty) the
            keys of the first row are used; an empty ``myDict`` gives an
            empty table.

    Each cell is the ``str()`` of the row's value for that column. Only
    ``None`` and missing keys are rendered as an empty cell, so falsy values
    such as ``0`` or ``False`` stay visible. Output goes to stdout: a header
    row, a dashed separator row, then one line per dictionary.
    """
    if not colList:
        colList = list(myDict[0].keys()) if myDict else []
    myList = [colList]  # 1st row = header
    for item in myDict:
        row = []
        for col in colList:
            value = item.get(col)
            row.append('' if value is None else str(value))
        myList.append(row)
    # Width of each column = longest cell in it, header included.
    colSize = [max(len(cell) for cell in column) for column in zip(*myList)]
    formatStr = ' | '.join(["{{:<{}}}".format(width) for width in colSize])
    myList.insert(1, ['-' * width for width in colSize])  # Separating line
    for item in myList:
        print(formatStr.format(*item))


    

In [None]:
# This script looks at the given environment and goes through all the input files
# to see if any of them have a missing/deleted/obsolete input file.
# The first part is only reporting, and the second part is if you want to change the status
# to deleted and also delete output files.
# Give it some time before rerunning.

# For every file of the listed types, collect a report for each item in its
# 'workflow_run_inputs', and for each checked workflow name treat every run
# except the most recent one as outdated. Outdated runs are always counted;
# they are patched to status "deleted" only when the user confirms.

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    _ask = raw_input
except NameError:
    _ask = input

delete_workflows = _ask("Do you want to delete old workflowruns (if not, only report will be displayed (y/n))")
# Decide once whether this is a real run; anything but y/yes is report-only.
do_delete = delete_workflows.strip().lower() in ['y', 'yes']

# what kind of files should be searched for workflow run inputs, use url compatible naming
raw_file_types = ['files-fastq']

# checked workflows
workflow_names = ['md5', 'fastqc-0-11-4-1/1', 'hi-c-processing-parta-juicer/5']

deleted_wfr_no = 0
files_with_deleted_wfr = 0
# get all wfr
for raw_file_type in raw_file_types:
    # get all files for the given file type
    files = ff_utils.get_metadata(raw_file_type, connection=ff)['@graph']
    for raw_file in files:
        # switch for counting files with deleted workflow_runs
        deleted_wf = False
        # get a report on all workflow_runs (none when the field is absent/empty)
        wfrs = raw_file.get('workflow_run_inputs')
        wfr_report = [get_wfr_report(wfr) for wfr in wfrs] if wfrs else []
        # sort the report by date and name, oldest first
        wfr_report = sorted(wfr_report, key=lambda k: (k['wfr_date'], k['wfr_name']))
        # for each type of workflow make a list of old ones, and patch status and description
        for wf_name in workflow_names:
            # [:-1] drops the last (most recent) run, which is the one to keep.
            # NOTE(review): the newest run is kept regardless of its run_status --
            # confirm that is intended when the newest run did not complete.
            sub_list_del = [i for i in wfr_report if i['wfr_name'] == wf_name][:-1]
            if not sub_list_del:
                continue
            deleted_wf = True
            for wfr_to_del in sub_list_del:
                deleted_wfr_no += 1
                if do_delete:
                    patch_data = {'description': "This workflow run is deleted since there was a new run",
                                  'status': "deleted"}
                    ff_utils.patch_metadata(patch_data, obj_id=wfr_to_del['wfr_uuid'], connection=ff)
        if deleted_wf:
            files_with_deleted_wfr += 1


# Call-style print with a single argument behaves the same on Python 2 and 3.
print("{} workflowruns from {} files needs to be deleted".format(deleted_wfr_no, files_with_deleted_wfr))

        
