In [1]:
from aiida import load_profile

profile = load_profile()
import numpy as np
from aiida_kkr.workflows import combine_imps_wc
from aiida.orm import load_node, Dict, groups
# load classes and functions
from aiida.orm import CifData, Code, Dict, load_node, StructureData 
from aiida.orm import (Group, load_group, load_node, load_code, groups,groups,
                      WorkChainNode, CalcFunctionNode)
from aiida.engine import submit, exceptions
from aiida.common.exceptions import NotExistent
from aiida_kkr.workflows import kkr_imp_sub_wc, kkr_imp_dos, kkr_imp_wc, kkr_startpot_wc, combine_imps_wc, kkr_flex_wc
import numpy as np
from aiida_kkr.calculations import KkrimpCalculation, VoronoiCalculation, KkrCalculation
import matplotlib.pyplot as plt
from aiida_kkr.tools.common_workfunctions import get_username
import matplotlib.gridspec as gridspec
from aiida_kkr.tools import plot_kkr
from aiida_kkr.tools.combine_imps import combine_potentials_cf
from aiida.common.exceptions import NotExistentAttributeError 
from aiida.common.exceptions import NotExistent

In [2]:
# Section 1: delete nodes from the database as well as from the remote work directory
### A function to delete the calculation output data of a list of calcjobs (pks) from the remote directory.

# To be combined with the del_node function later on.
# This has been done successfully.

# Please note that it has been tested for a single calc list only.
def delete_remote_workdir(pks, verbosity=0, dry_run= True):
    """
    Delete the remote working directories of a list of calculation jobs.

    Nodes that cannot be loaded, that have no computer or that have no remote
    working directory are reported and skipped.

    :param pks: list of calcjob identifiers (pk or uuid); already loaded nodes are accepted as well
    :param int verbosity: 0 prints nothing besides warnings and errors,
                          1 currently behaves like 0,
                          2 prints the computer label of each calcjob and asks for confirmation,
                          3 additionally prints each path and its parent directory.
    :param bool dry_run: if True (default) nothing is removed from the remote computer
    :return: None
    """
    import posixpath

    # the module level ``exceptions`` name of this notebook comes from aiida.engine,
    # therefore the AiiDA exception is imported explicitly here
    from aiida.common.exceptions import NotExistent
    from aiida.orm import load_node

    # one (computer, remote_path, parent_path) entry per calcjob owning a remote folder,
    # so that every folder is later removed through the transport of its own computer
    targets = []
    for pk in pks:
        if isinstance(pk, (int, str)):
            try:
                node = load_node(pk)
            except NotExistent:
                print('Node {} does not exist in the database, skipping it.'.format(pk))
                continue
        else:
            node = pk

        computer = getattr(node, 'computer', None)
        try:
            remote_path = node.get_remote_workdir()
        except (AttributeError, NotExistent) as ex:
            print(f'Node (label,pk) ({node.label, node.pk}) does not have remote workdir. with exception : {ex}')
            continue
        if computer is None or not isinstance(remote_path, str):
            continue

        if verbosity >= 2:
            print(computer.label)
        # only the last path component is stripped (remote paths are assumed to be POSIX paths)
        parent_path = posixpath.dirname(remote_path.rstrip('/'))
        targets.append((computer, remote_path, parent_path))

    if verbosity == 3:
        for _, remote_path, parent_path in targets:
            print('Before the delation the original path list : {}\n'.format(remote_path))
            print('After deletion the modefied or updated path : {}'.format(parent_path))

    if verbosity in [2,3]:
        val = input("Are you agree to clean the remote workdir (y/n) : ")
    else:
        val = 'y'

    if str(val) not in ('y', 'Y'):
        print('Nothing to clean from the remote workdir!')
        return
    if dry_run:
        return

    for computer, remote_path, _ in targets:
        try:
            transport = computer.get_transport()
            # only close the connection again if it was opened here
            opened_here = not transport.is_open
            if opened_here:
                transport.open()
            try:
                transport.rmtree(remote_path)
            finally:
                if opened_here:
                    transport.close()
        except IOError as ex:
            print('Uable to open the computer transport: ', ex)
        except NotExistent:
            print('Not possible to connect to the remote directory')

# Section 5: delete nodes from the database as well as from the remote work directory
## Returns all the calcjobs called (directly or through sub-workchains) by a WC node
def find_calcJob(pk_or_node, debug=True):
    """
    Collect all calcjobs below a workchain (or the calcjob itself).

    The process tree is walked level by level with ``get_calcjob_wc``: the
    calcjobs of the given node come first, followed by those of its
    sub-workchains, their sub-workchains and so on.

    :param pk_or_node: pk or uuid of a node, or an already loaded node
    :param bool debug: print how the input was interpreted
    :return: list of calcjob nodes
    :raises: whatever ``load_node`` raises if the identifier cannot be loaded
    """
    if isinstance(pk_or_node, (int, str)):
        if debug:
            print('This is pk')
        try:
            node = load_node(pk_or_node)
        except Exception:
            # report the offending identifier, then let the caller see the real error
            # instead of failing later on an unbound ``node``
            print('{} is nither node ID nor aiida_node. '.format(pk_or_node))
            raise
    else:
        if debug:
            print('This is node.')
        node = pk_or_node

    calcjob_node_list = []
    pending = [node]
    while pending:
        next_level = []
        for current in pending:
            calc_list, wc_list = get_calcjob_wc(current)
            calcjob_node_list += calc_list
            next_level += wc_list
        pending = next_level

    return calcjob_node_list

## This function returns calcjob_list and wc_list from a wc or calcjob node   
def get_calcjob_wc(node):
    """
    Split the direct children of a process node into calcjobs and workchains.

    :param node: a workchain or calcjob node
    :return: tuple ``(calcjobs, workchains)``; for a calcjob node this is
             ``([node], [])``, for any node type other than workchain or
             calcjob it is ``([], [])``
    """
    from aiida.orm import CalcJobNode, WorkChainNode

    kind = node.node_type

    # a calcjob has no children of interest: it is its own (only) calcjob
    if kind == 'process.calculation.calcjob.CalcJobNode.':
        return [node], []

    if kind != 'process.workflow.workchain.WorkChainNode.':
        return [], []

    # outgoing links of the workchain, filtered first by workchains, then by calcjobs
    called_workchains = [link.node for link in node.get_outgoing(node_class=WorkChainNode).all()]
    called_calcjobs = [link.node for link in node.get_outgoing(node_class=CalcJobNode).all()]

    return called_calcjobs, called_workchains

# This is the final del_node function. With it, any given wc node can be deleted from the
# DB, and the data of its calcjobs can be deleted from the remote workdir.
def del_node(node_pks, dry_run=True, verbosity=3, debug=True, only_remote_dir=False,
            only_database=False):
    """
    Delete nodes from the database and/or the data of their calcjobs from the remote workdir.

    Nothing is deleted unless at least one of ``only_remote_dir`` and
    ``only_database`` is set; in that case only the calcjob list is printed.
    Entries of ``node_pks`` that cannot be loaded are reported and skipped.

    :param node_pks: (list) workchains/calcjobs to delete, given as pk, uuid or loaded node
    :param dry_run: do not delete anything, just show the status according to the verbosity
    :param verbosity: 0 to 3, forwarded unchanged to ``delete_remote_workdir`` and ``delete_nodes``
    :param debug: print how each entry of ``node_pks`` was interpreted
    :param only_remote_dir: clean the remote working directories of all calcjobs found
    :param only_database: delete the given nodes from the database with ``delete_nodes``
    :raises NotExistent: if ``only_remote_dir`` is set but no calcjob was found
    """
    from aiida.common.exceptions import NotExistent
    from aiida.orm import load_node
  #  from aiida.manage.database.delete.nodes import delete_nodes
    from aiida.tools import delete_nodes

    calcjobs_list = []
    pks_given = []
    for i in node_pks:
        if isinstance(i, (int, str)):
            if debug:
                print('This might be pk or uiid')
            try:
                node = load_node(i)
            except Exception:
                # skip this entry: continuing would reuse the node of the previous iteration
                print('{} is nither node ID nor aiida_node. '.format(i))
                continue
        else:
            if debug:
                print('This might be a node.')
            node = i

        pks_given.append(node.pk)

        calcjobs_list += find_calcJob(node, debug)
        print('calcjob list : ', calcjobs_list,)

    if only_remote_dir:
        if not calcjobs_list:
            raise NotExistent('No calc Job has been found.')
        # pks are passed (not nodes) because delete_remote_workdir loads the nodes itself
        delete_remote_workdir([calc.pk for calc in calcjobs_list], verbosity=verbosity, dry_run=dry_run)
    if only_database:
        # NOTE(review): ``verbosity`` and ``force`` are keyword arguments of older
        # ``delete_nodes`` versions; confirm against the installed aiida-core.
        delete_nodes(pks_given, verbosity=verbosity, dry_run=dry_run,force=False)
    

In [3]:
## This part of the code is intended for cleaning the potential and retrieved files
import subprocess as sp
from subprocess import CalledProcessError 
class SomeTools(object):
    """
    Helpers that remove the repository files of potential, shape-function and
    retrieved nodes created by ``combine_imps_wc``, ``kkr_imp_wc`` and
    ``kkr_imp_dos_wc`` workflows.

    The files are removed directly from the file repository on disk; the nodes
    themselves stay in the database.
    """

    def __init__(self, aiida_repo_path):
        """
        :param aiida_repo_path: root directory below which ``repository/node/`` is located
        """
        self.start_path = aiida_repo_path

    def uuid_to_full_path(self, uuid):
        """
        Build the glob pattern matching all repository files of a node.

        The uuid is sharded as ``<2 chars>/<2 chars>/<rest>``.

        :param uuid: uuid string of the node
        :return: ``<start_path>/repository/node/ab/cd/<rest>/path/*``
        """
        shards = [uuid[0:2], uuid[2:4], uuid[4:], 'path/*']
        # str() so that path-like objects work as ``aiida_repo_path`` as well
        return str(self.start_path) + '/repository/node/' + '/'.join(shards)

    @classmethod
    def CleanPotentialRetrieved(cls, Wc_or_Cal, aiida_repo_path, deburg=True):
        """
        Remove the repository files of the potentials and retrieved folders of a workflow.

        :param Wc_or_Cal: node with ``process_label`` ``combine_imps_wc``, ``kkr_imp_wc``
                          or ``kkr_imp_dos_wc``
        :param aiida_repo_path: root directory of the file repository
        :param deburg: if True (default) the file patterns are only printed and nothing
                       is deleted
        :raises NotExistent: if the process label is not one of the supported workflows
        """
        import glob
        import os
        import shutil

        self = cls(aiida_repo_path=aiida_repo_path)

        uuid_list = []
        label = Wc_or_Cal.process_label
        if label == 'combine_imps_wc':
            uuid_list.append(Wc_or_Cal.outputs.last_potential.uuid)
        elif label == 'kkr_imp_wc':
            uuid_list.append(Wc_or_Cal.outputs.converged_potential.uuid)
        elif label == 'kkr_imp_dos_wc':
            try:
                uuid_list.append(Wc_or_Cal.outputs.converged_potential.uuid)
            except NotExistentAttributeError:
                print('The DOS WC node is run from the converged kkr_imp_wc.')
        else:
            raise NotExistent('the wc is not combine_imps_wc, kkr_imp_wc or kkr_imp_dos_wc')

        uuid_list = uuid_list + self.colct_kkrimp_retrived(Wc_or_Cal)
        uuid_list = uuid_list + self.colct_pot_sfd(Wc_or_Cal)

        for uuid in uuid_list:
            file_path = self.uuid_to_full_path(uuid)
            if deburg:
                print('file path: ', file_path)
                continue
            # expand the pattern in Python instead of handing a concatenated
            # string to a shell; a pattern without match simply deletes nothing
            for entry in glob.glob(file_path):
                try:
                    if os.path.isdir(entry) and not os.path.islink(entry):
                        shutil.rmtree(entry)
                    else:
                        os.remove(entry)
                except OSError as ex:
                    print('could not remove {}: {}'.format(entry, ex))

    @staticmethod
    def _called_nodes(parent, node_class):
        """Return the nodes of ``node_class`` reached through the outgoing links of ``parent``."""
        return [link.node for link in parent.get_outgoing(node_class=node_class).all()]

    @staticmethod
    def _output_uuids(processes, output_name):
        """Return the uuid of output ``output_name`` of each process, skipping processes without it."""
        uuids = []
        for process in processes:
            try:
                uuids.append(getattr(process.outputs, output_name).uuid)
            except AttributeError:
                # a process without this output must not abort the whole collection
                print('{} has no output "{}", skipping it.'.format(process, output_name))
        return uuids

    @staticmethod
    def colct_kkrimp_retrived(WC_node):
        """
        Collect the uuids of the ``retrieved`` outputs of the calculations of a workflow.

        For ``combine_imps_wc`` these are the directly called ``KkrimpCalculation``s and
        the calculations of the ``kkr_imp_sub_wc`` and ``kkr_flex_wc`` sub-workflows; for
        ``kkr_imp_wc`` and ``kkr_imp_dos_wc`` the calculations of the ``kkr_startpot_wc``,
        ``kkr_flex_wc`` and ``kkr_imp_sub_wc`` sub-workflows.

        :param WC_node: workflow node
        :return: list of uuid strings
        :raises NotExistent: if the process label is not one of the supported workflows
        """
        called = SomeTools._called_nodes
        label = WC_node.process_label
        calculations = []

        if label == 'combine_imps_wc':
            calculations += called(WC_node, KkrimpCalculation)
            for imp_sub in called(WC_node, kkr_imp_sub_wc):
                calculations += called(imp_sub, KkrimpCalculation)
            for flex in called(WC_node, kkr_flex_wc):
                calculations += called(flex, KkrCalculation)

        elif label in ('kkr_imp_wc', 'kkr_imp_dos_wc'):
            for startpot in called(WC_node, kkr_startpot_wc):
                calculations += called(startpot, VoronoiCalculation)
            kkr_flexs = called(WC_node, kkr_flex_wc)
            for flex in kkr_flexs:
                calculations += called(flex, KkrCalculation)
            if label == 'kkr_imp_dos_wc':
                print( 'I m from kkr_imp_dos_wc', kkr_flexs)
            for imp_sub in called(WC_node, kkr_imp_sub_wc):
                calculations += called(imp_sub, KkrimpCalculation)

        else:
            raise NotExistent('The given node is not from combine_imps_wc, kkr_imp_wc or kkr_imp_dos_wc')

        return SomeTools._output_uuids(calculations, 'retrieved')

    @staticmethod
    def colct_pot_sfd(WC_node):
        """
        Collect the uuids of the ``result`` outputs of selected calcfunctions of a workflow.

        The calcfunctions are selected by process label, both among those called by the
        workflow itself and among those called by its ``kkr_imp_sub_wc`` sub-workflows.

        :param WC_node: workflow node
        :return: list of unique uuid strings (in no particular order); empty for
                 unsupported process labels
        """
        label = WC_node.process_label
        if label == 'combine_imps_wc':
            direct_labels = ('combine_potentials_cf', 'extract_imp_pot_sfd', 'neworder_potential_wf')
            nested_labels = ('combine_potentials_cf', 'extract_imp_pot_sfd')
        elif label in ('kkr_imp_wc', 'kkr_imp_dos_wc'):
            direct_labels = ('neworder_potential_wf',)
            nested_labels = ('extract_imp_pot_sfd',)
        else:
            return []

        called = SomeTools._called_nodes
        calcfunctions = [cf for cf in called(WC_node, CalcFunctionNode)
                         if cf.process_label in direct_labels]
        for imp_sub in called(WC_node, kkr_imp_sub_wc):
            calcfunctions += [cf for cf in called(imp_sub, CalcFunctionNode)
                              if cf.process_label in nested_labels]

        return list(set(SomeTools._output_uuids(calcfunctions, 'result')))


In [4]:
# UUIDs of the groups whose nodes are collected for cleaning
grps = [
    '0aa45296-45d4-418b-a596-680f88c3193b',
    '1bed20ba-de50-469c-8bc8-44e4c648daca',
    '762e0df8-6313-460f-b97b-77b68363b878',
    'f8dd374e-4c17-4e46-afd9-171c7f3d1205',
    '9bc27af1-5cc7-4e0e-8ed5-fe6d045eb3e1',
    'be8b271f-560b-44e0-ae45-6ca29c94382d',
    '6b21bb7c-f43d-450f-9aca-56970e1b776c',
    '2b327305-8c53-4637-b2f9-40cc1bdd03d2',
    'b1c6d0e9-4a2a-4f2f-808b-dd52bd878699',
    '96f2ec34-e512-4d47-ac34-d49c36f0c796',
    '982667b6-11ce-4c8c-9e9b-94440356b62f',
    '79b27008-b219-4445-8f16-fcae171c88ce',
    '5c4c6308-d019-487a-8bba-65ffd9596565',
    'ebe1fd31-89f7-4ca1-bf7f-0856940a7ad5',
    '84649009-607a-45c1-a56d-3d7f2593f434',
    'b79c67ed-3a2e-4cf9-878f-f6f16cf5d610',
]

# Gather the nodes of every group that can be loaded; missing groups are reported and skipped
node_for_db_cleaning = []
for grp in grps:
    try:
        grp_184 = list(load_group(grp).nodes)
    except NotExistent:
        print(f'{grp} does not exist')
    else:
        node_for_db_cleaning.extend(grp_184)

In [None]:
# Clean the repository files of every collected node; with deburg=False the files are really removed
aiida_repo_path = profile.repository_path
for node in list(node_for_db_cleaning):
    SomeTools.CleanPotentialRetrieved(
        node,
        aiida_repo_path=aiida_repo_path,
        deburg=False,
    )

In [56]:
# An example of deleting calc or process nodes from the database
# (only_database=True, only_remote_dir=False: the remote directories are left untouched)
# NOTE(review): `node_for_delete` is not defined in any visible cell of this notebook;
# it has to be defined before this cell is run.
for nod in node_for_delete:
    try:
        del_node([nod], dry_run = False, verbosity = 3, 
            debug = False, only_remote_dir = False, only_database= True)
    # NotExistent is the AiiDA exception imported at the top of the notebook;
    # the previously used ObjectDoesNotExist was never imported
    except NotExistent:
        print(f'node {nod} does not exist')

In [14]:
xx = 81256
# Delete node 80950 from the database only (no dry run)
del_node(
    [80950],
    dry_run=False,
    verbosity=3,
    debug=False,
    only_remote_dir=False,
    only_database=True,
)

calcjob list :  [<CalcJobNode: uuid: 861b4e7d-d80b-471e-bc7e-62bf794ae683 (pk: 80950) (aiida.calculations:kkr.voro)>]
