Skip to content

Commit

Permalink
Recovery: Adjust suspicious replica recoverer creation of rules. ruci…
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristophAmes committed Dec 12, 2023
1 parent 4a8c078 commit aaa4584
Show file tree
Hide file tree
Showing 6 changed files with 258 additions and 122 deletions.
4 changes: 2 additions & 2 deletions bin/rucio-replica-recoverer
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,8 @@ Note that attempting to use the ``--vos`` argument when in single-VO mode will
$ rucio-replica-recoverer --run-once --vos abc xyz
2020-07-28 15:21:33,349 5488 WARNING Ignoring argument vos, this is only applicable in a multi-VO setup.
''', formatter_class=argparse.RawDescriptionHelpFormatter) # NOQA: E501
parser.add_argument("--nattempts", action="store", default=10, help='Minimum count of suspicious file replica appearance in bad_replicas table. Default value is 10.')
parser.add_argument("--younger-than", action="store", default=3, help='Consider all file replicas logged in bad_replicas table since specified number of younger-than days. Default value is 3.')
parser.add_argument("--nattempts", action="store", default=5, help='Minimum count of suspicious file replica appearance in bad_replicas table. Default value is 5.')
parser.add_argument("--younger-than", action="store", default=5, help='Consider all file replicas logged in bad_replicas table since specified number of younger-than days. Default value is 5.')
parser.add_argument('--vos', nargs='+', type=str, help='Optional list of VOs to consider. Only used in multi-VO mode.')
parser.add_argument("--run-once", action="store_true", default=False, help='One iteration only.')
parser.add_argument("--limit-suspicious-files-on-rse", action="store", default=5, help='Maximum number of suspicious replicas on an RSE before that RSE is considered problematic and the suspicious replicas on that RSE are declared "TEMPORARY_UNAVAILABLE". Default value is 5.')
Expand Down
5 changes: 5 additions & 0 deletions etc/suspicious_replica_recoverer.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,9 @@
"datatype": ["RAW"],
"scope": []
},
{
"action": "dry run",
"datatype": [],
"scope": ["mc.*"]
}
]
9 changes: 7 additions & 2 deletions lib/rucio/core/replica.py
Original file line number Diff line number Diff line change
Expand Up @@ -3406,7 +3406,7 @@ def get_replicas_state(scope=None, name=None, *, session: "Session"):


@read_session
def get_suspicious_files(rse_expression, available_elsewhere, filter_=None, logger=logging.log, younger_than=10, nattempts=0, nattempts_exact=False, *, session: "Session", exclude_states=['B', 'R', 'D'], is_suspicious=False):
def get_suspicious_files(rse_expression, available_elsewhere, filter_=None, logger=logging.log, younger_than=5, nattempts=0, nattempts_exact=False, *, session: "Session", exclude_states=['B', 'R', 'D'], is_suspicious=False):
"""
Gets a list of replicas from bad_replicas table which are: declared more than <nattempts> times since <younger_than> date,
present on the RSE specified by the <rse_expression> and do not have a state in <exclude_states> list.
Expand Down Expand Up @@ -3539,7 +3539,12 @@ def get_suspicious_reason(rse_id, scope, name, nattempts=0, logger=logging.log,
query = session.query(bad_replicas_alias.scope, bad_replicas_alias.name, bad_replicas_alias.reason, bad_replicas_alias.rse_id)\
.filter(bad_replicas_alias.rse_id == rse_id,
bad_replicas_alias.scope == scope,
bad_replicas_alias.name == name)
bad_replicas_alias.name == name,
bad_replicas_alias.state == 'S',
~exists(select(1).where(and_(bad_replicas_alias.rse_id == rse_id,
bad_replicas_alias.scope == scope,
bad_replicas_alias.name == name,
bad_replicas_alias.state != 'S',))))
count = query.count()

query_result = query.group_by(bad_replicas_alias.rse_id, bad_replicas_alias.scope, bad_replicas_alias.name, bad_replicas_alias.reason).having(func.count() > nattempts).all()
Expand Down
2 changes: 1 addition & 1 deletion lib/rucio/daemons/auditor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def process_output(output, sanity_check=True, compress=True):
rse = os.path.basename(output[:output.rfind('_')])
rse_id = get_rse_id(rse=rse)
usage = get_rse_usage(rse_id=rse_id, source='rucio')[0]
threshold = config.config_get_float('auditor', 'threshold', False, 0.2)
threshold = config.config_get_float('auditor', 'threshold', False, 0.1)

# Perform a basic sanity check by comparing the number of entries
# with the total number of files on the RSE. If the percentage is
Expand Down
Loading

0 comments on commit aaa4584

Please sign in to comment.