From 9be7b5bc0f8902f2277e1c0c4bac76d2a2e38cbe Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Wed, 22 Mar 2023 16:39:27 +0100 Subject: [PATCH 1/8] feat: added command RegisterPilot --- Pilot/pilotCommands.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/Pilot/pilotCommands.py b/Pilot/pilotCommands.py index c71582d8..8860e97f 100644 --- a/Pilot/pilotCommands.py +++ b/Pilot/pilotCommands.py @@ -520,6 +520,41 @@ def _getSecurityCFG(self): self.cfg.append("-o /DIRAC/Security/KeyFile=%s/hostkey.pem" % self.pp.certsLocation) +class RegisterPilot(CommandBase): + """The Pilot self-announce its own presence""" + + def __init__(self, pilotParams): + """c'tor""" + super(RegisterPilot, self).__init__(pilotParams) + + # this variable contains the options that are passed to dirac-admin-add-pilot + self.cfg = [] + self.pilotStamp = os.environ.get("DIRAC_PILOT_STAMP", '') + + @logFinalizer + def execute(self): + """Calls dirac-admin-add-pilot""" + + if self.pp.useServerCertificate: + self.cfg.append("-o /DIRAC/Security/UseServerCertificate=yes") + if self.pp.localConfigFile: + if LooseVersion(self.releaseVersion) >= self.cfgOptionDIRACVersion: + self.cfg.append("--cfg") + self.cfg.append(self.pp.localConfigFile) # this file is as input + + checkCmd = "dirac-admin-add-pilot %s %s %s %s %s --status=Running %s -d" % ( + self.pp.pilotRef, + self.pp.userDN, + self.pp.userGroup, + self.pp.flavour, + self.pilotStamp, + " ".join(self.cfg), + ) + retCode, _ = self.executeAndGetOutput(checkCmd, self.pp.installEnv) + if retCode: + self.log.error("Could not get execute dirac-admin-add-pilot [ERROR %d]" % retCode) + + class CheckCECapabilities(CommandBase): """Used to get CE tags and other relevant parameters.""" From d2878b03c38872a83ee19df0f1cf68b464daef8a Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Wed, 22 Mar 2023 16:45:21 +0100 Subject: [PATCH 2/8] fix: moved __setFlavour method in RegisterPilots command --- Pilot/pilotCommands.py | 
231 +++++++++++++++++++++-------------------- 1 file changed, 116 insertions(+), 115 deletions(-) diff --git a/Pilot/pilotCommands.py b/Pilot/pilotCommands.py index 8860e97f..68aa5804 100644 --- a/Pilot/pilotCommands.py +++ b/Pilot/pilotCommands.py @@ -535,6 +535,8 @@ def __init__(self, pilotParams): def execute(self): """Calls dirac-admin-add-pilot""" + self.__setFlavour() + if self.pp.useServerCertificate: self.cfg.append("-o /DIRAC/Security/UseServerCertificate=yes") if self.pp.localConfigFile: @@ -554,6 +556,120 @@ def execute(self): if retCode: self.log.error("Could not get execute dirac-admin-add-pilot [ERROR %d]" % retCode) + def __setFlavour(self): + + pilotRef = "Unknown" + self.pp.flavour = "Generic" + + # If pilot reference is specified at submission, then set flavour to DIRAC + # unless overridden by presence of batch system environment variables + if self.pp.pilotReference: + self.pp.flavour = "DIRAC" + pilotRef = self.pp.pilotReference + + # # Batch systems + + # Take the reference from the Torque batch system + if "PBS_JOBID" in os.environ: + self.pp.flavour = "SSHTorque" + pilotRef = "sshtorque://" + self.pp.ceName + "/" + os.environ["PBS_JOBID"].split(".")[0] + + # Take the reference from the OAR batch system + if "OAR_JOBID" in os.environ: + self.pp.flavour = "SSHOAR" + pilotRef = "sshoar://" + self.pp.ceName + "/" + os.environ["OAR_JOBID"] + + # Grid Engine + if "JOB_ID" in os.environ and "SGE_TASK_ID" in os.environ: + self.pp.flavour = "SSHGE" + pilotRef = "sshge://" + self.pp.ceName + "/" + os.environ["JOB_ID"] + # Generic JOB_ID + elif "JOB_ID" in os.environ: + self.pp.flavour = "Generic" + pilotRef = "generic://" + self.pp.ceName + "/" + os.environ["JOB_ID"] + + # LSF + if "LSB_BATCH_JID" in os.environ: + self.pp.flavour = "SSHLSF" + pilotRef = "sshlsf://" + self.pp.ceName + "/" + os.environ["LSB_BATCH_JID"] + + # SLURM batch system + if "SLURM_JOBID" in os.environ: + self.pp.flavour = "SSHSLURM" + pilotRef = "sshslurm://" + self.pp.ceName + 
"/" + os.environ["SLURM_JOBID"] + + # Condor + if "CONDOR_JOBID" in os.environ: + self.pp.flavour = "SSHCondor" + pilotRef = "sshcondor://" + self.pp.ceName + "/" + os.environ["CONDOR_JOBID"] + + # # CEs + + # HTCondor + if "HTCONDOR_JOBID" in os.environ: + self.pp.flavour = "HTCondorCE" + pilotRef = "htcondorce://" + self.pp.ceName + "/" + os.environ["HTCONDOR_JOBID"] + + # This is the CREAM direct submission case + if "CREAM_JOBID" in os.environ: + self.pp.flavour = "CREAM" + pilotRef = os.environ["CREAM_JOBID"] + + if "OSG_WN_TMP" in os.environ: + self.pp.flavour = "OSG" + + # GLOBUS Computing Elements + if "GLOBUS_GRAM_JOB_CONTACT" in os.environ: + self.pp.flavour = "GLOBUS" + pilotRef = os.environ["GLOBUS_GRAM_JOB_CONTACT"] + + # Direct SSH tunnel submission + if "SSHCE_JOBID" in os.environ: + self.pp.flavour = "SSH" + pilotRef = "ssh://" + self.pp.ceName + "/" + os.environ["SSHCE_JOBID"] + + # Batch host SSH tunnel submission (SSHBatch CE) + if "SSHBATCH_JOBID" in os.environ and "SSH_NODE_HOST" in os.environ: + self.pp.flavour = "SSHBATCH" + pilotRef = ( + "sshbatchhost://" + + self.pp.ceName + + "/" + + os.environ["SSH_NODE_HOST"] + + "/" + + os.environ["SSHBATCH_JOBID"] + ) + + # ARC case + # JOBID does not provide the full url in recent versions of ARC + # JOBURL has been introduced recently and should be preferred when present + if "GRID_GLOBAL_JOBID" in os.environ: + self.pp.flavour = "ARC" + pilotRef = os.environ["GRID_GLOBAL_JOBID"] + + if "GRID_GLOBAL_JOBURL" in os.environ: + self.pp.flavour = "ARC" + pilotRef = os.environ["GRID_GLOBAL_JOBURL"] + + # # DIRAC specific + + # VMDIRAC case + if "VMDIRAC_VERSION" in os.environ: + self.pp.flavour = "VMDIRAC" + pilotRef = "vm://" + self.pp.ceName + "/" + os.environ["JOB_ID"] + + # Pilot reference is given explicitly in environment + if "PILOT_UUID" in os.environ: + pilotRef = os.environ["PILOT_UUID"] + + # Pilot reference is specified at submission + if self.pp.pilotReference: + pilotRef = 
self.pp.pilotReference + + self.log.debug("Flavour: %s; pilot reference: %s " % (self.pp.flavour, pilotRef)) + + self.pp.pilotReference = pilotRef + class CheckCECapabilities(CommandBase): """Used to get CE tags and other relevant parameters.""" @@ -757,7 +873,6 @@ def __init__(self, pilotParams): @logFinalizer def execute(self): """Setup configuration parameters""" - self.__setFlavour() self.cfg.append("-o /LocalSite/GridMiddleware=%s" % self.pp.flavour) self.cfg.append('-n "%s"' % self.pp.site) @@ -800,120 +915,6 @@ def execute(self): self.log.error("Could not configure DIRAC [ERROR %d]" % retCode) self.exitWithError(retCode) - def __setFlavour(self): - - pilotRef = "Unknown" - self.pp.flavour = "Generic" - - # If pilot reference is specified at submission, then set flavour to DIRAC - # unless overridden by presence of batch system environment variables - if self.pp.pilotReference: - self.pp.flavour = "DIRAC" - pilotRef = self.pp.pilotReference - - # # Batch systems - - # Take the reference from the Torque batch system - if "PBS_JOBID" in os.environ: - self.pp.flavour = "SSHTorque" - pilotRef = "sshtorque://" + self.pp.ceName + "/" + os.environ["PBS_JOBID"].split(".")[0] - - # Take the reference from the OAR batch system - if "OAR_JOBID" in os.environ: - self.pp.flavour = "SSHOAR" - pilotRef = "sshoar://" + self.pp.ceName + "/" + os.environ["OAR_JOBID"] - - # Grid Engine - if "JOB_ID" in os.environ and "SGE_TASK_ID" in os.environ: - self.pp.flavour = "SSHGE" - pilotRef = "sshge://" + self.pp.ceName + "/" + os.environ["JOB_ID"] - # Generic JOB_ID - elif "JOB_ID" in os.environ: - self.pp.flavour = "Generic" - pilotRef = "generic://" + self.pp.ceName + "/" + os.environ["JOB_ID"] - - # LSF - if "LSB_BATCH_JID" in os.environ: - self.pp.flavour = "SSHLSF" - pilotRef = "sshlsf://" + self.pp.ceName + "/" + os.environ["LSB_BATCH_JID"] - - # SLURM batch system - if "SLURM_JOBID" in os.environ: - self.pp.flavour = "SSHSLURM" - pilotRef = "sshslurm://" + self.pp.ceName + "/" 
+ os.environ["SLURM_JOBID"] - - # Condor - if "CONDOR_JOBID" in os.environ: - self.pp.flavour = "SSHCondor" - pilotRef = "sshcondor://" + self.pp.ceName + "/" + os.environ["CONDOR_JOBID"] - - # # CEs - - # HTCondor - if "HTCONDOR_JOBID" in os.environ: - self.pp.flavour = "HTCondorCE" - pilotRef = "htcondorce://" + self.pp.ceName + "/" + os.environ["HTCONDOR_JOBID"] - - # This is the CREAM direct submission case - if "CREAM_JOBID" in os.environ: - self.pp.flavour = "CREAM" - pilotRef = os.environ["CREAM_JOBID"] - - if "OSG_WN_TMP" in os.environ: - self.pp.flavour = "OSG" - - # GLOBUS Computing Elements - if "GLOBUS_GRAM_JOB_CONTACT" in os.environ: - self.pp.flavour = "GLOBUS" - pilotRef = os.environ["GLOBUS_GRAM_JOB_CONTACT"] - - # Direct SSH tunnel submission - if "SSHCE_JOBID" in os.environ: - self.pp.flavour = "SSH" - pilotRef = "ssh://" + self.pp.ceName + "/" + os.environ["SSHCE_JOBID"] - - # Batch host SSH tunnel submission (SSHBatch CE) - if "SSHBATCH_JOBID" in os.environ and "SSH_NODE_HOST" in os.environ: - self.pp.flavour = "SSHBATCH" - pilotRef = ( - "sshbatchhost://" - + self.pp.ceName - + "/" - + os.environ["SSH_NODE_HOST"] - + "/" - + os.environ["SSHBATCH_JOBID"] - ) - - # ARC case - # JOBID does not provide the full url in recent versions of ARC - # JOBURL has been introduced recently and should be preferred when present - if "GRID_GLOBAL_JOBID" in os.environ: - self.pp.flavour = "ARC" - pilotRef = os.environ["GRID_GLOBAL_JOBID"] - - if "GRID_GLOBAL_JOBURL" in os.environ: - self.pp.flavour = "ARC" - pilotRef = os.environ["GRID_GLOBAL_JOBURL"] - - # # DIRAC specific - - # VMDIRAC case - if "VMDIRAC_VERSION" in os.environ: - self.pp.flavour = "VMDIRAC" - pilotRef = "vm://" + self.pp.ceName + "/" + os.environ["JOB_ID"] - - # Pilot reference is given explicitly in environment - if "PILOT_UUID" in os.environ: - pilotRef = os.environ["PILOT_UUID"] - - # Pilot reference is specified at submission - if self.pp.pilotReference: - pilotRef = self.pp.pilotReference 
- - self.log.debug("Flavour: %s; pilot reference: %s " % (self.pp.flavour, pilotRef)) - - self.pp.pilotReference = pilotRef - class ConfigureArchitecture(CommandBase): """This command simply calls dirac-platfom to determine the platform. From c8dc774497c2896ae47ff635010219830ba5db19 Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Wed, 22 Mar 2023 16:48:14 +0100 Subject: [PATCH 3/8] test: added RegisterPilot command for all integration tests --- tests/CI/pilot.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/CI/pilot.json b/tests/CI/pilot.json index d62ef9c0..f4be1881 100644 --- a/tests/CI/pilot.json +++ b/tests/CI/pilot.json @@ -7,6 +7,7 @@ "CheckWorkerNode", "InstallDIRAC", "ConfigureBasics", + "RegisterPilot", "CheckCECapabilities", "CheckWNCapabilities", "ConfigureSite", @@ -17,6 +18,7 @@ "CheckWorkerNode", "InstallDIRAC", "ConfigureBasics", + "RegisterPilot", "CheckCECapabilities", "CheckWNCapabilities", "ConfigureSite", @@ -27,6 +29,7 @@ "CheckWorkerNode", "InstallDIRAC", "ConfigureBasics", + "RegisterPilot", "CheckCECapabilities", "CheckWNCapabilities", "ConfigureSite", @@ -49,6 +52,7 @@ "CheckWorkerNode", "InstallDIRAC", "ConfigureBasics", + "RegisterPilot", "CheckCECapabilities", "CheckWNCapabilities", "ConfigureSite", @@ -59,6 +63,7 @@ "CheckWorkerNode", "InstallDIRAC", "ConfigureBasics", + "RegisterPilot", "CheckCECapabilities", "CheckWNCapabilities", "ConfigureSite", From 1554c9249da6855349f0847d0d0b8e3804cb599c Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Thu, 23 Mar 2023 14:47:23 +0100 Subject: [PATCH 4/8] fix: pilotReference is by default --- Pilot/pilotCommands.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Pilot/pilotCommands.py b/Pilot/pilotCommands.py index 68aa5804..dcb082ec 100644 --- a/Pilot/pilotCommands.py +++ b/Pilot/pilotCommands.py @@ -558,7 +558,7 @@ def execute(self): def __setFlavour(self): - pilotRef = "Unknown" + pilotRef = os.environ.get("DIRAC_PILOT_STAMP", "Unknown") 
self.pp.flavour = "Generic" # If pilot reference is specified at submission, then set flavour to DIRAC @@ -658,10 +658,6 @@ def __setFlavour(self): self.pp.flavour = "VMDIRAC" pilotRef = "vm://" + self.pp.ceName + "/" + os.environ["JOB_ID"] - # Pilot reference is given explicitly in environment - if "PILOT_UUID" in os.environ: - pilotRef = os.environ["PILOT_UUID"] - # Pilot reference is specified at submission if self.pp.pilotReference: pilotRef = self.pp.pilotReference From 34305e5dd1c1811c911f06f0931ee3c9e1c474cb Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Thu, 23 Mar 2023 14:55:36 +0100 Subject: [PATCH 5/8] feat: removed pilot switch for setting the pilotReference --- Pilot/pilotCommands.py | 51 ++++++++++++++++-------------------------- Pilot/pilotTools.py | 2 -- 2 files changed, 19 insertions(+), 34 deletions(-) diff --git a/Pilot/pilotCommands.py b/Pilot/pilotCommands.py index dcb082ec..1c0cfb35 100644 --- a/Pilot/pilotCommands.py +++ b/Pilot/pilotCommands.py @@ -545,7 +545,7 @@ def execute(self): self.cfg.append(self.pp.localConfigFile) # this file is as input checkCmd = "dirac-admin-add-pilot %s %s %s %s %s --status=Running %s -d" % ( - self.pp.pilotRef, + self.pp.pilotReference, self.pp.userDN, self.pp.userGroup, self.pp.flavour, @@ -558,62 +558,55 @@ def execute(self): def __setFlavour(self): - pilotRef = os.environ.get("DIRAC_PILOT_STAMP", "Unknown") - self.pp.flavour = "Generic" - - # If pilot reference is specified at submission, then set flavour to DIRAC - # unless overridden by presence of batch system environment variables - if self.pp.pilotReference: - self.pp.flavour = "DIRAC" - pilotRef = self.pp.pilotReference + self.pp.pilotReference = os.environ.get("DIRAC_PILOT_STAMP", self.pp.pilotReference) # # Batch systems # Take the reference from the Torque batch system if "PBS_JOBID" in os.environ: self.pp.flavour = "SSHTorque" - pilotRef = "sshtorque://" + self.pp.ceName + "/" + os.environ["PBS_JOBID"].split(".")[0] + self.pp.pilotReference = 
"sshtorque://" + self.pp.ceName + "/" + os.environ["PBS_JOBID"].split(".")[0] # Take the reference from the OAR batch system if "OAR_JOBID" in os.environ: self.pp.flavour = "SSHOAR" - pilotRef = "sshoar://" + self.pp.ceName + "/" + os.environ["OAR_JOBID"] + self.pp.pilotReference = "sshoar://" + self.pp.ceName + "/" + os.environ["OAR_JOBID"] # Grid Engine if "JOB_ID" in os.environ and "SGE_TASK_ID" in os.environ: self.pp.flavour = "SSHGE" - pilotRef = "sshge://" + self.pp.ceName + "/" + os.environ["JOB_ID"] + self.pp.pilotReference = "sshge://" + self.pp.ceName + "/" + os.environ["JOB_ID"] # Generic JOB_ID elif "JOB_ID" in os.environ: self.pp.flavour = "Generic" - pilotRef = "generic://" + self.pp.ceName + "/" + os.environ["JOB_ID"] + self.pp.pilotReference = "generic://" + self.pp.ceName + "/" + os.environ["JOB_ID"] # LSF if "LSB_BATCH_JID" in os.environ: self.pp.flavour = "SSHLSF" - pilotRef = "sshlsf://" + self.pp.ceName + "/" + os.environ["LSB_BATCH_JID"] + self.pp.pilotReference = "sshlsf://" + self.pp.ceName + "/" + os.environ["LSB_BATCH_JID"] # SLURM batch system if "SLURM_JOBID" in os.environ: self.pp.flavour = "SSHSLURM" - pilotRef = "sshslurm://" + self.pp.ceName + "/" + os.environ["SLURM_JOBID"] + self.pp.pilotReference = "sshslurm://" + self.pp.ceName + "/" + os.environ["SLURM_JOBID"] # Condor if "CONDOR_JOBID" in os.environ: self.pp.flavour = "SSHCondor" - pilotRef = "sshcondor://" + self.pp.ceName + "/" + os.environ["CONDOR_JOBID"] + self.pp.pilotReference = "sshcondor://" + self.pp.ceName + "/" + os.environ["CONDOR_JOBID"] # # CEs # HTCondor if "HTCONDOR_JOBID" in os.environ: self.pp.flavour = "HTCondorCE" - pilotRef = "htcondorce://" + self.pp.ceName + "/" + os.environ["HTCONDOR_JOBID"] + self.pp.pilotReference = "htcondorce://" + self.pp.ceName + "/" + os.environ["HTCONDOR_JOBID"] # This is the CREAM direct submission case if "CREAM_JOBID" in os.environ: self.pp.flavour = "CREAM" - pilotRef = os.environ["CREAM_JOBID"] + self.pp.pilotReference = 
os.environ["CREAM_JOBID"] if "OSG_WN_TMP" in os.environ: self.pp.flavour = "OSG" @@ -621,17 +614,17 @@ def __setFlavour(self): # GLOBUS Computing Elements if "GLOBUS_GRAM_JOB_CONTACT" in os.environ: self.pp.flavour = "GLOBUS" - pilotRef = os.environ["GLOBUS_GRAM_JOB_CONTACT"] + self.pp.pilotReference = os.environ["GLOBUS_GRAM_JOB_CONTACT"] # Direct SSH tunnel submission if "SSHCE_JOBID" in os.environ: self.pp.flavour = "SSH" - pilotRef = "ssh://" + self.pp.ceName + "/" + os.environ["SSHCE_JOBID"] + self.pp.pilotReference = "ssh://" + self.pp.ceName + "/" + os.environ["SSHCE_JOBID"] # Batch host SSH tunnel submission (SSHBatch CE) if "SSHBATCH_JOBID" in os.environ and "SSH_NODE_HOST" in os.environ: self.pp.flavour = "SSHBATCH" - pilotRef = ( + self.pp.pilotReference = ( "sshbatchhost://" + self.pp.ceName + "/" @@ -645,26 +638,20 @@ def __setFlavour(self): # JOBURL has been introduced recently and should be preferred when present if "GRID_GLOBAL_JOBID" in os.environ: self.pp.flavour = "ARC" - pilotRef = os.environ["GRID_GLOBAL_JOBID"] + self.pp.pilotReference = os.environ["GRID_GLOBAL_JOBID"] if "GRID_GLOBAL_JOBURL" in os.environ: self.pp.flavour = "ARC" - pilotRef = os.environ["GRID_GLOBAL_JOBURL"] + self.pp.pilotReference = os.environ["GRID_GLOBAL_JOBURL"] # # DIRAC specific # VMDIRAC case if "VMDIRAC_VERSION" in os.environ: self.pp.flavour = "VMDIRAC" - pilotRef = "vm://" + self.pp.ceName + "/" + os.environ["JOB_ID"] - - # Pilot reference is specified at submission - if self.pp.pilotReference: - pilotRef = self.pp.pilotReference - - self.log.debug("Flavour: %s; pilot reference: %s " % (self.pp.flavour, pilotRef)) + self.pp.pilotReference = "vm://" + self.pp.ceName + "/" + os.environ["JOB_ID"] - self.pp.pilotReference = pilotRef + self.log.debug("Flavour: %s; pilot reference: %s " % (self.pp.flavour, self.pp.pilotReference)) class CheckCECapabilities(CommandBase): @@ -884,7 +871,7 @@ def execute(self): if o == "-o" or o == "--option": self.cfg.append('-o "%s"' % v) 
- if self.pp.pilotReference != "Unknown": + if self.pp.pilotReference: self.cfg.append("-o /LocalSite/PilotReference=%s" % self.pp.pilotReference) if self.pp.useServerCertificate: diff --git a/Pilot/pilotTools.py b/Pilot/pilotTools.py index 3799e20f..f8e4f4b5 100644 --- a/Pilot/pilotTools.py +++ b/Pilot/pilotTools.py @@ -812,8 +812,6 @@ def __initCommandLine2(self): self.site = v elif o == "-y" or o == "--CEType": self.ceType = v - elif o == "-R" or o == "--reference": - self.pilotReference = v elif o == "-k" or o == "--keepPP": self.keepPythonPath = True elif o in ("-C", "--configurationServer"): From e21cfd853d8ce0bf105c8eeecce64de0a64cbf7e Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Thu, 23 Mar 2023 15:22:20 +0100 Subject: [PATCH 6/8] feat: added test for existence of dirac-admin-add-pilot --- Pilot/pilotCommands.py | 7 +++++-- Pilot/pilotTools.py | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/Pilot/pilotCommands.py b/Pilot/pilotCommands.py index 1c0cfb35..6a918a44 100644 --- a/Pilot/pilotCommands.py +++ b/Pilot/pilotCommands.py @@ -48,9 +48,9 @@ def __init__(self, pilotParams): from pipes import quote try: - from Pilot.pilotTools import CommandBase, retrieveUrlTimeout + from Pilot.pilotTools import CommandBase, retrieveUrlTimeout, which except ImportError: - from pilotTools import CommandBase, retrieveUrlTimeout + from pilotTools import CommandBase, retrieveUrlTimeout, which ############################ @@ -527,6 +527,9 @@ def __init__(self, pilotParams): """c'tor""" super(RegisterPilot, self).__init__(pilotParams) + if not which("dirac-admin-add-pilot", self.pp.installEnv): + self.log.info("Skipping Pilot Command RegisterPilot, as executable dirac-admin-add-pilot does not exist") + # this variable contains the options that are passed to dirac-admin-add-pilot self.cfg = [] self.pilotStamp = os.environ.get("DIRAC_PILOT_STAMP", '') diff --git a/Pilot/pilotTools.py b/Pilot/pilotTools.py index f8e4f4b5..c0c829af
100644 --- a/Pilot/pilotTools.py +++ b/Pilot/pilotTools.py @@ -166,6 +166,25 @@ def retrieveUrlTimeout(url, fileName, log, timeout=0): raise x +def which(cmd, environDict=None): + """ + test if an executable exists, python2 compatible + (in python 3 one could simply use shutil.which(cmd)). + + If testing for the existance of a DIRAC command, + this would only work for pilots installing python3 DIRAC clients + """ + if not environDict: + environDict=os.environ + + for prefix in environDict["PATH"].split(os.pathsep): + filename = os.path.join(prefix, cmd) + executable = os.access(filename, os.X_OK) + if executable and os.path.isfile(filename): + return os.path.join(prefix, filename) + return False + + class ObjectLoader(object): """Simplified class for loading objects from a DIRAC installation. @@ -637,6 +656,7 @@ def __init__(self): "CheckWorkerNode", "InstallDIRAC", "ConfigureBasics", + "RegisterPilot", "CheckCECapabilities", "CheckWNCapabilities", "ConfigureSite", From ba3ae783ecb3d5f56f388db25cacfd51121e7bfa Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Mon, 27 Mar 2023 16:37:59 +0200 Subject: [PATCH 7/8] feat: removed old, not-anymore-useful user_data files --- Pilot/user_data_dc | 154 ------------------- Pilot/user_data_sc | 140 ----------------- Pilot/user_data_vm | 375 --------------------------------------------- 3 files changed, 669 deletions(-) delete mode 100755 Pilot/user_data_dc delete mode 100755 Pilot/user_data_sc delete mode 100644 Pilot/user_data_vm diff --git a/Pilot/user_data_dc b/Pilot/user_data_dc deleted file mode 100755 index f9daa283..00000000 --- a/Pilot/user_data_dc +++ /dev/null @@ -1,154 +0,0 @@ -#!/bin/sh -# -# Generic DIRAC pilot script for use with Docker containers, -# containing the following ##user_data___## substitutions: -# -# user_data_jobfeatures_url -# user_data_joboutputs_url -# user_data_machine_hostname -# user_data_machinefeatures_url -# user_data_machinetype -# user_data_option_dirac_opts -# 
user_data_option_dirac_pilot_url -# user_data_option_dirac_queue -# user_data_option_hostcert -# user_data_option_hostkey -# user_data_option_x509_proxy -# user_data_space -# user_data_url -# user_data_uuid -# -# Each substitution pattern may occur more than once in this template. If you -# are reading a processed file, then these substitutions will already have -# been made below. -# -# Andrew.McNab@cern.ch June 2018 -# -( -date --utc +"%Y-%m-%d %H:%M:%S %Z user_data_dc Start user_data_dc on `hostname`" - -export HOME=/scratch/plt -mkdir -p $HOME -cd $HOME - -# Record MJFJO if substituted here by VM lifecycle manager -export MACHINEFEATURES='##user_data_machinefeatures_url##' -export JOBFEATURES='##user_data_jobfeatures_url##' -export JOBOUTPUTS='##user_data_joboutputs_url##' - -export VM_UUID='##user_data_uuid##' - -if [ "$VM_UUID" = "" -a "$JOBFEATURES" != "" ] ; then - export VM_UUID=`cat $JOBFEATURES/job_id` -fi - -if [ "$VM_UUID" = "" ] ; then - # If still unset then just use the hostname from the VM lifecycle manager - export VM_UUID=`date +'%s.##user_data_vm_hostname##'` -fi - -export JOB_ID="##user_data_space##:$VM_UUID:##user_data_machinetype##" -export PILOT_UUID="sc://##user_data_space##/$JOB_ID" - -mkdir -p $HOME/grid-security -export X509_USER_PROXY=$HOME/grid-security/x509proxy.pem - -if [ ! -z "##user_data_option_x509_proxy##" ] ; then - # Simple if we are given an X.509 Proxy - cat < $X509_USER_PROXY -##user_data_option_x509_proxy## -X5_EOF - - cp $X509_USER_PROXY $HOME/grid-security/hostkey.pem - cp $X509_USER_PROXY $HOME/grid-security/hostcert.pem - -elif [ ! -z "##user_data_option_hostkey##" -a ! 
-z "##user_data_option_hostcert##" ] ; then - # Given full host cert/key pair - - cat < $HOME/grid-security/hostkey.pem -##user_data_option_hostkey## -X5_EOF - - cat < $HOME/grid-security/hostcert.pem -##user_data_option_hostcert## -X5_EOF - - cat $HOME/grid-security/hostkey.pem $HOME/grid-security/hostcert.pem > $X509_USER_PROXY -else - date --utc +"%Y-%m-%d %H:%M:%S %Z Neither user_data_option_x509_proxy or user_data_option_hostkey/_hostcert defined!" -fi - -chmod 0400 $HOME/grid-security/*.pem - -# Get CA certs from cvmfs -ln -sf /cvmfs/grid.cern.ch/etc/grid-security/ $HOME/grid-security/ -export X509_CERT_DIR=$HOME/grid-security/certificates - -. /cvmfs/grid.cern.ch/emi3wn-latest/etc/profile.d/a1_grid_env.sh -. /cvmfs/grid.cern.ch/emi3wn-latest/etc/profile.d/setup-wn-example.sh - -# Log HTTP proxies used for cvmfs -attr -g proxy /mnt/.ro -for i in /cvmfs/* -do - attr -g proxy $i -done - -# Fetch the DIRAC pilot scripts -if [ '##user_data_option_dirac_pilot_url##' != '' ] ; then - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR '##user_data_option_dirac_pilot_url##' -elif [ '##user_data_url##' != '' ] ; then - # Remove user_data file name back to final slash - user_data_dir=`echo '##user_data_url##' | sed 's:[^/]*$::'` - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR "$user_data_dir" -else - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR https://lhcb-portal-dirac.cern.ch/pilot/ -fi - -SUBMIT_POOL_OPTS="-o '/LocalSite/SubmitPool=Test'" - -# Source any include scripts we fetched -for i in include_dc_*.sh -do - if [ -r "$i" ] ; then - . 
./$i - fi -done - -if [ '##user_data_option_dirac_queue##' != '' ] ; then - QUEUE='##user_data_option_dirac_queue##' -else - QUEUE=default -fi - -# Now run the pilot script -python $HOME/dirac-pilot.py \ - --debug \ - --Name '##user_data_space##' \ - --Queue $QUEUE \ - $SUBMIT_POOL_OPTS \ - --MaxCycles 1 \ - --cert --certLocation $HOME/grid-security \ - ##user_data_option_dirac_opts## \ - >##user_data_joboutputs_url##/dirac-pilot.log 2>&1 - -# Save JobAgent and System logs to the $JOBOUTPUTS volume -cp -f $HOME/jobagent.*.log $HOME/shutdown_message* ##user_data_joboutputs_url## - -if [ "$DEPO_BASE_URL" != ""] ; then -# This will be replaced by extended pilot logging?? -( - cd ##user_data_joboutputs_url## - for i in * - do - if [ -f $i ] ; then - curl --capath $X509_CERT_DIR --cert /root/x509proxy.pem --cacert /root/x509proxy.pem --location --upload-file "$i" \ - "$DEPO_BASE_URL/##user_data_space##/##user_data_machinetype##/##user_data_machine_hostname##/##user_data_uuid##/" - fi - done -) -fi - -sleep 30 - -) >##user_data_joboutputs_url##/user_data_script.log 2>&1 diff --git a/Pilot/user_data_sc b/Pilot/user_data_sc deleted file mode 100755 index a2c60b4b..00000000 --- a/Pilot/user_data_sc +++ /dev/null @@ -1,140 +0,0 @@ -#!/bin/sh -# -# Generic DIRAC pilot script for use with Singularity and Docker containers, -# containing the following ##user_data___## substitutions: -# -# user_data_jobfeatures_url -# user_data_joboutputs_url -# user_data_machine_hostname -# user_data_machinefeatures_url -# user_data_machinetype -# user_data_option_dirac_opts -# user_data_option_dirac_pilot_url -# user_data_option_dirac_queue -# user_data_option_hostcert -# user_data_option_hostkey -# user_data_option_x509_proxy -# user_data_space -# user_data_url -# user_data_uuid -# -# Each substitution pattern may occur more than once in this template. If you -# are reading a processed file, then these substitutions will already have -# been made below. 
-# -# Andrew.McNab@cern.ch October 2017 -# -( -date --utc +"%Y-%m-%d %H:%M:%S %Z user_data_sc Start user_data_sc on `hostname`" - -cd $HOME - -# Record MJFJO if substituted here by VM lifecycle manager -export MACHINEFEATURES='##user_data_machinefeatures_url##' -export JOBFEATURES='##user_data_jobfeatures_url##' -export JOBOUTPUTS='##user_data_joboutputs_url##' - -export VM_UUID='##user_data_uuid##' - -if [ "$VM_UUID" = "" -a "$JOBFEATURES" != "" ] ; then - export VM_UUID=`cat $JOBFEATURES/job_id` -fi - -if [ "$VM_UUID" = "" ] ; then - # If still unset then just use the hostname from the VM lifecycle manager - export VM_UUID=`date +'%s.##user_data_vm_hostname##'` -fi - -export JOB_ID="##user_data_space##:$VM_UUID:##user_data_machinetype##" -export PILOT_UUID="sc://##user_data_space##/$JOB_ID" - -mkdir -p $HOME/grid-security -export X509_USER_PROXY=$HOME/grid-security/x509proxy.pem - -if [ ! -z "##user_data_option_x509_proxy##" ] ; then - # Simple if we are given an X.509 Proxy - cat < $X509_USER_PROXY -##user_data_option_x509_proxy## -X5_EOF - - cp $X509_USER_PROXY $HOME/grid-security/hostkey.pem - cp $X509_USER_PROXY $HOME/grid-security/hostcert.pem - -elif [ ! -z "##user_data_option_hostkey##" -a ! -z "##user_data_option_hostcert##" ] ; then - # Given full host cert/key pair - - cat < $HOME/grid-security/hostkey.pem -##user_data_option_hostkey## -X5_EOF - - cat < $HOME/grid-security/hostcert.pem -##user_data_option_hostcert## -X5_EOF - - cat $HOME/grid-security/hostkey.pem $HOME/grid-security/hostcert.pem > $X509_USER_PROXY -else - date --utc +"%Y-%m-%d %H:%M:%S %Z Neither user_data_option_x509_proxy or user_data_option_hostkey/_hostcert defined!" -fi - -chmod 0400 $HOME/grid-security/*.pem - -# Get CA certs from cvmfs -ln -sf /cvmfs/grid.cern.ch/etc/grid-security/ $HOME/grid-security/ -export X509_CERT_DIR=$HOME/grid-security/certificates - -. /cvmfs/grid.cern.ch/emi3wn-latest/etc/profile.d/a1_grid_env.sh -. 
/cvmfs/grid.cern.ch/emi3wn-latest/etc/profile.d/setup-wn-example.sh - -# Log HTTP proxies used for cvmfs -attr -g proxy /mnt/.ro -for i in /cvmfs/* -do - attr -g proxy $i -done - -# Fetch the DIRAC pilot scripts -if [ '##user_data_option_dirac_pilot_url##' != '' ] ; then - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR '##user_data_option_dirac_pilot_url##' -elif [ '##user_data_url##' != '' ] ; then - # Remove user_data file name back to final slash - user_data_dir=`echo '##user_data_url##' | sed 's:[^/]*$::'` - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR "$user_data_dir" -else - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR https://lhcb-portal-dirac.cern.ch/pilot/ -fi - -if [ '##user_data_option_dirac_queue##' != '' ] ; then - QUEUE='##user_data_option_dirac_queue##' -else - QUEUE=default -fi - -# Now run the pilot script -python $HOME/dirac-pilot.py \ - --debug \ - -o '/LocalSite/SubmitPool=Test' \ - --Name '##user_data_space##' \ - --Queue $QUEUE \ - --MaxCycles 1 \ - --cert --certLocation $HOME/grid-security \ - ##user_data_option_dirac_opts## \ - >##user_data_joboutputs_url##/dirac-pilot.log 2>&1 - -# Save JobAgent and System logs -cp -f $HOME/jobagent.*.log $HOME/shutdown_message* ##user_data_joboutputs_url## - -( - cd ##user_data_joboutputs_url## - for i in * - do - if [ -f $i ] ; then - # This will be replaced by extended pilot logging?? 
- curl --capath $X509_CERT_DIR --cert $HOME/grid-security/x509proxy.pem --cacert $HOME/grid-security/x509proxy.pem --location --upload-file "$i" \ - "https://lhcb-depo.cern.ch:9132/hosts/##user_data_space##/##user_data_machinetype##/##user_data_machine_hostname##/##user_data_uuid##/" - fi - done -) - -sleep 30 - -) >##user_data_joboutputs_url##/user_data_script.log 2>&1 diff --git a/Pilot/user_data_vm b/Pilot/user_data_vm deleted file mode 100644 index d4c7ceef..00000000 --- a/Pilot/user_data_vm +++ /dev/null @@ -1,375 +0,0 @@ -From nobody Wed Nov 18 09:59:29 2015 -Comments: - # - # Generic DIRAC Cloud Init user_data template file for use with VMs, and - # containing the following ##user_data___## substitutions: - # - # user_data_file_hostkey - # user_data_file_hostcert - # user_data_option_cvmfs_proxy - # user_data_space - # - # Each substitution pattern may occur more than once in this template. If you - # are reading a processed file, then these substitutions will already have - # been made below. - # - # This file should normally be processed by Vac (version 0.20.0 onwards) or - # Vcycle (0.3.0 onwards) internally. 
- # - # Andrew.McNab@cern.ch January 2016 - # -Content-Type: multipart/mixed; boundary="===============3141592653589793238==" -MIME-Version: 1.0 - ---===============3141592653589793238== -MIME-Version: 1.0 -Content-Type: text/cloud-config; charset="us-ascii" -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="cloud-config-file" - -# cloud-config - -cvmfs: - local: - CVMFS_REPOSITORIES: grid - CVMFS_HTTP_PROXY: ##user_data_option_cvmfs_proxy## - ---===============3141592653589793238== -MIME-Version: 1.0 -Content-Type: text/ucernvm; charset="us-ascii" -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="ucernvm-file" - -[ucernvm-begin] -resize_rootfs=off -cvmfs_http_proxy='##user_data_option_cvmfs_proxy##' -[ucernvm-end] - ---===============3141592653589793238== -MIME-Version: 1.0 -Content-Type: text/x-shellscript; charset="us-ascii" -Content-Transfer-Encoding: 7bit -Content-Disposition: attachment; filename="user_data_script" - -#!/bin/sh - -mkdir -p /var/spool/joboutputs -( -# Set the hostname if available; display otherwise -hostname ##user_data_machine_hostname## -date --utc +"%Y-%m-%d %H:%M:%S %Z user_data_script Start user_data on `hostname`" - -echo 1 > /proc/sys/net/ipv6/conf/all/disable_ipv6 -date --utc +"%Y-%m-%d %H:%M:%S %Z Disable IPv6" - -# Cloud Init should do this automatically but something has changed since cernvm3 -> cernvm4 -ls -l /root/.ssh/authorized_keys -curl http://169.254.169.254/2009-04-04/meta-data/public-keys/0/openssh-key > /root/.ssh/authorized_keys -echo >> /root/.ssh/authorized_keys -ls -l /root/.ssh/authorized_keys - -# Record MJFJO if substituted here by VM lifecycle manager -export MACHINEFEATURES='##user_data_machinefeatures_url##' -export JOBFEATURES='##user_data_jobfeatures_url##' -export JOBOUTPUTS='##user_data_joboutputs_url##' - -# Save whatever we use by other scripts -/bin/echo -e "export MACHINEFEATURES=$MACHINEFEATURES\nexport JOBFEATURES=$JOBFEATURES\nexport 
JOBOUTPUTS=$JOBOUTPUTS" > /etc/profile.d/mjf.sh -/bin/echo -e "setenv MACHINEFEATURES $MACHINEFEATURES\nsetenv JOBFEATURES $JOBFEATURES\nsetenv JOBOUTPUTS $JOBOUTPUTS" > /etc/profile.d/mjf.csh - -export VM_UUID='##user_data_uuid##' -if [ "$VM_UUID" = "" -a "$JOBFEATURES" != "" ] ; then - export VM_UUID=`python -c "import urllib ; print urllib.urlopen('$JOBFEATURES/job_id').read().strip()"` -fi - -if [ "$VM_UUID" = "" ] ; then - # If still unset then just use date and hostname from the VM lifecycle manager - export VM_UUID=`date +'%s.##user_data_machine_hostname##'` -fi - -export JOB_ID="##user_data_space##:$VM_UUID:##user_data_machinetype##" -export PILOT_UUID="vm://##user_data_space##/$JOB_ID" - -# Create a shutdown_message if ACPI shutdown signal received -/bin/echo -e 'echo "100 VM received ACPI shutdown signal from hypervisor" > /var/spool/joboutputs/shutdown_message\n/sbin/shutdown -h now' >/etc/acpi/actions/power.sh -chmod +x /etc/acpi/actions/power.sh - -# Disable TCP offloading etc - done by default -if [ "##user_data_option_network_offload##" != "on" ] ; then - ethtool -K eth0 tso off gso off gro off -fi - -# We never let VMs send emails (likely to be annoying errors from root) -/sbin/iptables -A OUTPUT -p tcp --dport 25 -j DROP - -# Once we have finished with the metadata, stop any user process reading it later -/sbin/iptables -A OUTPUT -d 169.254.169.254 -p tcp --dport 80 -j DROP - -# Get the big 40GB+ logical partition as /scratch -mkdir -p /scratch -if [ -b /dev/vdb1 -a -b /dev/vdb2 ] ; then - # Openstack at CERN with cvm* flavor? - # vda1 is boot image, vdb1 is root partition, vdb2 is unformatted - mkfs -q -t ext4 /dev/vdb2 - mount /dev/vdb2 /scratch -elif [ -b /dev/sda1 -a -b /dev/sda2 -a -b /dev/sda3 ] ; then - # GCE: sda1 is boot image, sda2 is root partition, sda3 is unformatted - mkfs -q -t ext4 /dev/sda3 - mount /dev/sda3 /scratch -elif [ -b /dev/vdb1 ] ; then - # Openstack at CERN with hep* flavor? 
- # vda1 is boot image, vdb1 is root partition, and no vdb2 - # Since boot image is small, can use rest of vda for /scratch - echo -e 'n\np\n2\n\n\nw\n'| fdisk /dev/vda - mkfs -q -t ext4 /dev/vda2 - mount /dev/vda2 /scratch -elif [ -b /dev/vdb ] ; then - # Efficient virtio device - mkfs -q -t ext4 /dev/vdb - mount /dev/vdb /scratch -elif [ -b /dev/vda1 -a -b /dev/vda2 ] ; then - # We just have a big vda with unused space in vda2 - mkfs -q -t ext4 /dev/vda2 - mount /dev/vda2 /scratch -elif [ -b /dev/sdb ] ; then - # Virtual SCSI - mkfs -q -t ext4 /dev/sdb - mount /dev/sdb /scratch -elif [ -b /dev/hdb ] ; then - # Virtual IDE - mkfs -q -t ext4 /dev/hdb - mount /dev/hdb /scratch -elif [ -b /dev/xvdb ] ; then - # Xen virtual disk device - mkfs -q -t ext4 /dev/xvdb - mount /dev/xvdb /scratch -else - date --utc +'%Y-%m-%d %H:%M:%S %Z user_data_script Missing vdb/hdb/sdb/xvdb block device for /scratch' - echo "500 Missing vdb/hdb/sdb block device for /scratch" > /var/spool/joboutputs/shutdown_message - /sbin/shutdown -h now - sleep 1234567890 -fi - -if [ -b /dev/vda ] ; then - # We rely on the hypervisor's disk I/O scheduling - echo 'noop' > /sys/block/vda/queue/scheduler - echo 'noop' > /sys/block/vdb/queue/scheduler -fi - -# anyone can create directories there -chmod ugo+rwxt /scratch - -# Scratch tmp for TMPDIR -mkdir -p /scratch/tmp -chmod ugo+rwxt /scratch/tmp - -mkdir -p /scratch/plt/etc/grid-security -export X509_USER_PROXY=/scratch/plt/etc/grid-security/x509proxy.pem - -if [ ! -z "##user_data_option_x509_proxy##" ] ; then - # Simple if we are given an X.509 Proxy - cat < $X509_USER_PROXY -##user_data_option_x509_proxy## -X5_EOF - - cp $X509_USER_PROXY /scratch/plt/etc/grid-security/hostkey.pem - cp $X509_USER_PROXY /scratch/plt/etc/grid-security/hostcert.pem - -elif [ ! -z "##user_data_file_hostkey##" -a ! 
-z "##user_data_file_hostcert##" ] ; then - # Given full host cert/key pair - # Old versions of Vac/Vcycle call this user_data_file_ rather than user_data_option_ - - cat < /scratch/plt/etc/grid-security/hostkey.pem -##user_data_file_hostkey## -X5_EOF - - cat < /scratch/plt/etc/grid-security/hostcert.pem -##user_data_file_hostcert## -X5_EOF -q - cat /scratch/plt/etc/grid-security/hostkey.pem /scratch/plt/etc/grid-security/hostcert.pem > $X509_USER_PROXY -else - date --utc +"%Y-%m-%d %H:%M:%S %Z Neither user_data_option_x509_proxy or user_data_option_hostkey/_hostcert defined!" -fi - -cp $X509_USER_PROXY /root/x509proxy.pem # Save a copy which will stay owned by root, for root to use -chmod 0400 /scratch/plt/etc/grid-security/*.pem /root/x509proxy.pem - -# Get CA certs from cvmfs -rm -Rf /etc/grid-security -ln -sf /cvmfs/grid.cern.ch/etc/grid-security /etc/grid-security -export X509_CERT_DIR=/etc/grid-security/certificates - -# These will be picked up by login etc shells -cat < /etc/profile.d/grid-paths.sh -export X509_CERT_DIR=/etc/grid-security/certificates -export X509_VOMS_DIR=/etc/grid-security/vomsdir -EOF - -# make a first heartbeat -echo 0.0 0.0 0.0 0.0 0.0 > /var/spool/joboutputs/heartbeat -/usr/bin/curl --capath $X509_CERT_DIR --cert /root/x509proxy.pem --cacert /root/x509proxy.pem --location --upload-file /var/spool/joboutputs/heartbeat "$JOBOUTPUTS/heartbeat" -/usr/bin/curl --capath $X509_CERT_DIR --cert /root/x509proxy.pem --cacert /root/x509proxy.pem --location --upload-file /var/spool/joboutputs/heartbeat "$JOBOUTPUTS/vm-heartbeat" - -# put heartbeat on MJF server every 5 minutes with a random offset -echo -e "*/5 * * * * root sleep `expr $RANDOM / 110` ; echo \`cut -f1-3 -d' ' /proc/loadavg\` \`cat /proc/uptime\` >/var/spool/joboutputs/heartbeat ; /usr/bin/curl --capath $X509_CERT_DIR --cert /root/x509proxy.pem --cacert /root/x509proxy.pem --location --upload-file /var/spool/joboutputs/heartbeat $JOBOUTPUTS/vm-heartbeat ; /usr/bin/curl --capath 
$X509_CERT_DIR --cert /root/x509proxy.pem --cacert /root/x509proxy.pem --location --upload-file /var/spool/joboutputs/heartbeat $JOBOUTPUTS/heartbeat >/var/log/heartbeat.log 2>&1" >/etc/cron.d/heartbeat - -# We swap on the logical partition (cannot on CernVM 3 aufs filesystem) -# Since ext4 we can use fallocate: -fallocate -l 4g /scratch/swapfile -chmod 0600 /scratch/swapfile -mkswap /scratch/swapfile -swapon /scratch/swapfile - -# Swap as little as possible -sysctl vm.swappiness=1 - -# Don't want to be doing this at 4 or 5am every day! -rm -f /etc/cron.daily/mlocate.cron - -# Log proxies used for cvmfs -attr -g proxy /mnt/.ro -for i in /cvmfs/* -do - attr -g proxy $i -done - -# Avoid age-old sudo problem -echo 'Defaults !requiretty' >>/etc/sudoers -echo 'Defaults visiblepw' >>/etc/sudoers - -# The pilot user account plt -/usr/sbin/useradd -b /scratch plt - -chown plt.plt /var/spool/joboutputs -chmod 0755 /var/spool/joboutputs - -# Add plt0102 etc accounts for the payloads that plt can sudo to - -# At most one jobagent per logical processor -processors=`grep '^processor[[:space:]]' /proc/cpuinfo | wc --lines` -for ((m=0; m < processors; m++)) -do - # Up to 100 successive payloads per jobagent - for ((n=0; n < 100; n++)) - do - payloaduser=`printf 'plt%02dp%02d' $m $n` - payloaduserid=`printf '1%02d%02d' $m $n` - - # Payload user home directory and dot files - mkdir /scratch/$payloaduser - cp -f /etc/skel/.*shrc /scratch/$payloaduser - cp -f /etc/skel/.bash* /scratch/$payloaduser - - # Add to /etc/passwd and /etc/group - echo "$payloaduser:x:$payloaduserid:$payloaduserid::/scratch/$payloaduser:/bin/bash" >>/etc/passwd - echo "$payloaduser:x:$payloaduserid:plt" >>/etc/group - - # Add the plt group as a secondary group - if [ "$payloaduser" = "plt00p00" ] ; then - sed -i "s/^plt:.*/&$payloaduser/" /etc/group - else - sed -i "s/^plt:.*/&,$payloaduser/" /etc/group - fi - - # Ownership and permissions of payload home directory - chown -R $payloaduser.$payloaduser 
/scratch/$payloaduser - chmod 0775 /scratch/$payloaduser - - # plt user can sudo to any payload user - echo "Defaults>$payloaduser !requiretty" >>/etc/sudoers - echo "Defaults>$payloaduser visiblepw" >>/etc/sudoers - echo "Defaults>$payloaduser !env_reset" >>/etc/sudoers - echo "plt ALL = ($payloaduser) NOPASSWD: ALL" >>/etc/sudoers - done -done - -cd /scratch/plt -# Fetch the DIRAC pilot scripts -if [ '##user_data_option_dirac_pilot_url##' != '' ] ; then - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR '##user_data_option_dirac_pilot_url##' -elif [ '##user_data_url##' != '' ] ; then - # Remove user_data file name back to final slash - user_data_dir=`echo '##user_data_url##' | sed 's:[^/]*$::'` - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR "$user_data_dir" -else - wget --no-directories --recursive --no-parent --execute robots=off --reject 'index.html*' --ca-directory=$X509_CERT_DIR https://lhcb-portal-dirac.cern.ch/pilot/ -fi - -# So payload accounts can create directories here, but not interfere -chown -R plt.plt /scratch/plt -chmod 1775 /scratch/plt - -# VO is not used within the user_data_script but is made available for include_vm_*.sh scripts -export VO='##user_data_option_vo##' -export DIRAC_OPTS='##user_data_option_dirac_opts##' - -if [ '##user_data_option_submit_pool##' != '' ] ; then - export SUBMIT_POOL=##user_data_option_submit_pool## -else - # Test matches any JobType - export SUBMIT_POOL=Test -fi - -if [ '##user_data_option_dirac_queue##' != '' ] ; then - export QUEUE='##user_data_option_dirac_queue##' -else - export QUEUE=default -fi - -# Source any include scripts we fetched -for i in include_vm_*.sh -do - if [ -r "$i" ] ; then - . 
./$i - fi -done - -# Now run the pilot script -/usr/bin/sudo -i -n -u plt \ - X509_USER_PROXY=$X509_USER_PROXY \ - MACHINEFEATURES="$MACHINEFEATURES" JOBFEATURES="$JOBFEATURES" \ - JOB_ID="$JOB_ID" PILOT_UUID="$PILOT_UUID" \ - python /scratch/plt/dirac-pilot.py \ - --debug \ - -o /LocalSite/SubmitPool=$SUBMIT_POOL \ - --Name '##user_data_space##' \ - --Queue $QUEUE \ - --MaxCycles 1 \ - --CEType Sudo \ - --cert --certLocation /scratch/plt/etc/grid-security \ - $DIRAC_OPTS \ - >/var/spool/joboutputs/dirac-pilot.log 2>&1 - -# Save JobAgent and System logs -cp -f /scratch/plt/bashrc /scratch/plt/jobagent.*.log /scratch/plt/shutdown_message* /var/log/boot.log /var/log/dmesg /var/log/secure /var/log/messages* /etc/cvmfs/default.* \ - /var/spool/joboutputs/ - -( - cd /var/spool/joboutputs - for i in * - do - if [ -f $i ] ; then - curl --capath $X509_CERT_DIR --cert /root/x509proxy.pem --cacert /root/x509proxy.pem --location --upload-file "$i" \ - "$JOBOUTPUTS/" - - if [ "$DEPO_BASE_URL" != "" ] ; then - # This will be replaced by extended pilot logging?? 
- curl --capath $X509_CERT_DIR --cert /root/x509proxy.pem --cacert /root/x509proxy.pem --location --upload-file "$i" \ - "$DEPO_BASE_URL/##user_data_space##/##user_data_machinetype##/##user_data_machine_hostname##/$VM_UUID/" - fi - fi - done -) - -# Try conventional shutdown -date --utc +'%Y-%m-%d %H:%M:%S %Z user_data_script Run /sbin/shutdown -h now' -/sbin/shutdown -h now -sleep 60 - -# If that fails, do an instant reboot -date --utc +'%Y-%m-%d %H:%M:%S %Z user_data_script Run echo o > /proc/sysrq-trigger' -echo o > /proc/sysrq-trigger - -) >/var/spool/joboutputs/user_data_script.log 2>&1 & ---===============3141592653589793238==-- From 56085f09216508bd446ad0d68bc23e999501b822 Mon Sep 17 00:00:00 2001 From: Federico Stagni Date: Tue, 28 Mar 2023 16:55:26 +0200 Subject: [PATCH 8/8] fix: extract DN in case of running with certificates --- Pilot/pilotCommands.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/Pilot/pilotCommands.py b/Pilot/pilotCommands.py index 6a918a44..6d92d954 100644 --- a/Pilot/pilotCommands.py +++ b/Pilot/pilotCommands.py @@ -541,7 +541,19 @@ def execute(self): self.__setFlavour() if self.pp.useServerCertificate: - self.cfg.append("-o /DIRAC/Security/UseServerCertificate=yes") + self.cfg.append("-o /DIRAC/Security/UseServerCertificate=yes") + extractDNCommand = "openssl x509 -in %s/hostcert.pem " % self.pp.certsLocation + extractDNCommand += "-noout -subject -nameopt compat | sed 's/subject=//'" + retCode, res = self.executeAndGetOutput(extractDNCommand, self.pp.installEnv) + pilotOwnerDN = res.strip().split("\n")[-1] + if retCode: + self.log.error("Could not get execute %s [ERROR %d]" % (extractDNCommand, retCode)) + + pilotOwnerGroup = "certificate_group" + else: + pilotOwnerDN = self.pp.userDN + pilotOwnerGroup = self.pp.userGroup + if self.pp.localConfigFile: if LooseVersion(self.releaseVersion) >= self.cfgOptionDIRACVersion: self.cfg.append("--cfg") @@ -549,8 +561,8 @@ def execute(self): checkCmd = 
"dirac-admin-add-pilot %s %s %s %s %s --status=Running %s -d" % ( self.pp.pilotReference, - self.pp.userDN, - self.pp.userGroup, + pilotOwnerDN, + pilotOwnerGroup, self.pp.flavour, self.pilotStamp, " ".join(self.cfg),