From 191a318d35a820018c89c8c523dc8ce3d2af629a Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:24:37 +0000 Subject: [PATCH 01/54] removed use_x509userproxy (neither Liverpool nor RAL T1 have it) --- templates/condor_config.local.erb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/templates/condor_config.local.erb b/templates/condor_config.local.erb index 1d24fc6..9be4769 100644 --- a/templates/condor_config.local.erb +++ b/templates/condor_config.local.erb @@ -4,7 +4,6 @@ CONDOR_IDS = 0.0 CONDOR_IDS = <%= @condor_uid %>.<%= @condor_gid %> <% end -%> CONDOR_ADMIN = <%= @condor_admin_email %> -use_x509userproxy = True PeriodicRemove = false <% if @request_memory -%> request_memory = int(JobMemoryLimit/1024.0) @@ -16,7 +15,7 @@ LeaveJobInQueue = (time() - CompletionDate) > <%= @leave_job_in_queue %> <% else -%> LeaveJobInQueue = False <% end -%> -SUBMIT_EXPRS = $(SUBMIT_EXPRS) use_x509userproxy,request_memory,LeaveJobInQueue +SUBMIT_EXPRS = $(SUBMIT_EXPRS) request_memory,LeaveJobInQueue DELEGATE_JOB_GSI_CREDENTIALS = False EMAIL_DOMAIN = <%= @email_domain %> From 0437afc4e3a543c2fdb9f6a191d16ea7cdb8a23a Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:25:44 +0000 Subject: [PATCH 02/54] bugfix for KERBEROS_MAP_FILE --- templates/10_security.config.erb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index fad0d00..bdf9213 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -64,7 +64,7 @@ SEC_ENABLE_MATCH_PASSWORD_AUTHENTICATION = True CERTIFICATE_MAPFILE = <%= @cert_map_file %> <% end -%> <% if @use_krb_map_file -%> -CERTIFICATE_MAPFILE = <%= @krb_map_file %> +KERBEROS_MAP_FILE = <%= @krb_map_file %> <% end -%> <% end -%> <% if @use_password_auth then -%> From 222f7172c748a2f02cd6fe310f1e9abc733e9c42 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:27:03 +0000 Subject: [PATCH 03/54] 
set ALLOW_DAEMON to a more general value --- templates/10_security.config.erb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index bdf9213..b4b41c9 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -42,7 +42,11 @@ COLLECTOR.ALLOW_ADVERTISE_STARTD = $(WNS) SCHEDD.ALLOW_WRITE = $(USERS), $(CES) -ALLOW_DAEMON = condor@$(UID_DOMAIN)/*.$(UID_DOMAIN), condor_pool@$(UID_DOMAIN)/*.$(UID_DOMAIN), $(FULL_HOSTNAME) +ALLOW_DAEMON = condor@$(UID_DOMAIN), \ + condor@$(UID_DOMAIN)/*.$(UID_DOMAIN), \ + condor_pool@$(UID_DOMAIN), \ + condor_pool@$(UID_DOMAIN)/*.$(UID_DOMAIN), \ + $(FULL_HOSTNAME) ALLOW_ADMINISTRATOR = root@$(UID_DOMAIN)/$(IP_ADDRESS), condor_pool@$(UID_DOMAIN)/$(IP_ADDRESS), $(CMS) ALLOW_CONFIG = root@$(FULL_HOSTNAME) From db4e80627d2f8d92f47788af900772175a3adbcf Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:31:40 +0000 Subject: [PATCH 04/54] added function to create manager, CE and WN lists with prefix --- .../parser/functions/join_machine_list.rb | 15 +++++++++++ manifests/config.pp | 22 ++++++++++++++- manifests/init.pp | 2 ++ .../functions/join_machine_list_spec.rb | 27 +++++++++++++++++++ templates/10_security.config.erb | 6 ++--- 5 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 lib/puppet/parser/functions/join_machine_list.rb create mode 100644 spec/unit/puppet/parser/functions/join_machine_list_spec.rb diff --git a/lib/puppet/parser/functions/join_machine_list.rb b/lib/puppet/parser/functions/join_machine_list.rb new file mode 100644 index 0000000..dd245b0 --- /dev/null +++ b/lib/puppet/parser/functions/join_machine_list.rb @@ -0,0 +1,15 @@ +module Puppet::Parser::Functions + newfunction(:join_machine_list, :type => :rvalue) do |args| + raise(Puppet::ParseError, "join_machine_list() wrong number of arguments. 
Given: #{args.size} for 2)") if args.size !=2 + prefix = args[0] + machine_list = args[1] + new_machine_list = Array.new + + machine_list.each do |item| + machine = prefix + item + new_machine_list.push machine + end + + return new_machine_list.join(", ") + end +end diff --git a/manifests/config.pp b/manifests/config.pp index 7fb0341..fdb150a 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -162,13 +162,33 @@ $use_cert_map_file = false, $use_krb_map_file = false, $cert_map_file = '/etc/condor/certificate_mapfile', - $krb_map_file = '/etc/condor/kerberos_mapfile',) { + $krb_map_file = '/etc/condor/kerberos_mapfile', + $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', + ) { $now = strftime('%d.%m.%Y_%H.%M') $ce_daemon_list = ['SCHEDD'] $worker_daemon_list = ['STARTD'] $ganglia_daemon_list = ['GANGLIAD'] $auth_string = construct_auth_string($use_fs_auth, $use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth) + + # because HTCondor uses user 'condor_pool' for remote access + # and user 'condor' for local the variables below need to include + # both users in case a machine has more than one role (i.e. 
manager + CE) + $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" + + $manager_string_remote = join_machine_list($machine_list_prefix, $managers) + $manager_string_local = join_machine_list($machine_prefix_local, $managers) + $manager_string = join([$manager_string_remote, $manager_string_local], ', ') + + $ce_string_remote = join_machine_list($machine_list_prefix, $computing_elements) + $ce_string_local = join_machine_list($machine_prefix_local, $computing_elements) + $ce_string = join([$ce_string_remote, $ce_string_local], ', ') + + $wn_string_remote = join_machine_list($machine_list_prefix, $worker_nodes) + $wn_string_local = join_machine_list($machine_prefix_local, $worker_nodes) + $wn_string = join([$wn_string_remote, $wn_string_local], ', ') + if $enable_multicore { $manage_daemon_list = ['COLLECTOR', 'NEGOTIATOR', 'DEFRAG'] } else { diff --git a/manifests/init.pp b/manifests/init.pp index 8a2b167..024dfd5 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -186,6 +186,7 @@ $use_krb_map_file = false, $cert_map_file = '/etc/condor/certificate_mapfile', $krb_map_file = '/etc/condor/kerberos_mapfile', + $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/' ) { class { 'htcondor::repositories': install_repos => $install_repositories, @@ -265,6 +266,7 @@ use_krb_map_file => $use_krb_map_file, cert_map_file => $cert_map_file, krb_map_file => $krb_map_file, + machine_list_prefix => $machine_list_prefix, } class { 'htcondor::service': diff --git a/spec/unit/puppet/parser/functions/join_machine_list_spec.rb b/spec/unit/puppet/parser/functions/join_machine_list_spec.rb new file mode 100644 index 0000000..046a33e --- /dev/null +++ b/spec/unit/puppet/parser/functions/join_machine_list_spec.rb @@ -0,0 +1,27 @@ +require 'spec_helper' +require 'puppetlabs_spec_helper/puppetlabs_spec/puppet_internals' + +describe "join_machine_list function" do + let(:scope) { PuppetlabsSpec::PuppetInternals.scope } + it "should exist" do + 
expect(Puppet::Parser::Functions.function("join_machine_list")).to eq("function_join_machine_list") + end + machine_prefix = 'condor_pool@$(UID_DOMAIN)/' + + context 'join_machine_list tests' do + it "single machine" do + result = scope.function_join_machine_list([machine_prefix,['test1.example.com']]) + expect(result).to eq(machine_prefix + 'test1.example.com') + end + it "single machine different prefix" do + prefix = 'root@$(UID_DOMAIN)/' + result = scope.function_join_machine_list([prefix, ['test1.example.com']]) + expect(result).to eq(prefix + 'test1.example.com') + end + it "multiple machines" do + result = scope.function_join_machine_list([machine_prefix,['test1.example.com', 'test2.example.com', 'test3.example.com']]) + expect(result).to eq('condor_pool@$(UID_DOMAIN)/test1.example.com, condor_pool@$(UID_DOMAIN)/test2.example.com, condor_pool@$(UID_DOMAIN)/test3.example.com') + end + end +end + diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index b4b41c9..5a750ae 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -15,9 +15,9 @@ TRUST_UID_DOMAIN = True <% end -%> # Machines & users -CMS = <%= @managers.flatten.join(', ') %> -CES = <%= @computing_elements.flatten.join(', ') %> -WNS = <%= @worker_nodes.flatten.join(', ') %> +CMS = <%= @manager_string %> +CES = <%= @ce_string %> +WNS = <%= @wn_string %> USERS = *@$(UID_DOMAIN) From 36b4cb055f9a371b71ee79a69164a8477f194654 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:42:44 +0000 Subject: [PATCH 05/54] HTCondor account mapping is now optional --- manifests/config.pp | 1 + manifests/init.pp | 2 ++ templates/condor_config.local.erb | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/manifests/config.pp b/manifests/config.pp index fdb150a..cff7f08 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -155,6 +155,7 @@ $template_workernode = "${module_name}/20_workernode.config.erb", $template_ganglia = 
"${module_name}/23_ganglia.config.erb", $template_defrag = "${module_name}/33_defrag.config.erb", + $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, $use_kerberos_auth = false, diff --git a/manifests/init.pp b/manifests/init.pp index 024dfd5..e06caf8 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -178,6 +178,7 @@ $template_ganglia = "${module_name}/23_ganglia.config.erb", $template_workernode = "${module_name}/20_workernode.config.erb", $template_defrag = "${module_name}/33_defrag.config.erb", + $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, $use_kerberos_auth = false, @@ -258,6 +259,7 @@ template_workernode => $template_workernode, template_ganglia => $template_ganglia, template_defrag => $template_defrag, + use_htcondor_account_mapping => $use_htcondor_account_mapping, use_fs_auth => $use_fs_auth, use_password_auth => $use_password_auth, use_kerberos_auth => $use_kerberos_auth, diff --git a/templates/condor_config.local.erb b/templates/condor_config.local.erb index 9be4769..b1d5615 100644 --- a/templates/condor_config.local.erb +++ b/templates/condor_config.local.erb @@ -19,7 +19,7 @@ SUBMIT_EXPRS = $(SUBMIT_EXPRS) request_memory,LeaveJobInQueue DELEGATE_JOB_GSI_CREDENTIALS = False EMAIL_DOMAIN = <%= @email_domain %> -<% if @is_ce == true -%> +<% if @is_ce == true and @use_htcondor_account_mapping == true -%> AcctSubGroup = \ ifThenElse(RequestCpus > 1, "multicore",\ ifThenElse(regexp("prd",Owner), "production",\ From f39f5dc899b3061f4dee734cc4d7ebe695ef3430 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:44:24 +0000 Subject: [PATCH 06/54] to avoid confusion, purge all unmanaged files from /etc/condor/config.d --- manifests/config.pp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/manifests/config.pp b/manifests/config.pp index cff7f08..cc56313 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -166,6 +166,13 @@ $krb_map_file = 
'/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', ) { + # purge all non-managed config files from /etc/condor/config.d + file {'/etc/condor/config.d': + ensure => directory, + recurse => true, + purge => true, + } + $now = strftime('%d.%m.%Y_%H.%M') $ce_daemon_list = ['SCHEDD'] $worker_daemon_list = ['STARTD'] From 106894178140ef7599f487e6d94f277beb8ccaaa Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:45:58 +0000 Subject: [PATCH 07/54] allow read to anyone, block unwanted with iptables --- templates/10_security.config.erb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index 5a750ae..189a394 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -32,7 +32,8 @@ HOSTALLOW_NEGOTIATOR = $(COLLECTOR_HOST) HOSTALLOW_ADMINISTRATOR = $(COLLECTOR_HOST) HOSTALLOW_NEGOTIATOR_SCHEDD = $(COLLECTOR_HOST) -ALLOW_READ = */*.$(UID_DOMAIN) +# allow read to anyone, block unwanted with iptables +ALLOW_READ = * ALLOW_WRITE = $(CMS), $(CES), $(WNS) #if the CE has a private NIC, it needs to be included here as well From 8cddc332dde6455e10c1ca9552ca0779b4871f76 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:48:17 +0000 Subject: [PATCH 08/54] made PID namespaces optional --- manifests/config.pp | 1 + manifests/init.pp | 2 ++ templates/20_workernode.config.erb | 2 ++ 3 files changed, 5 insertions(+) diff --git a/manifests/config.pp b/manifests/config.pp index cc56313..aeba4dd 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -162,6 +162,7 @@ $use_claim_to_be_auth = false, $use_cert_map_file = false, $use_krb_map_file = false, + $use_pid_namespaces = true, $cert_map_file = '/etc/condor/certificate_mapfile', $krb_map_file = '/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', diff --git a/manifests/init.pp b/manifests/init.pp index e06caf8..f9e2a7e 100644 --- 
a/manifests/init.pp +++ b/manifests/init.pp @@ -185,6 +185,7 @@ $use_claim_to_be_auth = false, $use_cert_map_file = false, $use_krb_map_file = false, + $use_pid_namespaces = true, $cert_map_file = '/etc/condor/certificate_mapfile', $krb_map_file = '/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/' @@ -266,6 +267,7 @@ use_claim_to_be_auth => $use_claim_to_be_auth, use_cert_map_file => $use_cert_map_file, use_krb_map_file => $use_krb_map_file, + use_pid_namespaces => $use_pid_namespaces, cert_map_file => $cert_map_file, krb_map_file => $krb_map_file, machine_list_prefix => $machine_list_prefix, diff --git a/templates/20_workernode.config.erb b/templates/20_workernode.config.erb index 2c60401..e8d3e26 100644 --- a/templates/20_workernode.config.erb +++ b/templates/20_workernode.config.erb @@ -75,7 +75,9 @@ MASTER_UPDATE_INTERVAL = $RANDOM_INTEGER(230, 370) EXECUTE = <%= @pool_home %>/condor ## Make sure jobs have independent PID namespaces +<% if @use_pid_namespaces -%> USE_PID_NAMESPACES = true +<% end -%> ## If the binaries are updated, let any running jobs finish before restarting MASTER_NEW_BINARY_RESTART=PEACEFUL From 9639dcf01bc76e8855f90370e90ddf99d82a1cf4 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:50:53 +0000 Subject: [PATCH 09/54] added knobs for max walltime/cputime --- manifests/config.pp | 2 ++ manifests/init.pp | 6 +++++- templates/12_resourcelimits.config.erb | 5 +++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index aeba4dd..fbce964 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -166,6 +166,8 @@ $cert_map_file = '/etc/condor/certificate_mapfile', $krb_map_file = '/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', + $max_walltime = '80 * 60 * 60', + $max_cputime = '80 * 60 * 60', ) { # purge all non-managed config files from /etc/condor/config.d file {'/etc/condor/config.d': diff --git 
a/manifests/init.pp b/manifests/init.pp index f9e2a7e..e07ff95 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -188,7 +188,9 @@ $use_pid_namespaces = true, $cert_map_file = '/etc/condor/certificate_mapfile', $krb_map_file = '/etc/condor/kerberos_mapfile', - $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/' + $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', + $max_walltime = '80 * 60 * 60', + $max_cputime = '80 * 60 * 60', ) { class { 'htcondor::repositories': install_repos => $install_repositories, @@ -271,6 +273,8 @@ cert_map_file => $cert_map_file, krb_map_file => $krb_map_file, machine_list_prefix => $machine_list_prefix, + max_walltime => $max_walltime, + max_cputime => $max_cputime, } class { 'htcondor::service': diff --git a/templates/12_resourcelimits.config.erb b/templates/12_resourcelimits.config.erb index babae98..d0757aa 100644 --- a/templates/12_resourcelimits.config.erb +++ b/templates/12_resourcelimits.config.erb @@ -1,8 +1,9 @@ # HTCondor configuration: resource limits +# This file will be deployed on every scheduler ## Time limits -RemoveDefaultJobWallTime = ( RemoteWallClockTime > 80 * 60 * 60 ) -RemoveDefaultJobCpuTime = ( RemoteSysCpu + RemoteUserCpu > 80 * 60 * 60 ) +RemoveDefaultJobWallTime = ( RemoteWallClockTime > <%= @max_walltime %> ) +RemoveDefaultJobCpuTime = ( RemoteSysCpu + RemoteUserCpu > <%= @max_cputime %> ) ## Memory usage limit RemoveMemoryUsage = ( ResidentSetSize_RAW > 1000*RequestMemory ) From e9a4db2485d94ce2cdbe51a4016571bb7d551709 Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 1 Feb 2016 12:03:24 +0000 Subject: [PATCH 10/54] setting PID name spaces to "false" by default (see #48) --- manifests/config.pp | 2 +- manifests/init.pp | 2 +- templates/20_workernode.config.erb | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index fbce964..cc396c5 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -162,7 +162,7 @@ $use_claim_to_be_auth = 
false, $use_cert_map_file = false, $use_krb_map_file = false, - $use_pid_namespaces = true, + $use_pid_namespaces = false, $cert_map_file = '/etc/condor/certificate_mapfile', $krb_map_file = '/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', diff --git a/manifests/init.pp b/manifests/init.pp index e07ff95..0bb536c 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -185,7 +185,7 @@ $use_claim_to_be_auth = false, $use_cert_map_file = false, $use_krb_map_file = false, - $use_pid_namespaces = true, + $use_pid_namespaces = false, $cert_map_file = '/etc/condor/certificate_mapfile', $krb_map_file = '/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', diff --git a/templates/20_workernode.config.erb b/templates/20_workernode.config.erb index e8d3e26..1adb613 100644 --- a/templates/20_workernode.config.erb +++ b/templates/20_workernode.config.erb @@ -77,6 +77,8 @@ EXECUTE = <%= @pool_home %>/condor ## Make sure jobs have independent PID namespaces <% if @use_pid_namespaces -%> USE_PID_NAMESPACES = true +<% else -%> +USE_PID_NAMESPACES = false <% end -%> ## If the binaries are updated, let any running jobs finish before restarting From 17522fdf9ca13611b34aee39ae934dad4d8eb8da Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 1 Feb 2016 14:46:36 +0000 Subject: [PATCH 11/54] fix for new security default in condor 8.4.X --- templates/10_security.config.erb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index 189a394..9de0af6 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -48,6 +48,12 @@ ALLOW_DAEMON = condor@$(UID_DOMAIN), \ condor_pool@$(UID_DOMAIN), \ condor_pool@$(UID_DOMAIN)/*.$(UID_DOMAIN), \ $(FULL_HOSTNAME) +<% if @is_worker then -%> +# fix for new security default in HTCondor 8.4.X (fixed in 8.5.1) +ALLOW_DAEMON = $(ALLOW_DAEMON), \ + submit-side@matchsession/*, \ + 
execute-side@matchsession/* +<% end -%> ALLOW_ADMINISTRATOR = root@$(UID_DOMAIN)/$(IP_ADDRESS), condor_pool@$(UID_DOMAIN)/$(IP_ADDRESS), $(CMS) ALLOW_CONFIG = root@$(FULL_HOSTNAME) From 382b15a50235ebfb328e8903c4ce2632facb6a7e Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 16:37:22 +0000 Subject: [PATCH 12/54] added high-availability that is automatically deployed when >1 manager is specified --- manifests/config.pp | 15 ++++++++++++++- manifests/init.pp | 2 ++ templates/30_highavailability.config.erb | 10 +++------- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index cc396c5..461956a 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -155,6 +155,7 @@ $template_workernode = "${module_name}/20_workernode.config.erb", $template_ganglia = "${module_name}/23_ganglia.config.erb", $template_defrag = "${module_name}/33_defrag.config.erb", + $template_highavailability = "${module_name}/30_highavailability.config.erb", $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, @@ -436,7 +437,19 @@ mode => '0644', } - # TODO: high availability + if size($managers) > 1 { + $replication_machines = suffix($managers, ':$(REPLICATION_PORT)') + $had_machines = suffix($managers, ':$(HAD_PORT)') + $replication_list = join($replication_machines, ', ') + $had_list= join($had_machines, ', ') + file { '/etc/condor/config.d/30_highavailability.config': + content => template($template_highavailability), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + } + } } if $is_worker { diff --git a/manifests/init.pp b/manifests/init.pp index 0bb536c..17793a0 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -178,6 +178,7 @@ $template_ganglia = "${module_name}/23_ganglia.config.erb", $template_workernode = "${module_name}/20_workernode.config.erb", $template_defrag = "${module_name}/33_defrag.config.erb", + 
$template_highavailability = "${module_name}/30_highavailability.config.erb", $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, @@ -262,6 +263,7 @@ template_workernode => $template_workernode, template_ganglia => $template_ganglia, template_defrag => $template_defrag, + template_highavailability => $template_highavailability, use_htcondor_account_mapping => $use_htcondor_account_mapping, use_fs_auth => $use_fs_auth, use_password_auth => $use_password_auth, diff --git a/templates/30_highavailability.config.erb b/templates/30_highavailability.config.erb index fa24df6..aa0b51f 100644 --- a/templates/30_highavailability.config.erb +++ b/templates/30_highavailability.config.erb @@ -1,4 +1,4 @@ -HTCondor configuration: high availability - NOT FOR USE!!! +## HTCondor configuration: high availability ## Define the port number on which the condor_had daemon will ## listen. The port must match the port number used @@ -20,18 +20,14 @@ REPLICATION_ARGS = -p $(REPLICATION_PORT) ## as HAD_LIST. In addition, for each hostname, it should specify ## the port number of condor_replication daemon running on that host. ## This parameter is mandatory and has no default value -REPLICATION_LIST = \ - $(CENTRAL_MANAGER1):$(REPLICATION_PORT), \ - $(CENTRAL_MANAGER2):$(REPLICATION_PORT) +REPLICATION_LIST = <%= @replication_list -%> ## The following list must contain the same addresses in the same order ## as COLLECTOR_HOST. In addition, for each hostname, it should specify ## the port number of condor_had daemon running on that host. ## The first machine in the list will be the PRIMARY central manager ## machine, in case HAD_USE_PRIMARY is set to true. -HAD_LIST = \ - $(CENTRAL_MANAGER1):$(HAD_PORT), \ - $(CENTRAL_MANAGER2):$(HAD_PORT) +HAD_LIST = <%= @had_list -%> ## HAD connection time. 
From 847ee5d5e26d4a125b1bd1430b7c096bc327dab7 Mon Sep 17 00:00:00 2001 From: kreczko Date: Fri, 29 Jan 2016 11:31:40 +0000 Subject: [PATCH 13/54] added function to create manager, CE and WN lists with prefix --- templates/10_security.config.erb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index 9de0af6..d28c012 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -41,7 +41,7 @@ COLLECTOR.ALLOW_ADVERTISE_MASTER = $(CES), $(CMS), $(WNS) COLLECTOR.ALLOW_ADVERTISE_SCHEDD = $(CES) COLLECTOR.ALLOW_ADVERTISE_STARTD = $(WNS) -SCHEDD.ALLOW_WRITE = $(USERS), $(CES) +SCHEDD.ALLOW_WRITE = $(USERS), $(CES), $(WNS) ALLOW_DAEMON = condor@$(UID_DOMAIN), \ condor@$(UID_DOMAIN)/*.$(UID_DOMAIN), \ From c0b46fff755cab67f099ef822af55ba3137a7eef Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 1 Feb 2016 11:56:45 +0000 Subject: [PATCH 14/54] move first parameters to params.pp --- manifests/config.pp | 3 +++ manifests/init.pp | 37 +++++++++++++------------------------ manifests/params.pp | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 24 deletions(-) create mode 100644 manifests/params.pp diff --git a/manifests/config.pp b/manifests/config.pp index 461956a..de9e78f 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -170,6 +170,9 @@ $max_walltime = '80 * 60 * 60', $max_cputime = '80 * 60 * 60', ) { + # TODO: instead of all the parameters, do it like https://github.com/puppetlabs/puppetlabs-postgresql/blob/master/manifests/server/install.pp + # parameters are read from init, e.g. 
+ # $::htcondor::cert_map_file # purge all non-managed config files from /etc/condor/config.d file {'/etc/condor/config.d': ensure => directory, diff --git a/manifests/init.pp b/manifests/init.pp index 17793a0..ab04675 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -99,18 +99,7 @@ # # Sample Usage: class htcondor ( - $accounting_groups = { - 'CMS' => { - priority_factor => 10000.00, - dynamic_quota => 0.80, - } - , - 'CMS.production' => { - priority_factor => 10000.00, - dynamic_quota => 0.95, - } - } - , + $accounting_groups = $::htcondor::params::accounting_groups, $cluster_has_multiple_domains = false, $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', $email_domain = 'localhost', @@ -168,17 +157,17 @@ $condor_uid = 0, $condor_gid = 0, # template selection. Allow for user to override - $template_config_local = "${module_name}/condor_config.local.erb", - $template_security = "${module_name}/10_security.config.erb", - $template_resourcelimits = "${module_name}/12_resourcelimits.config.erb", - $template_queues = "${module_name}/13_queues.config.erb", - $template_schedd = "${module_name}/21_schedd.config.erb", - $template_fairshares = "${module_name}/11_fairshares.config.erb", - $template_manager = "${module_name}/22_manager.config.erb", - $template_ganglia = "${module_name}/23_ganglia.config.erb", - $template_workernode = "${module_name}/20_workernode.config.erb", - $template_defrag = "${module_name}/33_defrag.config.erb", - $template_highavailability = "${module_name}/30_highavailability.config.erb", + $template_config_local = $::htcondor::params::template_config_local, + $template_security = $::htcondor::params::template_security, + $template_resourcelimits = $::htcondor::params::template_resourcelimits, + $template_queues = $::htcondor::params::template_queues, + $template_schedd = $::htcondor::params::template_schedd, + $template_fairshares = $::htcondor::params::template_fairshares, + $template_manager = $::htcondor::params::template_manager, 
+ $template_ganglia = $::htcondor::params::template_ganglia, + $template_workernode = $::htcondor::params::template_workernode, + $template_defrag = $::htcondor::params::template_defrag, + $template_highavailability = $::htcondor::params::template_highavailability, $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, @@ -192,7 +181,7 @@ $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', $max_walltime = '80 * 60 * 60', $max_cputime = '80 * 60 * 60', - ) { + ) inherits ::htcondor::params { class { 'htcondor::repositories': install_repos => $install_repositories, dev_repos => $dev_repositories, diff --git a/manifests/params.pp b/manifests/params.pp new file mode 100644 index 0000000..9c7dfcc --- /dev/null +++ b/manifests/params.pp @@ -0,0 +1,39 @@ +# htcondor::params +class htcondor::params { + $default_accounting_groups = { + 'CMS' => { + priority_factor => 10000.00, + dynamic_quota => 0.80, + } + , + 'CMS.production' => { + priority_factor => 10000.00, + dynamic_quota => 0.95, + } + } + $accounting_groups = hiera_hash('accounting_groups', + $default_accounting_groups) + + $template_config_local = hiera('template_config_local', "${module_name}/condor_config.local.erb" + ) + $template_security = hiera('template_security', "${module_name}/10_security.config.erb" + ) + $template_resourcelimits = hiera('template_resourcelimits', "${module_name}/12_resourcelimits.config.erb" + ) + $template_queues = hiera('template_queues', "${module_name}/13_queues.config.erb" + ) + $template_schedd = hiera('template_schedd', "${module_name}/21_schedd.config.erb" + ) + $template_fairshares = hiera('template_fairshares', "${module_name}/11_fairshares.config.erb" + ) + $template_collector = hiera('template_collector', "${module_name}/22_collector.config.erb" + ) + $template_ganglia = hiera('template_ganglia', "${module_name}/23_ganglia.config.erb" + ) + $template_workernode = hiera('template_workernode', "${module_name}/20_workernode.config.erb" + ) + 
$template_defrag = hiera('template_defrag', "${module_name}/33_defrag.config.erb" + ) + $template_highavailability = hiera('template_defrag', "${module_name}/30_highavailability.config.erb" + ) +} From ee664c6cfb2e01d2c539ad25b77d439a7b7498bd Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 11:08:54 +0000 Subject: [PATCH 15/54] improved README --- README.md | 56 +++++++++++++++++++++++++++++++++++++++++++-------- metadata.json | 4 ++-- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 617d259..500158e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -Puppet module for HTCondor batch system -========================================= +#Puppet module for HTCondor batch system + Latest stable version: https://github.com/HEP-Puppet/htcondor/releases/tag/v1.3.1 Development branch (heading for 2.0.0): https://github.com/HEP-Puppet/htcondor/tree/development @@ -8,13 +8,53 @@ Development branch (heading for 2.0.0): https://github.com/HEP-Puppet/htcondor/t Puppetforge: https://forge.puppetlabs.com/HEPPuppet/htcondor -Installation requirements -========================================= -- pool_password file created with ```condor_store_cred -f /files/pool_password``` -Tests -========================================= +####Table of Contents +1. [Overview - What is the htcondor module?](#overview) +2. [Module Description - What does the module do?](#module-description) +3. [Setup - The basics of getting started with htcondor](#setup) +4. [Limitations - OS compatibility, etc.](#limitations) +5. [Development - Guide for contributing to the module](#development) + * [Contributing to the htcondor module](#contributing) + * [Running tests - A quick guide](#running-tests) + +##Overview +The htcondor module allows you to set up an HTCondor cluster (https://research.cs.wisc.edu/htcondor/). +It depends on several other modules, including puppetlabs/(stdlib|concat|firewall). 
+Please check the metadata.json for detailed dependencies. + +##Module Description +An HTCondor cluster consists of at least three types of nodes: + * a worker for executing the jobs + * a scheduler for job submission + * a collector/negotiator to match jobs with workers + +This Puppet module allows for the configuration of these three types of nodes. + + +##Setup +**What the htcondor module affects:** + * configuration files and directories (/etc/condor/*) + * installation of htcondor software (condor* packages) + * a new fact for facter: condor_version + +###Beginning with HTCondor +Since admins might wish to run their own repository or disable repositories after install, +the HTCondor repository is no longer included in the Puppet module as of version 2.0.0. +Therefore, the first step is to install the latest HTCondor repository for your OS (https://research.cs.wisc.edu/htcondor/yum/): +``` +yum install -y https://research.cs.wisc.edu/htcondor/yum/repo.d/htcondor-stable-rhel6.repo +``` +If you wish to use a [pool password for authentication](http://research.cs.wisc.edu/htcondor/manual/v8.4/3_6Security.html#SECTION00463400000000000000) you will need to create one first: ```condor_store_cred -f /files/pool_password```. + +##Limitations +###General + + +##Development + +###Contributing +###Running tests Please run ```bundle exec rake validate && bundle exec rake lint && bundle exec rake spec SPEC_OPTS='--format documentation'``` and make sure no errors are present when submitting code. 
- diff --git a/metadata.json b/metadata.json index 136527b..9799a5d 100644 --- a/metadata.json +++ b/metadata.json @@ -1,6 +1,6 @@ { "name": "HEPPuppet-htcondor", - "version": "1.3.1", + "version": "2.0.0", "summary": "Puppet module for HTCondor batch system", "author": "HEPPuppet", "dependencies": [ @@ -16,4 +16,4 @@ "source": "git@github.com:HEP-Puppet/puppet-htcondor.git", "project_page": "https://github.com/HEP-Puppet", "license": "Apache License, Version 2.0" -} \ No newline at end of file +} From 2da080e00cc2f85f5e4bc3e3c50cf3fbd6c77869 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 11:23:54 +0000 Subject: [PATCH 16/54] added email_domain and condo_admin_email to params.pp --- manifests/config.pp | 6 ++---- manifests/init.pp | 29 +++++++++++++++++------------ manifests/params.pp | 4 ++++ templates/condor_config.local.erb | 2 +- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index de9e78f..3d3b84d 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -1,7 +1,5 @@ # Class htcondor::config # -# Configuration deployment for HTCondor -# # == Parameters: # # [*accounting_groups*] @@ -93,9 +91,7 @@ , $cluster_has_multiple_domains = false, $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', - $email_domain = 'localhost', $computing_elements = [], - $condor_admin_email = 'root@mysite.org', $custom_attribute = 'NORDUGRID_QUEUE', $enable_cgroup = false, $enable_multicore = false, @@ -173,6 +169,8 @@ # TODO: instead of all the parameters, do it like https://github.com/puppetlabs/puppetlabs-postgresql/blob/master/manifests/server/install.pp # parameters are read from init, e.g. 
# $::htcondor::cert_map_file + $email_domain = $::htcondor::email_domain + $admin_email = $::htcondor::admin_email # purge all non-managed config files from /etc/condor/config.d file {'/etc/condor/config.d': ensure => directory, diff --git a/manifests/init.pp b/manifests/init.pp index ab04675..2d9d525 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -1,6 +1,11 @@ # Class: htcondor # -# This module manages htcondor +# This module manages htcondor. Parameters that refer to condor 'knobs' (e.g. +# CONDOR_ADMIN) will not be explained here. +# Instead please refer to the HTCondor documentation: +# http://research.cs.wisc.edu/htcondor/manual/latest/3_3Configuration.html +# +# Defaults for the parameters can be found in htcondor::params # # == Parameters: # @@ -12,7 +17,6 @@ # [*cluster_has_multiple_domains*] # Specifies if the cluster has more than one domain. If true it will set # TRUST_UID_DOMAIN = True in 10_security.config -# Default: false # # [*collector_name*] # Sets COLLECTOR_NAME in 22_manager.config @@ -21,8 +25,9 @@ # [*computing_elements*] # List of CEs that have access to this HTCondor pool # -# [*condor_admin_email*] -# Contact email for the pool admin. Sets CONDOR_ADMIN. +# [*admin_email*] +# Sets CONDOR_ADMIN +# (http://research.cs.wisc.edu/htcondor/manual/latest/3_3Configuration.html). # # [*custom_attribute*] # Can be used to specify a ClassAd via custom_attribute = True. This is useful @@ -102,9 +107,9 @@ $accounting_groups = $::htcondor::params::accounting_groups, $cluster_has_multiple_domains = false, $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', - $email_domain = 'localhost', + $email_domain = $::htcondor::params::email_domain, $computing_elements = [], - $condor_admin_email = 'root@mysite.org', + $admin_email = $::htcondor::params::admin_email, $condor_priority = '99', $condor_version = 'present', $custom_attribute = 'NORDUGRID_QUEUE', @@ -159,7 +164,8 @@ # template selection. 
Allow for user to override $template_config_local = $::htcondor::params::template_config_local, $template_security = $::htcondor::params::template_security, - $template_resourcelimits = $::htcondor::params::template_resourcelimits, + $template_resourcelimits = + $::htcondor::params::template_resourcelimits, $template_queues = $::htcondor::params::template_queues, $template_schedd = $::htcondor::params::template_schedd, $template_fairshares = $::htcondor::params::template_fairshares, @@ -167,7 +173,8 @@ $template_ganglia = $::htcondor::params::template_ganglia, $template_workernode = $::htcondor::params::template_workernode, $template_defrag = $::htcondor::params::template_defrag, - $template_highavailability = $::htcondor::params::template_highavailability, + $template_highavailability = + $::htcondor::params::template_highavailability, $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, @@ -180,8 +187,8 @@ $krb_map_file = '/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', $max_walltime = '80 * 60 * 60', - $max_cputime = '80 * 60 * 60', - ) inherits ::htcondor::params { + $max_cputime = '80 * 60 * 60',) inherits ::htcondor::params +{ class { 'htcondor::repositories': install_repos => $install_repositories, dev_repos => $dev_repositories, @@ -197,9 +204,7 @@ accounting_groups => $accounting_groups, cluster_has_multiple_domains => $cluster_has_multiple_domains, collector_name => $collector_name, - email_domain => $email_domain, computing_elements => $computing_elements, - condor_admin_email => $condor_admin_email, custom_attribute => $custom_attribute, enable_cgroup => $enable_cgroup, enable_multicore => $enable_multicore, diff --git a/manifests/params.pp b/manifests/params.pp index 9c7dfcc..d59bda1 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -14,6 +14,10 @@ $accounting_groups = hiera_hash('accounting_groups', $default_accounting_groups) + # notification settings + $admin_email = 
hiera('admin_email', 'localhost') + $email_domain = hiera('email_domain', 'localhost') + # template paths $template_config_local = hiera('template_config_local', "${module_name}/condor_config.local.erb" ) $template_security = hiera('template_security', "${module_name}/10_security.config.erb" diff --git a/templates/condor_config.local.erb b/templates/condor_config.local.erb index b1d5615..94be17d 100644 --- a/templates/condor_config.local.erb +++ b/templates/condor_config.local.erb @@ -3,7 +3,7 @@ CONDOR_IDS = 0.0 <% elsif @condor_uid.to_i > 0 and @condor_gid.to_i > 0 -%> CONDOR_IDS = <%= @condor_uid %>.<%= @condor_gid %> <% end -%> -CONDOR_ADMIN = <%= @condor_admin_email %> +CONDOR_ADMIN = <%= @admin_email %> PeriodicRemove = false <% if @request_memory -%> request_memory = int(JobMemoryLimit/1024.0) From 083345c7afe8599f1b90a5b0ba487f48dd99a7f5 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 11:25:39 +0000 Subject: [PATCH 17/54] formatting and removal of duplicated comments in config.pp --- manifests/config.pp | 222 +++++++++++++++----------------------------- 1 file changed, 76 insertions(+), 146 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index 3d3b84d..037af42 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -1,83 +1,6 @@ # Class htcondor::config -# -# == Parameters: -# -# [*accounting_groups*] -# Accounting grous (and subgroups) for fair share configuration. -# Requires use_accounting_groups = true -# Default just provides an example for what needs to be specified -# -# [*cluster_has_multiple_domains*] -# Specifies if the cluster has more than one domain. If true it will set -# TRUST_UID_DOMAIN = True in 10_security.config -# Default: false -# -# [*collector_name*] -# Sets COLLECTOR_NAME in 22_manager.config -# Default: 'Personal Condor at $(FULL_HOSTNAME)' -# -# [*computing_elements*] -# List of CEs that have access to this HTCondor pool -# -# [*condor_admin_email*] -# Contact email for the pool admin. 
Sets CONDOR_ADMIN. -# -# [*custom_attribute*] -# Can be used to specify a ClassAd via custom_attribute = True. This is useful -# when creating queues with ARC CEs -# Default: NORDUGRID_QUEUE -# -# [*include_username_in_accounting*] -# Bool. If false the accounting groups used are of the form -# group_. -# and if true -# group_.. -# -# [*high_priority_groups*] -# A hash of groups with high priority. It is used for the group sorting -# expression for condor. Groups with lower value are considered first. -# example: -# $high_priority_groups = { -# 'cms.admin' => -30, -# 'ops' => -20, -# 'dteam' => -10, -# } -# This will consider the group cms.admin first, followed by ops and dteam. -# -# [*is_ce*] -# If machine is a computing element or a scheduler (condor term) -# -# [*is_manager*] -# If machine is a manager or a negotiator (condor term) -# -# [*is_worker*] -# If the machine is a worker node -# -# [*machine_owner*] -# The owner of the machine (e.g. an accounting group) -# -# [*managers*] -# List of condor managers -# -# [*number_of_cpus*] -# Number of CPUs available for condor scheduling. This is set for worker nodes -# only -# -# [*pool_password*] -# Path to pool password file. 
-# -# [*uid_domain*] -# Condor UID_DOMAIN -# Default: example.com -# -# [*use_accounting_groups*] -# If accounting groups should be used (fair shares) -# -# [*worker_nodes*] -# List of worker nodes -# class htcondor::config ( - $accounting_groups = { + $accounting_groups = { 'CMS' => { priority_factor => 10000.00, dynamic_quota => 0.80, @@ -90,67 +13,67 @@ } , $cluster_has_multiple_domains = false, - $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', - $computing_elements = [], - $custom_attribute = 'NORDUGRID_QUEUE', - $enable_cgroup = false, - $enable_multicore = false, - $enable_healthcheck = false, + $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', + $computing_elements = [], + $custom_attribute = 'NORDUGRID_QUEUE', + $enable_cgroup = false, + $enable_multicore = false, + $enable_healthcheck = false, $high_priority_groups = { 'cms.admin' => -30, 'ops' => -20, 'dteam' => -10, } , - $priority_halflife = 43200, - $default_prio_factor = 100000.00, + $priority_halflife = 43200, + $default_prio_factor = 100000.00, $group_accept_surplus = true, - $group_autoregroup = true, - $health_check_script = "puppet:///modules/${module_name}/healhcheck_wn_condor", + $group_autoregroup = true, + $health_check_script = "puppet:///modules/${module_name}/healhcheck_wn_condor", $include_username_in_accounting = false, $use_pkg_condor_config = false, - $is_ce = false, - $is_manager = false, - $is_worker = false, - $machine_owner = 'physics', - $managers = [], - $number_of_cpus = undef, - $partitionable_slots = true, - $memory_overcommit = 1.5, - $request_memory = true, - $certificate_mapfile = "puppet:///modules/${module_name}/certificate_mapfile", - $kerberos_mapfile = "puppet:///modules/${module_name}/kerberos_mapfile", + $is_ce = false, + $is_manager = false, + $is_worker = false, + $machine_owner = 'physics', + $managers = [], + $number_of_cpus = undef, + $partitionable_slots = true, + $memory_overcommit = 1.5, + $request_memory = true, + $certificate_mapfile = 
"puppet:///modules/${module_name}/certificate_mapfile", + $kerberos_mapfile = "puppet:///modules/${module_name}/kerberos_mapfile", # pool_password can also be served from central file location using hiera - $pool_password = "puppet:///modules/${module_name}/pool_password", - $pool_home = '/pool', - $queues = hiera('grid_queues', undef), + $pool_password = "puppet:///modules/${module_name}/pool_password", + $pool_home = '/pool', + $queues = hiera('grid_queues', undef), $periodic_expr_interval = 60, $max_periodic_expr_interval = 1200, $remove_held_jobs_after = 1200, - $leave_job_in_queue = undef, + $leave_job_in_queue = undef, $ganglia_cluster_name = false, - $uid_domain = 'example.com', - $pool_create = true, - $default_domain_name = $uid_domain, - $filesystem_domain = $::fqdn, + $uid_domain = 'example.com', + $pool_create = true, + $default_domain_name = $uid_domain, + $filesystem_domain = $::fqdn, $use_accounting_groups = false, # specify the networks with write access i.e. ["10.132.0.*"] - $worker_nodes = [], - $condor_user = root, - $condor_group = root, - $condor_uid = 0, - $condor_gid = 0, + $worker_nodes = [], + $condor_user = root, + $condor_group = root, + $condor_uid = 0, + $condor_gid = 0, # template selection. 
Allow for user to override $template_config_local = "${module_name}/condor_config.local.erb", - $template_security = "${module_name}/10_security.config.erb", + $template_security = "${module_name}/10_security.config.erb", $template_resourcelimits = "${module_name}/12_resourcelimits.config.erb", - $template_queues = "${module_name}/13_queues.config.erb", - $template_schedd = "${module_name}/21_schedd.config.erb", - $template_fairshares = "${module_name}/11_fairshares.config.erb", - $template_manager = "${module_name}/22_manager.config.erb", - $template_workernode = "${module_name}/20_workernode.config.erb", - $template_ganglia = "${module_name}/23_ganglia.config.erb", - $template_defrag = "${module_name}/33_defrag.config.erb", + $template_queues = "${module_name}/13_queues.config.erb", + $template_schedd = "${module_name}/21_schedd.config.erb", + $template_fairshares = "${module_name}/11_fairshares.config.erb", + $template_manager = "${module_name}/22_manager.config.erb", + $template_workernode = "${module_name}/20_workernode.config.erb", + $template_ganglia = "${module_name}/23_ganglia.config.erb", + $template_defrag = "${module_name}/33_defrag.config.erb", $template_highavailability = "${module_name}/30_highavailability.config.erb", $use_htcondor_account_mapping = true, $use_fs_auth = true, @@ -164,43 +87,49 @@ $krb_map_file = '/etc/condor/kerberos_mapfile', $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', $max_walltime = '80 * 60 * 60', - $max_cputime = '80 * 60 * 60', - ) { - # TODO: instead of all the parameters, do it like https://github.com/puppetlabs/puppetlabs-postgresql/blob/master/manifests/server/install.pp - # parameters are read from init, e.g. 
- # $::htcondor::cert_map_file - $email_domain = $::htcondor::email_domain - $admin_email = $::htcondor::admin_email + $max_cputime = '80 * 60 * 60',) { + # TODO: instead of all the parameters, do it like + # https://github.com/puppetlabs/puppetlabs-postgresql/blob/master/manifests/server/install.pp + # parameters are read from init, e.g. + # $::htcondor::cert_map_file + $email_domain = $::htcondor::email_domain + $admin_email = $::htcondor::admin_email + # purge all non-managed config files from /etc/condor/config.d - file {'/etc/condor/config.d': + file { '/etc/condor/config.d': ensure => directory, recurse => true, purge => true, } - $now = strftime('%d.%m.%Y_%H.%M') - $ce_daemon_list = ['SCHEDD'] - $worker_daemon_list = ['STARTD'] - $ganglia_daemon_list = ['GANGLIAD'] - $auth_string = construct_auth_string($use_fs_auth, $use_password_auth, - $use_kerberos_auth, $use_claim_to_be_auth) + $now = strftime('%d.%m.%Y_%H.%M') + $ce_daemon_list = ['SCHEDD'] + $worker_daemon_list = ['STARTD'] + $ganglia_daemon_list = ['GANGLIAD'] + $auth_string = construct_auth_string($use_fs_auth, + $use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth) # because HTCondor uses user 'condor_pool' for remote access # and user 'condor' for local the variables below need to include # both users in case a machine has more than one role (i.e. 
manager + CE) - $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" + $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" $manager_string_remote = join_machine_list($machine_list_prefix, $managers) - $manager_string_local = join_machine_list($machine_prefix_local, $managers) - $manager_string = join([$manager_string_remote, $manager_string_local], ', ') + $manager_string_local = join_machine_list($machine_prefix_local, $managers) + $manager_string = join([$manager_string_remote, $manager_string_local], ', ' + ) - $ce_string_remote = join_machine_list($machine_list_prefix, $computing_elements) - $ce_string_local = join_machine_list($machine_prefix_local, $computing_elements) - $ce_string = join([$ce_string_remote, $ce_string_local], ', ') + $ce_string_remote = join_machine_list($machine_list_prefix, + $computing_elements) + $ce_string_local = join_machine_list($machine_prefix_local, + $computing_elements) + $ce_string = join([$ce_string_remote, $ce_string_local], ', ') - $wn_string_remote = join_machine_list($machine_list_prefix, $worker_nodes) - $wn_string_local = join_machine_list($machine_prefix_local, $worker_nodes) - $wn_string = join([$wn_string_remote, $wn_string_local], ', ') + $wn_string_remote = join_machine_list($machine_list_prefix, $worker_nodes + ) + $wn_string_local = join_machine_list($machine_prefix_local, + $worker_nodes) + $wn_string = join([$wn_string_remote, $wn_string_local], ', ') if $enable_multicore { $manage_daemon_list = ['COLLECTOR', 'NEGOTIATOR', 'DEFRAG'] @@ -440,9 +369,10 @@ if size($managers) > 1 { $replication_machines = suffix($managers, ':$(REPLICATION_PORT)') - $had_machines = suffix($managers, ':$(HAD_PORT)') - $replication_list = join($replication_machines, ', ') - $had_list= join($had_machines, ', ') + $had_machines = suffix($managers, ':$(HAD_PORT)') + $replication_list = join($replication_machines, ', ') + $had_list = join($had_machines, ', ') + file { '/etc/condor/config.d/30_highavailability.config': content => 
template($template_highavailability), require => Package['condor'], From d4284ece7b3e23b2330ddfcd2b8b2281ef6272a4 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 11:48:28 +0000 Subject: [PATCH 18/54] renaming & params: computing element -> scheduler --- manifests/config.pp | 61 ++++++++++++++++--------------- manifests/init.pp | 55 ++++++++++------------------ manifests/params.pp | 5 ++- templates/10_security.config.erb | 2 +- templates/condor_config.local.erb | 2 +- 5 files changed, 58 insertions(+), 67 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index 037af42..7848a86 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -14,7 +14,6 @@ , $cluster_has_multiple_domains = false, $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', - $computing_elements = [], $custom_attribute = 'NORDUGRID_QUEUE', $enable_cgroup = false, $enable_multicore = false, @@ -32,7 +31,7 @@ $health_check_script = "puppet:///modules/${module_name}/healhcheck_wn_condor", $include_username_in_accounting = false, $use_pkg_condor_config = false, - $is_ce = false, + $is_scheduler = false, $is_manager = false, $is_worker = false, $machine_owner = 'physics', @@ -63,18 +62,6 @@ $condor_group = root, $condor_uid = 0, $condor_gid = 0, - # template selection. 
Allow for user to override - $template_config_local = "${module_name}/condor_config.local.erb", - $template_security = "${module_name}/10_security.config.erb", - $template_resourcelimits = "${module_name}/12_resourcelimits.config.erb", - $template_queues = "${module_name}/13_queues.config.erb", - $template_schedd = "${module_name}/21_schedd.config.erb", - $template_fairshares = "${module_name}/11_fairshares.config.erb", - $template_manager = "${module_name}/22_manager.config.erb", - $template_workernode = "${module_name}/20_workernode.config.erb", - $template_ganglia = "${module_name}/23_ganglia.config.erb", - $template_defrag = "${module_name}/33_defrag.config.erb", - $template_highavailability = "${module_name}/30_highavailability.config.erb", $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, @@ -92,8 +79,24 @@ # https://github.com/puppetlabs/puppetlabs-postgresql/blob/master/manifests/server/install.pp # parameters are read from init, e.g. # $::htcondor::cert_map_file - $email_domain = $::htcondor::email_domain - $admin_email = $::htcondor::admin_email + $email_domain = $htcondor::email_domain + $admin_email = $htcondor::admin_email + $schedulers = $htcondor::schedulers + + $is_scheduler = $htcondor::is_scheduler + + # templates + $template_config_local = $htcondor::template_config_local + $template_security = $htcondor::template_security + $template_resourcelimits = $htcondor::template_resourcelimits + $template_queues = $htcondor::template_queues + $template_schedd = $htcondor::template_schedd + $template_fairshares = $htcondor::template_fairshares + $template_manager = $htcondor::template_manager + $template_workernode = $htcondor::template_workernode + $template_ganglia = $htcondor::template_ganglia + $template_defrag = $htcondor::template_defrag + $template_highavailability = $htcondor::template_highavailability # purge all non-managed config files from /etc/condor/config.d file { '/etc/condor/config.d': @@ -103,7 +106,7 
@@ } $now = strftime('%d.%m.%Y_%H.%M') - $ce_daemon_list = ['SCHEDD'] + $sched_daemon_list = ['SCHEDD'] $worker_daemon_list = ['STARTD'] $ganglia_daemon_list = ['GANGLIAD'] $auth_string = construct_auth_string($use_fs_auth, @@ -119,11 +122,11 @@ $manager_string = join([$manager_string_remote, $manager_string_local], ', ' ) - $ce_string_remote = join_machine_list($machine_list_prefix, - $computing_elements) - $ce_string_local = join_machine_list($machine_prefix_local, - $computing_elements) - $ce_string = join([$ce_string_remote, $ce_string_local], ', ') + $sched_string_remote = join_machine_list($machine_list_prefix, + $schedulers) + $sched_string_local = join_machine_list($machine_prefix_local, + $schedulers) + $sched_string = join([$sched_string_remote, $sched_string_local], ', ') $wn_string_remote = join_machine_list($machine_list_prefix, $worker_nodes ) @@ -153,10 +156,10 @@ ] } - if $is_ce and $is_manager { + if $is_scheduler and $is_manager { # machine is both CE and manager (for small sites) if $ganglia_cluster_name { - $temp_list = concat($default_daemon_list, $ce_daemon_list) + $temp_list = concat($default_daemon_list, $sched_daemon_list) $temp2_list = concat($temp_list, $ganglia_daemon_list) $daemon_list = concat($temp2_list, $manage_daemon_list) $additional_config_files = [ @@ -168,7 +171,7 @@ $config_files = concat($common_config_files, $additional_config_files) } else { - $temp_list = concat($default_daemon_list, $ce_daemon_list) + $temp_list = concat($default_daemon_list, $sched_daemon_list) $daemon_list = concat($temp_list, $manage_daemon_list) $additional_config_files = [ File['/etc/condor/config.d/12_resourcelimits.config'], @@ -179,8 +182,8 @@ $config_files = concat($common_config_files, $additional_config_files) } - } elsif $is_ce { - $daemon_list = concat($default_daemon_list, $ce_daemon_list) + } elsif $is_scheduler { + $daemon_list = concat($default_daemon_list, $sched_daemon_list) $additional_config_files = [ 
File['/etc/condor/config.d/12_resourcelimits.config'], File['/etc/condor/config.d/21_schedd.config'], @@ -251,7 +254,7 @@ if $pool_create { $condor_directories = [ - "${pool_home}", + $pool_home, "${pool_home}/condor", '/etc/condor/persistent'] } else { @@ -302,7 +305,7 @@ } # files for certain roles - if $is_ce { + if $is_scheduler { file { '/etc/condor/config.d/12_resourcelimits.config': content => template($template_resourcelimits), require => Package['condor'], diff --git a/manifests/init.pp b/manifests/init.pp index 2d9d525..f79b5de 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -22,8 +22,8 @@ # Sets COLLECTOR_NAME in 22_manager.config # Default: 'Personal Condor at $(FULL_HOSTNAME)' # -# [*computing_elements*] -# List of CEs that have access to this HTCondor pool +# [*schedulers*] +# List of schedulers that are allowed to submit jobs to the HTCondor pool # # [*admin_email*] # Sets CONDOR_ADMIN @@ -54,8 +54,8 @@ # [*install_repositories*] # Bool to install repositories or not # -# [*is_ce*] -# If machine is a computing element or a scheduler (condor term) +# [*$is_scheduler*] +# If current machine is a condor scheduler # # [*is_manager*] # If machine is a manager or a negotiator (condor term) @@ -104,12 +104,12 @@ # # Sample Usage: class htcondor ( - $accounting_groups = $::htcondor::params::accounting_groups, + $accounting_groups = $htcondor::params::accounting_groups, $cluster_has_multiple_domains = false, $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', - $email_domain = $::htcondor::params::email_domain, - $computing_elements = [], - $admin_email = $::htcondor::params::admin_email, + $email_domain = $htcondor::params::email_domain, + $schedulers = $htcondor::params::schedulers, + $admin_email = $htcondor::params::admin_email, $condor_priority = '99', $condor_version = 'present', $custom_attribute = 'NORDUGRID_QUEUE', @@ -131,7 +131,7 @@ $use_pkg_condor_config = false, $install_repositories = true, $dev_repositories = false, - $is_ce = 
false, + $is_scheduler = $htcondor::params::is_scheduler, $is_manager = false, $is_worker = false, $machine_owner = 'physics', @@ -162,19 +162,18 @@ $condor_uid = 0, $condor_gid = 0, # template selection. Allow for user to override - $template_config_local = $::htcondor::params::template_config_local, - $template_security = $::htcondor::params::template_security, - $template_resourcelimits = - $::htcondor::params::template_resourcelimits, - $template_queues = $::htcondor::params::template_queues, - $template_schedd = $::htcondor::params::template_schedd, - $template_fairshares = $::htcondor::params::template_fairshares, - $template_manager = $::htcondor::params::template_manager, - $template_ganglia = $::htcondor::params::template_ganglia, - $template_workernode = $::htcondor::params::template_workernode, - $template_defrag = $::htcondor::params::template_defrag, + $template_config_local = $htcondor::params::template_config_local, + $template_security = $htcondor::params::template_security, + $template_resourcelimits = $htcondor::params::template_resourcelimits, + $template_queues = $htcondor::params::template_queues, + $template_schedd = $htcondor::params::template_schedd, + $template_fairshares = $htcondor::params::template_fairshares, + $template_manager = $htcondor::params::template_manager, + $template_ganglia = $htcondor::params::template_ganglia, + $template_workernode = $htcondor::params::template_workernode, + $template_defrag = $htcondor::params::template_defrag, $template_highavailability = - $::htcondor::params::template_highavailability, + $htcondor::params::template_highavailability, $use_htcondor_account_mapping = true, $use_fs_auth = true, $use_password_auth = true, @@ -204,7 +203,6 @@ accounting_groups => $accounting_groups, cluster_has_multiple_domains => $cluster_has_multiple_domains, collector_name => $collector_name, - computing_elements => $computing_elements, custom_attribute => $custom_attribute, enable_cgroup => $enable_cgroup, 
enable_multicore => $enable_multicore, @@ -217,7 +215,6 @@ health_check_script => $health_check_script, include_username_in_accounting => $include_username_in_accounting, use_pkg_condor_config => $use_pkg_condor_config, - is_ce => $is_ce, is_manager => $is_manager, is_worker => $is_worker, machine_owner => $machine_owner, @@ -246,18 +243,6 @@ condor_group => $condor_group, condor_uid => $condor_uid, condor_gid => $condor_gid, - # template selection. Allow for user to override - template_config_local => $template_config_local, - template_security => $template_security, - template_resourcelimits => $template_resourcelimits, - template_queues => $template_queues, - template_schedd => $template_schedd, - template_fairshares => $template_fairshares, - template_manager => $template_manager, - template_workernode => $template_workernode, - template_ganglia => $template_ganglia, - template_defrag => $template_defrag, - template_highavailability => $template_highavailability, use_htcondor_account_mapping => $use_htcondor_account_mapping, use_fs_auth => $use_fs_auth, use_password_auth => $use_password_auth, diff --git a/manifests/params.pp b/manifests/params.pp index d59bda1..5626306 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -1,5 +1,8 @@ # htcondor::params class htcondor::params { + $schedulers = hiera_array('schedulers', []) + + $is_scheduler = hiera('is_scheduler', false) $default_accounting_groups = { 'CMS' => { priority_factor => 10000.00, @@ -30,7 +33,7 @@ ) $template_fairshares = hiera('template_fairshares', "${module_name}/11_fairshares.config.erb" ) - $template_collector = hiera('template_collector', "${module_name}/22_collector.config.erb" + $template_manager = hiera('template_collector', "${module_name}/22_manager.config.erb" ) $template_ganglia = hiera('template_ganglia', "${module_name}/23_ganglia.config.erb" ) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index d28c012..3bcb03f 100644 --- 
a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -16,7 +16,7 @@ TRUST_UID_DOMAIN = True # Machines & users CMS = <%= @manager_string %> -CES = <%= @ce_string %> +CES = <%= @sched_string %> WNS = <%= @wn_string %> USERS = *@$(UID_DOMAIN) diff --git a/templates/condor_config.local.erb b/templates/condor_config.local.erb index 94be17d..1c047bb 100644 --- a/templates/condor_config.local.erb +++ b/templates/condor_config.local.erb @@ -19,7 +19,7 @@ SUBMIT_EXPRS = $(SUBMIT_EXPRS) request_memory,LeaveJobInQueue DELEGATE_JOB_GSI_CREDENTIALS = False EMAIL_DOMAIN = <%= @email_domain %> -<% if @is_ce == true and @use_htcondor_account_mapping == true -%> +<% if @is_scheduler == true and @use_htcondor_account_mapping == true -%> AcctSubGroup = \ ifThenElse(RequestCpus > 1, "multicore",\ ifThenElse(regexp("prd",Owner), "production",\ From f0f32025008078757db84f5898c9d18113303cdc Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 12:26:31 +0000 Subject: [PATCH 19/54] moved remainder of parameters to params.pp --- manifests/config.pp | 6 +-- manifests/init.pp | 123 +++++++++++++++++++++---------------------- manifests/params.pp | 125 +++++++++++++++++++++++++++++++++++++------- 3 files changed, 170 insertions(+), 84 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index 7848a86..753251d 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -57,7 +57,7 @@ $filesystem_domain = $::fqdn, $use_accounting_groups = false, # specify the networks with write access i.e. 
["10.132.0.*"] - $worker_nodes = [], + $workers = [], $condor_user = root, $condor_group = root, $condor_uid = 0, @@ -128,10 +128,10 @@ $schedulers) $sched_string = join([$sched_string_remote, $sched_string_local], ', ') - $wn_string_remote = join_machine_list($machine_list_prefix, $worker_nodes + $wn_string_remote = join_machine_list($machine_list_prefix, $workers ) $wn_string_local = join_machine_list($machine_prefix_local, - $worker_nodes) + $workers) $wn_string = join([$wn_string_remote, $wn_string_local], ', ') if $enable_multicore { diff --git a/manifests/init.pp b/manifests/init.pp index f79b5de..708d71e 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -105,57 +105,55 @@ # Sample Usage: class htcondor ( $accounting_groups = $htcondor::params::accounting_groups, - $cluster_has_multiple_domains = false, - $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', + $cluster_has_multiple_domains = + $htcondor::params::cluster_has_multiple_domains, + $collector_name = $htcondor::params::collector_name, $email_domain = $htcondor::params::email_domain, $schedulers = $htcondor::params::schedulers, $admin_email = $htcondor::params::admin_email, - $condor_priority = '99', - $condor_version = 'present', - $custom_attribute = 'NORDUGRID_QUEUE', - $enable_cgroup = false, - $enable_multicore = false, - $enable_healthcheck = false, - $high_priority_groups = { - 'cms.admin' => -30, - 'ops' => -20, - 'dteam' => -10, - } - , - $priority_halflife = 43200, - $default_prio_factor = 100000.00, - $group_accept_surplus = true, - $group_autoregroup = true, - $health_check_script = "puppet:///modules/${module_name}/healhcheck_wn_condor", - $include_username_in_accounting = false, - $use_pkg_condor_config = false, - $install_repositories = true, - $dev_repositories = false, + $condor_priority = $htcondor::params::repo_priority, + $condor_version = $htcondor::params::condor_version, + $custom_attribute = $htcondor::params::custom_attribute, + $enable_cgroup = 
$htcondor::params::enable_cgroup, + $enable_multicore = $htcondor::params::enable_multicore, + $enable_healthcheck = $htcondor::params::enable_healthcheck, + $high_priority_groups = $htcondor::params::high_priority_groups, + $priority_halflife = $htcondor::params::priority_halflife, + $default_prio_factor = $htcondor::params::default_prio_factor, + $group_accept_surplus = $htcondor::params::group_accept_surplus, + $group_autoregroup = $htcondor::params::group_autoregroup, + $health_check_script = $htcondor::params::health_check_script, + $include_username_in_accounting = + $htcondor::params::include_username_in_accounting, + $use_pkg_condor_config = $htcondor::params::use_pkg_condor_config, + $install_repositories = $htcondor::params::install_repositories, + $dev_repositories = $htcondor::params::dev_repositories, $is_scheduler = $htcondor::params::is_scheduler, - $is_manager = false, - $is_worker = false, - $machine_owner = 'physics', - $managers = [], - $number_of_cpus = undef, - $partitionable_slots = true, - $memory_overcommit = 1.5, - $request_memory = true, - $certificate_mapfile = "puppet:///modules/${module_name}/certificate_mapfile", - $kerberos_mapfile = "puppet:///modules/${module_name}/kerberos_mapfile", - $pool_home = '/pool', - $pool_create = true, - $queues = hiera('grid_queues', undef), - $periodic_expr_interval = 60, - $max_periodic_expr_interval = 1200, - $remove_held_jobs_after = 1200, - $leave_job_in_queue = undef, - $ganglia_cluster_name = false, - $pool_password = "puppet:///modules/${module_name}/pool_password", - $uid_domain = 'example.com', - $default_domain_name = $uid_domain, - $filesystem_domain = $::fqdn, - $use_accounting_groups = false, - $worker_nodes = [], + $is_manager = $htcondor::params::is_manager, + $is_worker = $htcondor::params::is_worker, + $machine_owner = $htcondor::params::machine_owner, + $managers = $htcondor::params::managers, + $number_of_cpus = $htcondor::params::number_of_cpus, + $partitionable_slots = 
$htcondor::params::partitionable_slots, + $memory_overcommit = $htcondor::params::memory_overcommit, + $request_memory = $htcondor::params::request_memory, + $certificate_mapfile = $htcondor::params::certificate_mapfile, + $kerberos_mapfile = $htcondor::params::kerberos_mapfile, + $pool_home = $htcondor::params::pool_home, + $pool_create = $htcondor::params::pool_create, + $queues = $htcondor::params::queues, + $periodic_expr_interval = $htcondor::params::periodic_expr_interval, + $max_periodic_expr_interval = + $htcondor::params::max_periodic_expr_interval, + $remove_held_jobs_after = $htcondor::params::remove_held_jobs_after, + $leave_job_in_queue = $htcondor::params::leave_job_in_queue, + $ganglia_cluster_name = $htcondor::params::ganglia_cluster_name, + $pool_password = $htcondor::params::pool_password_file, + $uid_domain = $htcondor::params::uid_domain, + $default_domain_name = $htcondor::params::default_domain_name, + $filesystem_domain = $htcondor::params::filesystem_domain, + $use_accounting_groups = $htcondor::params::use_accounting_groups, + $workers = $htcondor::params::workers, # default params $condor_user = root, $condor_group = root, @@ -174,20 +172,21 @@ $template_defrag = $htcondor::params::template_defrag, $template_highavailability = $htcondor::params::template_highavailability, - $use_htcondor_account_mapping = true, - $use_fs_auth = true, - $use_password_auth = true, - $use_kerberos_auth = false, - $use_claim_to_be_auth = false, - $use_cert_map_file = false, - $use_krb_map_file = false, - $use_pid_namespaces = false, - $cert_map_file = '/etc/condor/certificate_mapfile', - $krb_map_file = '/etc/condor/kerberos_mapfile', - $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', - $max_walltime = '80 * 60 * 60', - $max_cputime = '80 * 60 * 60',) inherits ::htcondor::params -{ + $use_htcondor_account_mapping = + $htcondor::params::use_htcondor_account_mapping, + $use_fs_auth = $htcondor::params::use_fs_auth, + $use_password_auth = 
$htcondor::params::use_password_auth, + $use_kerberos_auth = $htcondor::params::use_kerberos_auth, + $use_claim_to_be_auth = $htcondor::params::use_claim_to_be_auth, + $use_cert_map_file = $htcondor::params::use_cert_map_file, + $use_krb_map_file = $htcondor::params::use_krb_map_file, + $use_pid_namespaces = $htcondor::params::use_pid_namespaces, + $cert_map_file = $htcondor::params::cert_map_file, + $krb_map_file = $htcondor::params::krb_map_file, + $machine_list_prefix = $htcondor::params::machine_list_prefix, + $max_walltime = $htcondor::params::max_walltime, + $max_cputime = $htcondor::params::max_cputime,) inherits +::htcondor::params { class { 'htcondor::repositories': install_repos => $install_repositories, dev_repos => $dev_repositories, @@ -238,7 +237,7 @@ default_domain_name => $default_domain_name, filesystem_domain => $filesystem_domain, use_accounting_groups => $use_accounting_groups, - worker_nodes => $worker_nodes, + workers => $workers, condor_user => $condor_user, condor_group => $condor_group, condor_uid => $condor_uid, diff --git a/manifests/params.pp b/manifests/params.pp index 5626306..c55050c 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -1,9 +1,27 @@ # htcondor::params class htcondor::params { - $schedulers = hiera_array('schedulers', []) + $schedulers = hiera_array('schedulers', []) + $managers = hiera_array('managers', []) + $workers = hiera_array('workers', []) - $is_scheduler = hiera('is_scheduler', false) - $default_accounting_groups = { + $is_manager = hiera('is_manager', false) + $is_scheduler = hiera('is_scheduler', false) + $is_worker = hiera('is_worker', false) + + $cluster_has_multiple_domains = hiera('cluster_has_multiple_domains', false) + $collector_name = hiera('collector_name', 'Personal Condor at $(FULL_HOSTNAME)' + ) + $repo_priority = hiera('repo_priority', '99') + $condor_version = hiera('condor_version', 'present') + $custom_attribute = hiera('custom_attribute', 'NORDUGRID_QUEUE') + + $enable_cgroup = 
hiera('enable_cgroup', false) + $enable_multicore = hiera('enable_multicore', false) + $enable_healthcheck = hiera('enable_healthcheck', false) + + $high_priority_groups = hiera_hash('high_priority_groups', undef) + + $default_accounting_groups = { 'CMS' => { priority_factor => 10000.00, dynamic_quota => 0.80, @@ -14,33 +32,102 @@ dynamic_quota => 0.95, } } - $accounting_groups = hiera_hash('accounting_groups', + $accounting_groups = hiera_hash('accounting_groups', $default_accounting_groups) - # notification settings - $admin_email = hiera('admin_email', 'localhost') - $email_domain = hiera('email_domain', 'localhost') - # template paths - $template_config_local = hiera('template_config_local', "${module_name}/condor_config.local.erb" + $priority_halflife = hiera('priority_halflife', 43200) + $default_prio_factor = hiera('default_prio_factor', 100000.00) + $group_accept_surplus = hiera('group_accept_surplus', true) + $group_autoregroup = hiera('group_autoregroup', true) + + $health_check_script = hiera('health_check_script', "puppet:///modules/${module_name}/healhcheck_wn_condor" + ) + $include_username_in_accounting = hiera('include_username_in_accounting', + false) + $use_pkg_condor_config = hiera('use_pkg_condor_config', false) + $install_repositories = hiera('install_repositories', true) + $dev_repositories = hiera('dev_repositories', false) + + $machine_owner = hiera('machine_owner', 'physics') + + $number_of_cpus = hiera('number_of_cpus', $::processors['count' + ]) + + $partitionable_slots = hiera('partitionable_slots', true) + $memory_overcommit = hiera('memory_overcommit', 1.5) + $request_memory = hiera('request_memory', true) + $certificate_mapfile = hiera('certificate_mapfile', "puppet:///modules/${module_name}/certificate_mapfile" + ) + $kerberos_mapfile = hiera('kerberos_mapfile', "puppet:///modules/${module_name}/kerberos_mapfile" ) - $template_security = hiera('template_security', "${module_name}/10_security.config.erb" + + $pool_home = 
hiera('pool_home', '/pool') + $pool_create = hiera('pool_create', true) + $queues = hiera('grid_queues', undef) + $periodic_expr_interval = hiera('periodic_expr_interval', 60) + $max_periodic_expr_interval = hiera('max_periodic_expr_interval', 1200) + $remove_held_jobs_after = hiera('remove_held_jobs_after', 1200) + $leave_job_in_queue = hiera('leave_job_in_queue', undef) + $max_walltime = hiera('max_walltime', '80 * 60 * 60') + $max_cputime = hiera('max_cputime', '80 * 60 * 60') + + $ganglia_cluster_name = hiera('ganglia_cluster_name', 'Example HTCondor' + ) + + $uid_domain = hiera('uid_domain', 'example.org') + $default_domain_name = hiera('default_domain_name', $uid_domain) + $filesystem_domain = hiera('filesystem_domain', $::fqdn) + + $use_accounting_groups = hiera('use_accounting_groups', false) + $use_htcondor_account_mapping = hiera('use_htcondor_account_mapping', true) + + # service security + $condor_user = hiera('condor_user', root) + $condor_group = hiera('condor_group', root) + $condor_uid = hiera('condor_uid', 0) + $condor_gid = hiera('condor_gid', 0) + + # authentication + $use_fs_auth = hiera('use_fs_auth', true) + $use_password_auth = hiera('use_password_auth', true) + $use_kerberos_auth = hiera('use_kerberos_auth', false) + $use_claim_to_be_auth = hiera('use_claim_to_be_auth', false) + $use_cert_map_file = hiera('use_cert_map_file', false) + $use_krb_map_file = hiera('use_krb_map_file', false) + $use_pid_namespaces = hiera('use_pid_namespaces', false) + $cert_map_file = hiera('cert_map_file', '/etc/condor/certificate_mapfile' ) - $template_resourcelimits = hiera('template_resourcelimits', "${module_name}/12_resourcelimits.config.erb" + $krb_map_file = hiera('krb_map_file', '/etc/condor/kerberos_mapfile' + ) + $machine_list_prefix = hiera('machine_list_prefix', 'condor_pool@$(UID_DOMAIN)/' + ) + $pool_password_file = hiera('pool_password_file', "puppet:///modules/${module_name}/pool_password" + ) + + # notification settings + $admin_email = 
hiera('admin_email', 'localhost') + $email_domain = hiera('email_domain', 'localhost') + # template paths + $template_config_local = hiera('template_config_local', + "${module_name}/condor_config.local.erb") + $template_security = hiera('template_security', "${module_name}/10_security.config.erb" ) - $template_queues = hiera('template_queues', "${module_name}/13_queues.config.erb" + $template_resourcelimits = hiera('template_resourcelimits', + "${module_name}/12_resourcelimits.config.erb") + $template_queues = hiera('template_queues', "${module_name}/13_queues.config.erb" ) - $template_schedd = hiera('template_schedd', "${module_name}/21_schedd.config.erb" + $template_schedd = hiera('template_schedd', "${module_name}/21_schedd.config.erb" ) - $template_fairshares = hiera('template_fairshares', "${module_name}/11_fairshares.config.erb" + $template_fairshares = hiera('template_fairshares', "${module_name}/11_fairshares.config.erb" ) - $template_manager = hiera('template_collector', "${module_name}/22_manager.config.erb" + $template_manager = hiera('template_collector', "${module_name}/22_manager.config.erb" ) - $template_ganglia = hiera('template_ganglia', "${module_name}/23_ganglia.config.erb" + $template_ganglia = hiera('template_ganglia', "${module_name}/23_ganglia.config.erb" ) - $template_workernode = hiera('template_workernode', "${module_name}/20_workernode.config.erb" + $template_workernode = hiera('template_workernode', "${module_name}/20_workernode.config.erb" ) - $template_defrag = hiera('template_defrag', "${module_name}/33_defrag.config.erb" + $template_defrag = hiera('template_defrag', "${module_name}/33_defrag.config.erb" ) - $template_highavailability = hiera('template_defrag', "${module_name}/30_highavailability.config.erb" + $template_highavailability = hiera('template_defrag', "${module_name}/30_highavailability.config.erb" ) } From d54d95f01bc4ef6290dcbb6b79d84939861cca8b Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 13:28:36 +0000 
Subject: [PATCH 20/54] removed config paramters, now using the ones from init.pp --- manifests/config.pp | 142 ++++++++++++++++++++------------------------ manifests/init.pp | 56 ----------------- manifests/params.pp | 8 +-- 3 files changed, 70 insertions(+), 136 deletions(-) diff --git a/manifests/config.pp b/manifests/config.pp index 753251d..551de9d 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -1,80 +1,5 @@ # Class htcondor::config -class htcondor::config ( - $accounting_groups = { - 'CMS' => { - priority_factor => 10000.00, - dynamic_quota => 0.80, - } - , - 'CMS.production' => { - priority_factor => 10000.00, - dynamic_quota => 0.95, - } - } - , - $cluster_has_multiple_domains = false, - $collector_name = 'Personal Condor at $(FULL_HOSTNAME)', - $custom_attribute = 'NORDUGRID_QUEUE', - $enable_cgroup = false, - $enable_multicore = false, - $enable_healthcheck = false, - $high_priority_groups = { - 'cms.admin' => -30, - 'ops' => -20, - 'dteam' => -10, - } - , - $priority_halflife = 43200, - $default_prio_factor = 100000.00, - $group_accept_surplus = true, - $group_autoregroup = true, - $health_check_script = "puppet:///modules/${module_name}/healhcheck_wn_condor", - $include_username_in_accounting = false, - $use_pkg_condor_config = false, - $is_scheduler = false, - $is_manager = false, - $is_worker = false, - $machine_owner = 'physics', - $managers = [], - $number_of_cpus = undef, - $partitionable_slots = true, - $memory_overcommit = 1.5, - $request_memory = true, - $certificate_mapfile = "puppet:///modules/${module_name}/certificate_mapfile", - $kerberos_mapfile = "puppet:///modules/${module_name}/kerberos_mapfile", - # pool_password can also be served from central file location using hiera - $pool_password = "puppet:///modules/${module_name}/pool_password", - $pool_home = '/pool', - $queues = hiera('grid_queues', undef), - $periodic_expr_interval = 60, - $max_periodic_expr_interval = 1200, - $remove_held_jobs_after = 1200, - 
$leave_job_in_queue = undef, - $ganglia_cluster_name = false, - $uid_domain = 'example.com', - $pool_create = true, - $default_domain_name = $uid_domain, - $filesystem_domain = $::fqdn, - $use_accounting_groups = false, - # specify the networks with write access i.e. ["10.132.0.*"] - $workers = [], - $condor_user = root, - $condor_group = root, - $condor_uid = 0, - $condor_gid = 0, - $use_htcondor_account_mapping = true, - $use_fs_auth = true, - $use_password_auth = true, - $use_kerberos_auth = false, - $use_claim_to_be_auth = false, - $use_cert_map_file = false, - $use_krb_map_file = false, - $use_pid_namespaces = false, - $cert_map_file = '/etc/condor/certificate_mapfile', - $krb_map_file = '/etc/condor/kerberos_mapfile', - $machine_list_prefix = 'condor_pool@$(UID_DOMAIN)/', - $max_walltime = '80 * 60 * 60', - $max_cputime = '80 * 60 * 60',) { +class htcondor::config { # TODO: instead of all the parameters, do it like # https://github.com/puppetlabs/puppetlabs-postgresql/blob/master/manifests/server/install.pp # parameters are read from init, e.g. 
@@ -82,8 +7,14 @@ $email_domain = $htcondor::email_domain $admin_email = $htcondor::admin_email $schedulers = $htcondor::schedulers + $managers = $htcondor::managers + $workers = $htcondor::workers $is_scheduler = $htcondor::is_scheduler + $is_manager = $htcondor::is_manager + $is_worker = $htcondor::is_worker + + $machine_ownder = $htcondor::machine_owner # templates $template_config_local = $htcondor::template_config_local @@ -98,6 +29,65 @@ $template_defrag = $htcondor::template_defrag $template_highavailability = $htcondor::template_highavailability + $accounting_groups = $htcondor::accounting_groups + $cluster_has_multiple_domains = $htcondor::cluster_has_multiple_domains + $collector_name = $htcondor::collector_name + $custom_attribute = $htcondor::custom_attribute + $enable_cgroup = $htcondor::enable_cgroup + $enable_multicore = $htcondor::enable_multicore + $enable_healthcheck = $htcondor::enable_healthcheck + $high_priority_groups = $htcondor::high_priority_groups + $priority_halflife = $htcondor::priority_halflife + $default_prio_factor = $htcondor::default_prio_factor + $group_accept_surplus = $htcondor::group_accept_surplus + $group_autoregroup = $htcondor::group_autoregroup + $health_check_script = $htcondor::health_check_script + $include_username_in_accounting = $htcondor::include_username_in_accounting + $use_pkg_condor_config = $htcondor::use_pkg_condor_config + + $number_of_cpus = $htcondor::number_of_cpus + $partitionable_slots = $htcondor::partitionable_slots + $memory_overcommit = $htcondor::memory_overcommit + $request_memory = $htcondor::request_memory + # TODO: duplicate? 
+ $cert_map_file = $htcondor::cert_map_file + $certificate_mapfile = $htcondor::certificate_mapfile + $kerberos_mapfile = $htcondor::kerberos_mapfile + $krb_map_file = $htcondor::krb_map_file + + $pool_home = $htcondor::pool_home + $queues = $htcondor::queues + $periodic_expr_interval = $htcondor::periodic_expr_interval + $max_periodic_expr_interval = $htcondor::max_periodic_expr_interval + $remove_held_jobs_after = $htcondor::remove_held_jobs_after + $leave_job_in_queue = $htcondor::leave_job_in_queue + $ganglia_cluster_name = $htcondor::ganglia_cluster_name + $pool_password = $htcondor::pool_password + $pool_create = $htcondor::pool_create + $uid_domain = $htcondor::uid_domain + $default_domain_name = $htcondor::default_domain_name + $filesystem_domain = $htcondor::filesystem_domain + $use_accounting_groups = $htcondor::use_accounting_groups + + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + $condor_uid = $htcondor::condor_uid + $condor_gid = $htcondor::condor_gid + + $use_htcondor_account_mapping = $htcondor::use_htcondor_account_mapping + $use_fs_auth = $htcondor::use_fs_auth + $use_password_auth = $htcondor::use_password_auth + $use_kerberos_auth = $htcondor::use_kerberos_auth + $use_claim_to_be_auth = $htcondor::use_claim_to_be_auth + $use_cert_map_file = $htcondor::use_cert_map_file + $use_krb_map_file = $htcondor::use_krb_map_file + $use_pid_namespaces = $htcondor::use_pid_namespaces + + $machine_list_prefix = $htcondor::machine_list_prefix + $max_walltime = $htcondor::max_walltime + $max_cputime = $htcondor::max_cputime + + # purge all non-managed config files from /etc/condor/config.d file { '/etc/condor/config.d': ensure => directory, diff --git a/manifests/init.pp b/manifests/init.pp index 708d71e..16ae199 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -199,62 +199,6 @@ } class { 'htcondor::config': - accounting_groups => $accounting_groups, - cluster_has_multiple_domains => $cluster_has_multiple_domains, - 
collector_name => $collector_name, - custom_attribute => $custom_attribute, - enable_cgroup => $enable_cgroup, - enable_multicore => $enable_multicore, - enable_healthcheck => $enable_healthcheck, - high_priority_groups => $high_priority_groups, - priority_halflife => $priority_halflife, - default_prio_factor => $default_prio_factor, - group_accept_surplus => $group_accept_surplus, - group_autoregroup => $group_autoregroup, - health_check_script => $health_check_script, - include_username_in_accounting => $include_username_in_accounting, - use_pkg_condor_config => $use_pkg_condor_config, - is_manager => $is_manager, - is_worker => $is_worker, - machine_owner => $machine_owner, - managers => $managers, - number_of_cpus => $number_of_cpus, - partitionable_slots => $partitionable_slots, - memory_overcommit => $memory_overcommit, - request_memory => $request_memory, - certificate_mapfile => $certificate_mapfile, - kerberos_mapfile => $kerberos_mapfile, - pool_home => $pool_home, - queues => $queues, - periodic_expr_interval => $periodic_expr_interval, - max_periodic_expr_interval => $max_periodic_expr_interval, - remove_held_jobs_after => $remove_held_jobs_after, - leave_job_in_queue => $leave_job_in_queue, - ganglia_cluster_name => $ganglia_cluster_name, - pool_password => $pool_password, - pool_create => $pool_create, - uid_domain => $uid_domain, - default_domain_name => $default_domain_name, - filesystem_domain => $filesystem_domain, - use_accounting_groups => $use_accounting_groups, - workers => $workers, - condor_user => $condor_user, - condor_group => $condor_group, - condor_uid => $condor_uid, - condor_gid => $condor_gid, - use_htcondor_account_mapping => $use_htcondor_account_mapping, - use_fs_auth => $use_fs_auth, - use_password_auth => $use_password_auth, - use_kerberos_auth => $use_kerberos_auth, - use_claim_to_be_auth => $use_claim_to_be_auth, - use_cert_map_file => $use_cert_map_file, - use_krb_map_file => $use_krb_map_file, - use_pid_namespaces => 
$use_pid_namespaces, - cert_map_file => $cert_map_file, - krb_map_file => $krb_map_file, - machine_list_prefix => $machine_list_prefix, - max_walltime => $max_walltime, - max_cputime => $max_cputime, } class { 'htcondor::service': diff --git a/manifests/params.pp b/manifests/params.pp index c55050c..754d43e 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -56,10 +56,6 @@ $partitionable_slots = hiera('partitionable_slots', true) $memory_overcommit = hiera('memory_overcommit', 1.5) $request_memory = hiera('request_memory', true) - $certificate_mapfile = hiera('certificate_mapfile', "puppet:///modules/${module_name}/certificate_mapfile" - ) - $kerberos_mapfile = hiera('kerberos_mapfile', "puppet:///modules/${module_name}/kerberos_mapfile" - ) $pool_home = hiera('pool_home', '/pool') $pool_create = hiera('pool_create', true) @@ -97,8 +93,12 @@ $use_pid_namespaces = hiera('use_pid_namespaces', false) $cert_map_file = hiera('cert_map_file', '/etc/condor/certificate_mapfile' ) + $certificate_mapfile = hiera('certificate_mapfile', "puppet:///modules/${module_name}/certificate_mapfile" + ) $krb_map_file = hiera('krb_map_file', '/etc/condor/kerberos_mapfile' ) + $kerberos_mapfile = hiera('kerberos_mapfile', "puppet:///modules/${module_name}/kerberos_mapfile" + ) $machine_list_prefix = hiera('machine_list_prefix', 'condor_pool@$(UID_DOMAIN)/' ) $pool_password_file = hiera('pool_password_file', "puppet:///modules/${module_name}/pool_password" From 9a6dbd34493304fee44c0146961101e981087f7b Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 13:34:46 +0000 Subject: [PATCH 21/54] simplifying repo installation --- manifests/init.pp | 7 ++-- manifests/repositories.pp | 79 +++++++++++++++++++-------------------- 2 files changed, 41 insertions(+), 45 deletions(-) diff --git a/manifests/init.pp b/manifests/init.pp index 16ae199..a813ebc 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -187,10 +187,9 @@ $max_walltime = $htcondor::params::max_walltime, 
$max_cputime = $htcondor::params::max_cputime,) inherits ::htcondor::params { - class { 'htcondor::repositories': - install_repos => $install_repositories, - dev_repos => $dev_repositories, - condor_priority => $condor_priority, + + if $install_repositories { + include htcondor::repositories } class { 'htcondor::install': diff --git a/manifests/repositories.pp b/manifests/repositories.pp index e561b85..d32067a 100644 --- a/manifests/repositories.pp +++ b/manifests/repositories.pp @@ -1,51 +1,48 @@ # Class: htcondor::repositories # # Provides yum repositories for HTCondor installation -class htcondor::repositories ( - $install_repos = true, - $dev_repos = false, - $condor_priority = '99',) { - if $install_repos { - $major_release = regsubst($::operatingsystemrelease, '^(\d+)\.\d+$', '\1') +class htcondor::repositories { + $dev_repos = $htcondor::dev_repositories + $condor_priority = $htcondor::condor_priority + $major_release = regsubst($::operatingsystemrelease, '^(\d+)\.\d+$', '\1') - case $::osfamily { - 'RedHat' : { - if $dev_repos { - yumrepo { 'htcondor-development': - descr => "HTCondor Development RPM Repository for Redhat Enterprise Linux ${major_release}", - baseurl => "http://research.cs.wisc.edu/htcondor/yum/development/rhel${major_release}", - enabled => 1, - gpgcheck => 0, - priority => "${condor_priority}", - exclude => 'condor.i386, condor.i686', - before => [Package['condor']], - } - } else { - yumrepo { 'htcondor-stable': - descr => "HTCondor Stable RPM Repository for Redhat Enterprise Linux ${major_release}", - baseurl => "http://research.cs.wisc.edu/htcondor/yum/stable/rhel${major_release}", - enabled => 1, - gpgcheck => 0, - priority => "${condor_priority}", - exclude => 'condor.i386, condor.i686', - before => [Package['condor']], - } + case $::osfamily { + 'RedHat' : { + if $dev_repos { + yumrepo { 'htcondor-development': + descr => "HTCondor Development RPM Repository for Redhat Enterprise Linux ${major_release}", + baseurl => 
"http://research.cs.wisc.edu/htcondor/yum/development/rhel${major_release}", + enabled => 1, + gpgcheck => 0, + priority => $condor_priority, + exclude => 'condor.i386, condor.i686', + before => [Package['condor']], + } + } else { + yumrepo { 'htcondor-stable': + descr => "HTCondor Stable RPM Repository for Redhat Enterprise Linux ${major_release}", + baseurl => "http://research.cs.wisc.edu/htcondor/yum/stable/rhel${major_release}", + enabled => 1, + gpgcheck => 0, + priority => $condor_priority, + exclude => 'condor.i386, condor.i686', + before => [Package['condor']], } - } - 'Debian' : { - # http://research.cs.wisc.edu/htcondor/debian/ - notify { 'Debian based systems currently not supported': } - } - 'Windows' : { - # http://research.cs.wisc.edu/htcondor/manual/v8.0/3_2Installation.html#SECTION00425000000000000000 - notify { 'Windows based systems currently not supported': } - } - default : { - $osfamily = $::osfamily - - notify { "OS family '${osfamily}' not recognised": } } } + 'Debian' : { + # http://research.cs.wisc.edu/htcondor/debian/ + notify { 'Debian based systems currently not supported': } + } + 'Windows' : { + # http://research.cs.wisc.edu/htcondor/manual/latest/3_2Installation.html#SECTION00425000000000000000 + notify { 'Windows based systems currently not supported': } + } + default : { + $osfamily = $::osfamily + notify { "OS family '${osfamily}' not recognised": } + } } + } From 9fc8f6e095c9f7d7085481c76df47d6c658ce1f3 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 13:40:34 +0000 Subject: [PATCH 22/54] simplified install --- manifests/init.pp | 5 +---- manifests/install.pp | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/manifests/init.pp b/manifests/init.pp index a813ebc..82eba01 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -187,14 +187,11 @@ $max_walltime = $htcondor::params::max_walltime, $max_cputime = $htcondor::params::max_cputime,) inherits ::htcondor::params { - if 
$install_repositories { - include htcondor::repositories + class { 'htcondor::repositories': } } class { 'htcondor::install': - ensure => $condor_version, - dev_repos => $dev_repositories, } class { 'htcondor::config': diff --git a/manifests/install.pp b/manifests/install.pp index 2e5e3f7..f748758 100644 --- a/manifests/install.pp +++ b/manifests/install.pp @@ -1,15 +1,15 @@ # Class htcondor::install # # Install HTCondor packages -class htcondor::install ( - $ensure = present, - $dev_repos = false,) { - if $dev_repos { - $repo = 'htcondor-development' - } else { - $repo = 'htcondor-stable' - } - package { 'condor': - ensure => $ensure, +class htcondor::install { + $package_ensure = $htcondor::condor_version + + $_package_ensure = $package_ensure ? { + true => 'present', + false => 'purged', + 'absent' => 'purged', + default => $package_ensure, } + + package { 'condor': ensure => $_package_ensure, } } From ee747be3ce2f2904a18951f4154fecb2e0025a85 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 16:01:21 +0000 Subject: [PATCH 23/54] splitting config across multiple files --- manifests/config.pp | 392 +---------------------- manifests/config/common.pp | 46 +++ manifests/config/manager.pp | 85 +++++ manifests/config/scheduler.pp | 51 +++ manifests/config/security.pp | 101 ++++++ manifests/config/worker.pp | 55 ++++ manifests/params.pp | 3 +- templates/13_queues.config.erb | 2 +- templates/20_workernode.config.erb | 6 +- templates/21_schedd.config.erb | 2 +- templates/22_manager.config.erb | 2 +- templates/30_highavailability.config.erb | 4 +- 12 files changed, 356 insertions(+), 393 deletions(-) create mode 100644 manifests/config/common.pp create mode 100644 manifests/config/manager.pp create mode 100644 manifests/config/scheduler.pp create mode 100644 manifests/config/security.pp create mode 100644 manifests/config/worker.pp diff --git a/manifests/config.pp b/manifests/config.pp index 551de9d..eda1e51 100644 --- a/manifests/config.pp +++ 
b/manifests/config.pp @@ -1,92 +1,11 @@ # Class htcondor::config class htcondor::config { - # TODO: instead of all the parameters, do it like - # https://github.com/puppetlabs/puppetlabs-postgresql/blob/master/manifests/server/install.pp - # parameters are read from init, e.g. - # $::htcondor::cert_map_file - $email_domain = $htcondor::email_domain - $admin_email = $htcondor::admin_email - $schedulers = $htcondor::schedulers - $managers = $htcondor::managers - $workers = $htcondor::workers - - $is_scheduler = $htcondor::is_scheduler - $is_manager = $htcondor::is_manager - $is_worker = $htcondor::is_worker - - $machine_ownder = $htcondor::machine_owner - - # templates - $template_config_local = $htcondor::template_config_local - $template_security = $htcondor::template_security - $template_resourcelimits = $htcondor::template_resourcelimits - $template_queues = $htcondor::template_queues - $template_schedd = $htcondor::template_schedd - $template_fairshares = $htcondor::template_fairshares - $template_manager = $htcondor::template_manager - $template_workernode = $htcondor::template_workernode - $template_ganglia = $htcondor::template_ganglia - $template_defrag = $htcondor::template_defrag - $template_highavailability = $htcondor::template_highavailability - - $accounting_groups = $htcondor::accounting_groups - $cluster_has_multiple_domains = $htcondor::cluster_has_multiple_domains - $collector_name = $htcondor::collector_name - $custom_attribute = $htcondor::custom_attribute - $enable_cgroup = $htcondor::enable_cgroup - $enable_multicore = $htcondor::enable_multicore - $enable_healthcheck = $htcondor::enable_healthcheck - $high_priority_groups = $htcondor::high_priority_groups - $priority_halflife = $htcondor::priority_halflife - $default_prio_factor = $htcondor::default_prio_factor - $group_accept_surplus = $htcondor::group_accept_surplus - $group_autoregroup = $htcondor::group_autoregroup - $health_check_script = $htcondor::health_check_script - 
$include_username_in_accounting = $htcondor::include_username_in_accounting - $use_pkg_condor_config = $htcondor::use_pkg_condor_config - - $number_of_cpus = $htcondor::number_of_cpus - $partitionable_slots = $htcondor::partitionable_slots - $memory_overcommit = $htcondor::memory_overcommit - $request_memory = $htcondor::request_memory - # TODO: duplicate? - $cert_map_file = $htcondor::cert_map_file - $certificate_mapfile = $htcondor::certificate_mapfile - $kerberos_mapfile = $htcondor::kerberos_mapfile - $krb_map_file = $htcondor::krb_map_file - - $pool_home = $htcondor::pool_home - $queues = $htcondor::queues - $periodic_expr_interval = $htcondor::periodic_expr_interval - $max_periodic_expr_interval = $htcondor::max_periodic_expr_interval - $remove_held_jobs_after = $htcondor::remove_held_jobs_after - $leave_job_in_queue = $htcondor::leave_job_in_queue + $enable_multicore = $htcondor::enable_multicore $ganglia_cluster_name = $htcondor::ganglia_cluster_name - $pool_password = $htcondor::pool_password - $pool_create = $htcondor::pool_create - $uid_domain = $htcondor::uid_domain - $default_domain_name = $htcondor::default_domain_name - $filesystem_domain = $htcondor::filesystem_domain - $use_accounting_groups = $htcondor::use_accounting_groups - - $condor_user = $htcondor::condor_user - $condor_group = $htcondor::condor_group - $condor_uid = $htcondor::condor_uid - $condor_gid = $htcondor::condor_gid - - $use_htcondor_account_mapping = $htcondor::use_htcondor_account_mapping - $use_fs_auth = $htcondor::use_fs_auth - $use_password_auth = $htcondor::use_password_auth - $use_kerberos_auth = $htcondor::use_kerberos_auth - $use_claim_to_be_auth = $htcondor::use_claim_to_be_auth - $use_cert_map_file = $htcondor::use_cert_map_file - $use_krb_map_file = $htcondor::use_krb_map_file - $use_pid_namespaces = $htcondor::use_pid_namespaces - - $machine_list_prefix = $htcondor::machine_list_prefix - $max_walltime = $htcondor::max_walltime - $max_cputime = $htcondor::max_cputime - 
+ $is_scheduler = $htcondor::is_scheduler + $is_manager = $htcondor::is_manager + $is_worker = $htcondor::is_worker + $managers = $htcondor::managers # purge all non-managed config files from /etc/condor/config.d file { '/etc/condor/config.d': @@ -95,311 +14,24 @@ purge => true, } - $now = strftime('%d.%m.%Y_%H.%M') - $sched_daemon_list = ['SCHEDD'] - $worker_daemon_list = ['STARTD'] - $ganglia_daemon_list = ['GANGLIAD'] - $auth_string = construct_auth_string($use_fs_auth, - $use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth) - - # because HTCondor uses user 'condor_pool' for remote access - # and user 'condor' for local the variables below need to include - # both users in case a machine has more than one role (i.e. manager + CE) - $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" - - $manager_string_remote = join_machine_list($machine_list_prefix, $managers) - $manager_string_local = join_machine_list($machine_prefix_local, $managers) - $manager_string = join([$manager_string_remote, $manager_string_local], ', ' - ) - - $sched_string_remote = join_machine_list($machine_list_prefix, - $schedulers) - $sched_string_local = join_machine_list($machine_prefix_local, - $schedulers) - $sched_string = join([$sched_string_remote, $sched_string_local], ', ') - - $wn_string_remote = join_machine_list($machine_list_prefix, $workers - ) - $wn_string_local = join_machine_list($machine_prefix_local, - $workers) - $wn_string = join([$wn_string_remote, $wn_string_local], ', ') - - if $enable_multicore { - $manage_daemon_list = ['COLLECTOR', 'NEGOTIATOR', 'DEFRAG'] - } else { - $manage_daemon_list = ['COLLECTOR', 'NEGOTIATOR'] - } - - # default daemon, runs everywhere - $default_daemon_list = ['MASTER'] - - if $use_pkg_condor_config { - $common_config_files = [ - File['/etc/condor/condor_config.local'], - File['/etc/condor/config.d/10_security.config'], - ] - } else { - $common_config_files = [ - File['/etc/condor/condor_config'], - 
File['/etc/condor/condor_config.local'], - File['/etc/condor/config.d/10_security.config'], - ] - } - - if $is_scheduler and $is_manager { - # machine is both CE and manager (for small sites) - if $ganglia_cluster_name { - $temp_list = concat($default_daemon_list, $sched_daemon_list) - $temp2_list = concat($temp_list, $ganglia_daemon_list) - $daemon_list = concat($temp2_list, $manage_daemon_list) - $additional_config_files = [ - File['/etc/condor/config.d/12_resourcelimits.config'], - File['/etc/condor/config.d/21_schedd.config'], - File['/etc/condor/config.d/22_manager.config'], - File['/etc/condor/config.d/23_ganglia.config'], - ] - $config_files = concat($common_config_files, - $additional_config_files) - } else { - $temp_list = concat($default_daemon_list, $sched_daemon_list) - $daemon_list = concat($temp_list, $manage_daemon_list) - $additional_config_files = [ - File['/etc/condor/config.d/12_resourcelimits.config'], - File['/etc/condor/config.d/21_schedd.config'], - File['/etc/condor/config.d/22_manager.config'], - File['/etc/condor/config.d/33_defrag.config'], - ] - $config_files = concat($common_config_files, - $additional_config_files) - } - } elsif $is_scheduler { - $daemon_list = concat($default_daemon_list, $sched_daemon_list) - $additional_config_files = [ - File['/etc/condor/config.d/12_resourcelimits.config'], - File['/etc/condor/config.d/21_schedd.config'], - ] - $config_files = concat($common_config_files, - $additional_config_files) - } elsif $is_manager { - # machine running only manager - if $ganglia_cluster_name { - $temp_list = concat($default_daemon_list, - $manage_daemon_list) - $daemon_list = concat($temp_list, $ganglia_daemon_list) - $additional_config_files = [ - File['/etc/condor/config.d/22_manager.config'], - File['/etc/condor/config.d/23_ganglia.config'], - ] - $config_files = concat($common_config_files, - $additional_config_files) - } else { - $daemon_list = concat($default_daemon_list, - $manage_daemon_list) - 
$additional_config_files = [ - File['/etc/condor/config.d/22_manager.config'], - File['/etc/condor/config.d/33_defrag.config'], - ] - $config_files = concat($common_config_files, - $additional_config_files) - } - } elsif $is_worker { - $daemon_list = concat($default_daemon_list, $worker_daemon_list) - $additional_config_files = [File['/etc/condor/config.d/20_workernode.config' - ],] - $config_files = concat($common_config_files, - $additional_config_files) - } else { - $daemon_list = $default_daemon_list - $config_files = $common_config_files - } - # files common between machines - unless $use_pkg_condor_config { - file { '/etc/condor/condor_config': - backup => ".bak.${now}", - source => "puppet:///modules/${module_name}/condor_config", - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - } - - file { '/etc/condor/condor_config.local': - backup => ".bak.${now}", - content => template($template_config_local), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } + $common_class = 'htcondor::config::common' - file { '/etc/condor/config.d/10_security.config': - content => template($template_security), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - - if $pool_create { - $condor_directories = [ - $pool_home, - "${pool_home}/condor", - '/etc/condor/persistent'] - } else { - $condor_directories = ['/etc/condor/persistent'] - } - - file { $condor_directories: - ensure => directory, - owner => 'condor', - mode => '0644', - } - - if $use_kerberos_auth { - if $use_cert_map_file { - file { $cert_map_file: - ensure => present, - source => $certificate_mapfile, - owner => $condor_user, - group => $condor_group, - } - } - - if $use_krb_map_file { - file { $krb_map_file: - ensure => present, - source => $kerberos_mapfile, - owner => $condor_user, - group => $condor_group, - } - } - } + class { $common_class: } - if 
$use_password_auth { - # even if condor runs as condor, it just drops privileges and needs to start - # as root. - # if file is not owned by root, condor will throw this error : - # 06/12/14 15:38:40 error: SEC_PASSWORD_FILE must be owned by Condor's real - # uid - # 06/12/14 15:38:40 error: SEC_PASSWORD_FILE must be owned by Condor's real - # uid - file { '/etc/condor/pool_password': - ensure => present, - source => $pool_password, - owner => root, - group => root, - mode => '0640', - } - } + $daemon_list = create_daemon_list($is_worker, $is_scheduler, $is_manager, + $enable_multicore, $ganglia_cluster_name, size($managers) > 1) - # files for certain roles if $is_scheduler { - file { '/etc/condor/config.d/12_resourcelimits.config': - content => template($template_resourcelimits), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - - file { '/etc/condor/config.d/21_schedd.config': - content => template($template_schedd), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - - if $queues { - file { '/etc/condor/config.d/13_queues.config': - content => template($template_queues), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - } + class { 'htcondor::config::scheduler': require => Class[$common_class], } } if $is_manager { - if $use_accounting_groups { - file { '/etc/condor/config.d/11_fairshares.config': - content => template($template_fairshares), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - } - - file { '/etc/condor/config.d/22_manager.config': - content => template($template_manager), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - - if $ganglia_cluster_name { - file { '/etc/condor/config.d/23_ganglia.config': - content => template($template_ganglia), - require => 
Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - } - - file { '/etc/condor/config.d/33_defrag.config': - content => template($template_defrag), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - - if size($managers) > 1 { - $replication_machines = suffix($managers, ':$(REPLICATION_PORT)') - $had_machines = suffix($managers, ':$(HAD_PORT)') - $replication_list = join($replication_machines, ', ') - $had_list = join($had_machines, ', ') - - file { '/etc/condor/config.d/30_highavailability.config': - content => template($template_highavailability), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - } + class { 'htcondor::config::manager': require => Class[$common_class], } } if $is_worker { - file { '/etc/condor/config.d/20_workernode.config': - content => template($template_workernode), - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - } - - file { '/usr/local/bin/healhcheck_wn_condor': - source => "${health_check_script}", - owner => $condor_user, - group => $condor_group, - mode => '0655', - } - + class { 'htcondor::config::worker': require => Class[$common_class], } } - # this exec must be created in the service.pp file if we want to properly - # handle order including at first run, since the service must be started - # before the reconfig is done - # AND there is an upper Class order saying config must be done before starting - # service. - # $config_files is already a "File" resouce collection. 
- $config_files ~> Exec['/usr/sbin/condor_reconfig'] - } diff --git a/manifests/config/common.pp b/manifests/config/common.pp new file mode 100644 index 0000000..6e7d0a2 --- /dev/null +++ b/manifests/config/common.pp @@ -0,0 +1,46 @@ +# Common parts of condor configuration +class htcondor::config::common { + # general - manifest or 1 or more configs + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + # /etc/condor/condor_config.local + $admin_email = $htcondor::admin_email + $email_domain = $htcondor::email_domain + $condor_uid = $htcondor::condor_uid + $condor_gid = $htcondor::condor_gid + + $is_scheduler = $htcondor::is_scheduler + $use_htcondor_account_mapping = $htcondor::use_htcondor_account_mapping + $include_username_in_accounting = $htcondor::include_username_in_accounting + + $leave_job_in_queue = $htcondor::leave_job_in_queue + $request_memory = $htcondor::request_memory + $use_pkg_condor_config = $htcondor::use_pkg_condor_config + + $template_config_local = $htcondor::template_config_local + + $now = strftime('%d.%m.%Y_%H.%M') + + # files common between machines + unless $use_pkg_condor_config { + file { '/etc/condor/condor_config': + backup => ".bak.${now}", + source => "puppet:///modules/${module_name}/condor_config", + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + } + + file { '/etc/condor/condor_config.local': + backup => ".bak.${now}", + content => template($template_config_local), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } +} diff --git a/manifests/config/manager.pp b/manifests/config/manager.pp new file mode 100644 index 0000000..ec6ffd6 --- /dev/null +++ b/manifests/config/manager.pp @@ -0,0 +1,85 @@ +# htcondor::config::manager +class htcondor::config::manager { + include htcondor::config::security + # 
general - manifest or 1 or more configs + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + $enable_multicore = $htcondor::enable_multicore + $ganglia_cluster_name = $htcondor::ganglia_cluster_name + $managers = $htcondor::managers + $use_accounting_groups = $htcondor::use_accounting_groups + # /etc/condor/config.d/11_fairshares.config + $accounting_groups = $htcondor::accounting_groups + $default_prio_factor = $htcondor::default_prio_factor + $group_accept_surplus = $htcondor::group_accept_surplus + $group_autoregroup = $htcondor::group_autoregroup + $high_priority_groups = $htcondor::high_priority_groups + $priority_halflife = $htcondor::priority_halflife + # /etc/condor/config.d/22_manager.config + $collector_name = $htcondor::collector_name + $daemon_list = $htcondor::config::daemon_list + # template files + $template_defrag = $htcondor::template_defrag + $template_fairshares = $htcondor::template_fairshares + $template_ganglia = $htcondor::template_ganglia + $template_ha = $htcondor::template_highavailability + $template_manager = $htcondor::template_manager + + if $use_accounting_groups { + file { '/etc/condor/config.d/11_fairshares.config': + content => template($template_fairshares), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + } + + file { '/etc/condor/config.d/22_manager.config': + content => template($template_manager), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + + if $ganglia_cluster_name { + file { '/etc/condor/config.d/23_ganglia.config': + content => template($template_ganglia), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + } + + if $enable_multicore { + file { 
'/etc/condor/config.d/33_defrag.config': + content => template($template_defrag), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + } + + if size($managers) > 1 { + $replication_machines = suffix($managers, ':$(REPLICATION_PORT)') + $had_machines = suffix($managers, ':$(HAD_PORT)') + $replication_list = join($replication_machines, ', ') + $had_list = join($had_machines, ', ') + + file { '/etc/condor/config.d/30_highavailability.config': + content => template($template_ha), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + } +} diff --git a/manifests/config/scheduler.pp b/manifests/config/scheduler.pp new file mode 100644 index 0000000..a13f0ef --- /dev/null +++ b/manifests/config/scheduler.pp @@ -0,0 +1,51 @@ +# htcondor::config::scheduler +class htcondor::config::scheduler { + include htcondor::config::security + + # general - manifest or 1 or more configs + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + # 12_resourcelimits.config + $max_walltime = $htcondor::max_walltime + $max_cputime = $htcondor::max_cputime + # /etc/condor/config.d/13_queues.config + $queues = $htcondor::queues + $periodic_expr_interval = $htcondor::periodic_expr_interval + $max_periodic_expr_interval = $htcondor::max_periodic_expr_interval + $remove_held_jobs_after = $htcondor::remove_held_jobs_after + # /etc/condor/config.d/21_schedd.config + $daemon_list = $htcondor::config::daemon_list + # template files + $template_queues = $htcondor::template_queues + $template_resourcelimits = $htcondor::template_resourcelimits + $template_schedd = $htcondor::template_schedd + + file { '/etc/condor/config.d/12_resourcelimits.config': + content => template($template_resourcelimits), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + 
mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + + if $queues { + file { '/etc/condor/config.d/13_queues.config': + content => template($template_queues), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + } + + file { '/etc/condor/config.d/21_schedd.config': + content => template($template_schedd), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } +} diff --git a/manifests/config/security.pp b/manifests/config/security.pp new file mode 100644 index 0000000..ccf9618 --- /dev/null +++ b/manifests/config/security.pp @@ -0,0 +1,101 @@ +class htcondor::config::security { + # general - manifest or 1 or more configs + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + $pool_password_file = $htcondor::pool_password + + $schedulers = $htcondor::schedulers + $managers = $htcondor::managers + $workers = $htcondor::workers + + $use_fs_auth = $htcondor::use_fs_auth + $use_password_auth = $htcondor::use_password_auth + $use_kerberos_auth = $htcondor::use_kerberos_auth + $use_claim_to_be_auth = $htcondor::use_claim_to_be_auth + + $use_cert_map_file = $htcondor::use_cert_map_file + $cert_map_file = $htcondor::cert_map_file + $cert_map_file_source = $htcondor::certificate_mapfile + + $use_krb_map_file = $htcondor::use_krb_map_file + $krb_map_file = $htcondor::krb_map_file + $krb_map_file_source = $htcondor::kerberos_mapfile + + # /etc/condor/config.d/10_security.config + $default_domain_name = $htcondor::default_domain_name + $filesystem_domain = $htcondor::filesystem_domain + $is_worker = $htcondor::is_worker + $machine_list_prefix = $htcondor::machine_list_prefix + $uid_domain = $htcondor::uid_domain + + # template files + $template_security = $htcondor::template_security + + $auth_string = construct_auth_string($use_fs_auth, + 
$use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth) + + # because HTCondor uses user 'condor_pool' for remote access + # and user 'condor' for local the variables below need to include + # both users in case a machine has more than one role (i.e. manager + CE) + $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" + + $manager_string_remote = join_machine_list($machine_list_prefix, $managers) + $manager_string_local = join_machine_list($machine_prefix_local, $managers) + $manager_string = join([$manager_string_remote, $manager_string_local], ', ' + ) + + $sched_string_remote = join_machine_list($machine_list_prefix, $schedulers) + $sched_string_local = join_machine_list($machine_prefix_local, $schedulers) + $sched_string = join([$sched_string_remote, $sched_string_local], ', ' + ) + + $wn_string_remote = join_machine_list($machine_list_prefix, $workers) + $wn_string_local = join_machine_list($machine_prefix_local, $workers) + $wn_string = join([$wn_string_remote, $wn_string_local], ', ') + + file { '/etc/condor/config.d/10_security.config': + content => template($template_security), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + + if $use_password_auth { + # even if condor runs as condor, it just drops privileges and needs to start + # as root. 
+ # if file is not owned by root, condor will throw this error : + # 06/12/14 15:38:40 error: SEC_PASSWORD_FILE must be owned by Condor's real + # uid + # 06/12/14 15:38:40 error: SEC_PASSWORD_FILE must be owned by Condor's real + # uid + file { '/etc/condor/pool_password': + ensure => present, + source => $pool_password_file, + owner => root, + group => root, + mode => '0640', + } + } + + if $use_kerberos_auth { + if $use_cert_map_file { + file { $cert_map_file: + ensure => present, + source => $cert_map_file_source, + owner => $condor_user, + group => $condor_group, + } + } + + if $use_krb_map_file { + file { $krb_map_file: + ensure => present, + source => $krb_map_file_source, + owner => $condor_user, + group => $condor_group, + } + } + } +} diff --git a/manifests/config/worker.pp b/manifests/config/worker.pp new file mode 100644 index 0000000..6d06ffc --- /dev/null +++ b/manifests/config/worker.pp @@ -0,0 +1,55 @@ +# htcondor::config::scheduler +class htcondor::config::worker { + include htcondor::config::security + + # general - manifest or 1 or more configs + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + $health_check_script = $htcondor::health_check_script + # /etc/condor/config.d/20_workernode.config + $custom_attribute = $htcondor::custom_attribute + $daemon_list = $htcondor::config::daemon_list + $enable_cgroup = $htcondor::enable_cgroup + $enable_healthcheck = $htcondor::enable_healthcheck + $machine_owner = $htcondor::machine_owner + $memory_overcommit = $htcondor::memory_overcommit + $number_of_cpus = $htcondor::number_of_cpus + $partitionable_slots = $htcondor::partitionable_slots + $pool_create = $htcondor::pool_create + $pool_home = $htcondor::pool_home + $use_pid_namespaces = $htcondor::use_pid_namespaces + # template files + $template_workernode = $htcondor::template_workernode + + + file { '/etc/condor/config.d/20_workernode.config': + content => template($template_workernode), + require => Package['condor'], + 
owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + + file { '/usr/local/bin/healhcheck_wn_condor': + source => $health_check_script, + owner => $condor_user, + group => $condor_group, + mode => '0655', + } + + if $pool_create { + $condor_directories = [ + $pool_home, + "${pool_home}/condor", + '/etc/condor/persistent'] + } else { + $condor_directories = ['/etc/condor/persistent'] + } + + file { $condor_directories: + ensure => directory, + owner => 'condor', + mode => '0644', + } +} diff --git a/manifests/params.pp b/manifests/params.pp index 754d43e..091ca6a 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -67,8 +67,7 @@ $max_walltime = hiera('max_walltime', '80 * 60 * 60') $max_cputime = hiera('max_cputime', '80 * 60 * 60') - $ganglia_cluster_name = hiera('ganglia_cluster_name', 'Example HTCondor' - ) + $ganglia_cluster_name = hiera('ganglia_cluster_name', undef) $uid_domain = hiera('uid_domain', 'example.org') $default_domain_name = hiera('default_domain_name', $uid_domain) diff --git a/templates/13_queues.config.erb b/templates/13_queues.config.erb index a937ee9..beec953 100644 --- a/templates/13_queues.config.erb +++ b/templates/13_queues.config.erb @@ -2,4 +2,4 @@ PERIODIC_EXPR_INTERVAL = <%= @periodic_expr_interval %> MAX_PERIODIC_EXPR_INTERVAL = <%= @max_periodic_expr_interval %> SYSTEM_PERIODIC_HOLD = !member(BatchQueue, {<%= @queues.keys.map{|k| '"' + k + '"'}.join(', ') %>}) SYSTEM_PERIODIC_HOLD_REASON = "Invalid queue" -SYSTEM_PERIODIC_REMOVE = (JobStatus == 5 && HoldReasonCode == 26 && (CurrentTime - EnteredCurrentStatus) > <%= remove_held_jobs_after %>) +SYSTEM_PERIODIC_REMOVE = (JobStatus == 5 && HoldReasonCode == 26 && (CurrentTime - EnteredCurrentStatus) > <%= @remove_held_jobs_after %>) diff --git a/templates/20_workernode.config.erb b/templates/20_workernode.config.erb index 1adb613..0471a18 100644 --- a/templates/20_workernode.config.erb +++ 
b/templates/20_workernode.config.erb @@ -1,11 +1,7 @@ MachineOwner = <%= @machine_owner %> StartJobs = True ## NUM_CPUS based on heira (if defined), otherwise on facter -<% if @number_of_cpus -%> NUM_CPUS = <%= @number_of_cpus %> -<% else -%> -NUM_CPUS = <%= @processorcount %> -<% end -%> DETECTED_CPUS = $NUM_CPUS @@ -102,4 +98,4 @@ CGROUP_MEMORY_LIMIT = soft ## This macro determines what daemons the condor_master will start and keep its watchful eyes on. ## The list is a comma or space separated list of subsystem names -DAEMON_LIST = <%= @daemon_list.flatten.join(', ') %> +DAEMON_LIST = <%= @daemon_list -%> diff --git a/templates/21_schedd.config.erb b/templates/21_schedd.config.erb index 57c7557..d358094 100644 --- a/templates/21_schedd.config.erb +++ b/templates/21_schedd.config.erb @@ -49,4 +49,4 @@ MAX_NUM_MASTER_LOG = 10 ## This macro determines what daemons the condor_master will start and keep its watchful eyes on. ## The list is a comma or space separated list of subsystem names -DAEMON_LIST = <%= @daemon_list.flatten.join(', ') %> +DAEMON_LIST = <%= @daemon_list -%> diff --git a/templates/22_manager.config.erb b/templates/22_manager.config.erb index efd8785..2478436 100644 --- a/templates/22_manager.config.erb +++ b/templates/22_manager.config.erb @@ -40,4 +40,4 @@ MAX_NUM_MASTER_LOG = 10 ## This macro determines what daemons the condor_master will start and keep its watchful eyes on. 
## The list is a comma or space separated list of subsystem names -DAEMON_LIST = <%= @daemon_list.flatten.join(', ') %> +DAEMON_LIST = <%= @daemon_list -%> diff --git a/templates/30_highavailability.config.erb b/templates/30_highavailability.config.erb index aa0b51f..db99975 100644 --- a/templates/30_highavailability.config.erb +++ b/templates/30_highavailability.config.erb @@ -51,9 +51,7 @@ HAD = $(SBIN)/condor_had REPLICATION = $(SBIN)/condor_replication TRANSFERER = $(LIBEXEC)/condor_transferer -## the master should start at least these five daemons -DAEMON_LIST = MASTER, COLLECTOR, NEGOTIATOR, HAD, REPLICATION -## DC_Daemon list should contain at least these five +DAEMON_LIST = <%= @daemon_list -%> DC_DAEMON_LIST = +HAD, REPLICATION ## Enables/disables the replication feature of HAD daemon From 1072b50a1356bd4795c84a17e96b5a2690bf2835 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 16:01:41 +0000 Subject: [PATCH 24/54] added easier way to specify the DAEMON_LIST --- .../parser/functions/create_daemon_list.rb | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 lib/puppet/parser/functions/create_daemon_list.rb diff --git a/lib/puppet/parser/functions/create_daemon_list.rb b/lib/puppet/parser/functions/create_daemon_list.rb new file mode 100644 index 0000000..4a7be25 --- /dev/null +++ b/lib/puppet/parser/functions/create_daemon_list.rb @@ -0,0 +1,46 @@ +module Puppet::Parser::Functions + newfunction(:create_daemon_list, :type => :rvalue) do |args| + raise(Puppet::ParseError, "create_daemon_list() wrong number of arguments. 
Given: #{args.size} for 6)") if args.size != 6 + is_worker = args[0] + is_scheduler = args[1] + is_manager = args[2] + + defrag = args[3] + ganglia = args[4] + high_availability= args[5] + + default_list = ['MASTER'] + worker_daemon_list = ['STARTD'] + sched_daemon_list = ['SCHEDD'] + manage_daemon_list = ['COLLECTOR', 'NEGOTIATOR'] + ganglia_daemon_list = ['GANGLIAD'] + #HAD, REPLICATION + + daemon_list = Array.new + # all nodes have master + daemon_list.push 'MASTER' + + if is_worker == true + daemon_list.push 'STARTD' + end + if is_scheduler == true + daemon_list.push 'SCHEDD' + end + if is_manager == true + daemon_list.push 'COLLECTOR' + daemon_list.push 'NEGOTIATOR' + end + if defrag == true + daemon_list.push 'DEFRAG' + end + if ganglia == true + daemon_list.push 'GANGLIAD' + end + if high_availability == true + high_availability.push 'HAD' + high_availability.push 'REPLICATION' + end + + return daemon_list.join(", ") + end +end From 60ba55277ab2cd18476898e994ee271e22b12dd3 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 16:02:07 +0000 Subject: [PATCH 25/54] updated README HTcondor link to latest version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 500158e..3cbb8a6 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Therefore, the first step is to install the latest HTCondor repository for your ``` yum install -y https://research.cs.wisc.edu/htcondor/yum/repo.d/htcondor-stable-rhel6.repo ``` -If you wish to use a [pool password for authentication](http://research.cs.wisc.edu/htcondor/manual/v8.4/3_6Security.html#SECTION00463400000000000000) you will need to create one first: ```condor_store_cred -f /files/pool_password```. +If you wish to use a [pool password for authentication](http://research.cs.wisc.edu/htcondor/manual/latest/3_6Security.html#SECTION00463400000000000000) you will need to create one first: ```condor_store_cred -f /files/pool_password```. 
##Limitations ###General From 02b134162dc5286bc8bfaf525f44b07012ae5e7f Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 10 Feb 2016 16:06:09 +0000 Subject: [PATCH 26/54] added hiera example --- examples/hiera_example.yaml | 62 +++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 examples/hiera_example.yaml diff --git a/examples/hiera_example.yaml b/examples/hiera_example.yaml new file mode 100644 index 0000000..05a852b --- /dev/null +++ b/examples/hiera_example.yaml @@ -0,0 +1,62 @@ +--- +htcondor::collectors: + - collector1.example.org + - collector2.example.org + +htcondor::schedulers: + - sched1.example.org + - sched2.example.org + +htcondor::workers: + - '*.example.org' + +htcondor::uid_domain: 'example.org' +htcondor::collector_name: 'Example-HTCondor-Cluster' +htcondor::cluster_has_multiple_domains: true +htcondor::admin_email: localhost +htcondor::run_as_user: condor +htcondor::run_as_group: condor +htcondor::machine_owner: condor +htcondor::ganglia_cluster_name: 'Example HTCondor' +htcondor::include_username_in_accounting: true + +# max CPU time in seconds - for schedulers only +htcondor::max_cputime: '80 * 60 * 60' +htcondor::max_walltime: '80 * 60 * 60' + +htcondor::per_job_history_dir: '/var/spool/condor/history' +htcondor::pool_home: '/condor' +htcondor::use_pkg_condor_config: true + +htcondor::high_priority_groups: + dteam: -10 + cms.admin: -30 + ops: -20 + ops.admin: -21 + +htcondor::use_accounting_groups: true +htcondor::accounting_groups: + alice: + dynamic_quota: 0.01 + priority_factor: 10000.0 + atlas: + dynamic_quota: 0.01 + priority_factor: 10000.0 + cms: + dynamic_quota: 0.15 + priority_factor: 10000.0 + dteam: + dynamic_quota: 0.01 + priority_factor: 100.0 + ilc: + dynamic_quota: 0.01 + priority_factor: 10000.0 + lhcb: + dynamic_quota: 0.05 + priority_factor: 10000.0 + ops: + dynamic_quota: 0.01 + priority_factor: 1.0 + cms.admin: + dynamic_quota: 0.01 + priority_factor: 1.0 From 
38d1e6bc1a19808d25e2527776aee141d476a6ff Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 12:53:59 +0000 Subject: [PATCH 27/54] added tests for daemon list creation and fixed bug in the same --- .../parser/functions/create_daemon_list.rb | 17 +++----- .../functions/create_daemon_list_spec.rb | 40 +++++++++++++++++++ 2 files changed, 45 insertions(+), 12 deletions(-) create mode 100644 spec/unit/puppet/parser/functions/create_daemon_list_spec.rb diff --git a/lib/puppet/parser/functions/create_daemon_list.rb b/lib/puppet/parser/functions/create_daemon_list.rb index 4a7be25..c2fe9e1 100644 --- a/lib/puppet/parser/functions/create_daemon_list.rb +++ b/lib/puppet/parser/functions/create_daemon_list.rb @@ -9,13 +9,6 @@ module Puppet::Parser::Functions ganglia = args[4] high_availability= args[5] - default_list = ['MASTER'] - worker_daemon_list = ['STARTD'] - sched_daemon_list = ['SCHEDD'] - manage_daemon_list = ['COLLECTOR', 'NEGOTIATOR'] - ganglia_daemon_list = ['GANGLIAD'] - #HAD, REPLICATION - daemon_list = Array.new # all nodes have master daemon_list.push 'MASTER' @@ -30,15 +23,15 @@ module Puppet::Parser::Functions daemon_list.push 'COLLECTOR' daemon_list.push 'NEGOTIATOR' end - if defrag == true + if defrag == true and is_manager == true daemon_list.push 'DEFRAG' end - if ganglia == true + if ganglia == true and (is_manager == true or is_scheduler == true) daemon_list.push 'GANGLIAD' end - if high_availability == true - high_availability.push 'HAD' - high_availability.push 'REPLICATION' + if high_availability == true and is_manager == true + daemon_list.push 'HAD' + daemon_list.push 'REPLICATION' end return daemon_list.join(", ") diff --git a/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb b/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb new file mode 100644 index 0000000..840ae7c --- /dev/null +++ b/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb @@ -0,0 +1,40 @@ +require 'spec_helper' +require 
'puppetlabs_spec_helper/puppetlabs_spec/puppet_internals' + +describe "create_daemon_list function" do + let(:scope) { PuppetlabsSpec::PuppetInternals.scope } + it "should exist" do + expect(Puppet::Parser::Functions.function("create_daemon_list")).to eq("function_create_daemon_list") + end + + context 'create_daemon_list test' do + it "get worker list right" do + result = scope.function_create_daemon_list([true, false, false, false, false, false]) + expect(result).to eq('MASTER, STARTD') + end + it "get scheduler right" do + result = scope.function_create_daemon_list([false, true, false, false, false, false]) + expect(result).to eq('MASTER, SCHEDD') + end + it "get scheduler with ganglia right" do + result = scope.function_create_daemon_list([false, true, false, false, true, false]) + expect(result).to eq('MASTER, SCHEDD, GANGLIAD') + end + it "get manager right" do + result = scope.function_create_daemon_list([false, false, true, false, false, false]) + expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR') + end + it "get multicore manager right" do + result = scope.function_create_daemon_list([false, false, true, true, false, false]) + expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR, DEFRAG') + end + it "multiple managers" do + result = scope.function_create_daemon_list([false, false, true, false, false, true]) + expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR, HAD, REPLICATION') + end + it "all the daemons" do + result = scope.function_create_daemon_list([true, true, true, true, true, true]) + expect(result).to eq('MASTER, STARTD, SCHEDD, COLLECTOR, NEGOTIATOR, DEFRAG, GANGLIAD, HAD, REPLICATION') + end + end +end From 06797a70fa0eb8a9bb4a4bdbde2148218cca8e44 Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 12:54:13 +0000 Subject: [PATCH 28/54] fixed hiera example entry (collectors -> managers) --- examples/hiera_example.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/hiera_example.yaml 
b/examples/hiera_example.yaml index 05a852b..0f1ee9b 100644 --- a/examples/hiera_example.yaml +++ b/examples/hiera_example.yaml @@ -1,5 +1,5 @@ --- -htcondor::collectors: +htcondor::managers: - collector1.example.org - collector2.example.org From e70736320088f3c33c282e86679c5b5511e0a03c Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 15:06:35 +0000 Subject: [PATCH 29/54] removed repository dependency if they are not installed --- manifests/init.pp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/manifests/init.pp b/manifests/init.pp index 82eba01..9c573fb 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -189,6 +189,7 @@ ::htcondor::params { if $install_repositories { class { 'htcondor::repositories': } + Class['htcondor::repositories'] -> Class['htcondor::install'] } class { 'htcondor::install': @@ -200,6 +201,6 @@ class { 'htcondor::service': } - Class['htcondor::repositories'] -> Class['htcondor::install'] -> Class['htcondor::config' - ] -> Class['htcondor::service'] + Class['htcondor::install'] -> Class['htcondor::config'] -> Class['htcondor::service' + ] } From f7a9b5c239e0a8a113d5c9396df8e201f6e2e874 Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 15:31:59 +0000 Subject: [PATCH 30/54] added connection broker configuration --- manifests/config/security.pp | 74 +++++++++++++++++--------------- manifests/init.pp | 2 + manifests/params.pp | 4 ++ templates/10_security.config.erb | 5 +++ 4 files changed, 51 insertions(+), 34 deletions(-) diff --git a/manifests/config/security.pp b/manifests/config/security.pp index ccf9618..3652f31 100644 --- a/manifests/config/security.pp +++ b/manifests/config/security.pp @@ -1,57 +1,63 @@ class htcondor::config::security { # general - manifest or 1 or more configs - $condor_user = $htcondor::condor_user - $condor_group = $htcondor::condor_group - $pool_password_file = $htcondor::pool_password + $condor_user = $htcondor::condor_user + $condor_group = 
$htcondor::condor_group + $pool_password_file = $htcondor::pool_password - $schedulers = $htcondor::schedulers - $managers = $htcondor::managers - $workers = $htcondor::workers + $schedulers = $htcondor::schedulers + $managers = $htcondor::managers + $workers = $htcondor::workers - $use_fs_auth = $htcondor::use_fs_auth - $use_password_auth = $htcondor::use_password_auth - $use_kerberos_auth = $htcondor::use_kerberos_auth - $use_claim_to_be_auth = $htcondor::use_claim_to_be_auth + $use_fs_auth = $htcondor::use_fs_auth + $use_password_auth = $htcondor::use_password_auth + $use_kerberos_auth = $htcondor::use_kerberos_auth + $use_claim_to_be_auth = $htcondor::use_claim_to_be_auth - $use_cert_map_file = $htcondor::use_cert_map_file - $cert_map_file = $htcondor::cert_map_file - $cert_map_file_source = $htcondor::certificate_mapfile + $use_cert_map_file = $htcondor::use_cert_map_file + $cert_map_file = $htcondor::cert_map_file + $cert_map_file_source = $htcondor::certificate_mapfile - $use_krb_map_file = $htcondor::use_krb_map_file - $krb_map_file = $htcondor::krb_map_file - $krb_map_file_source = $htcondor::kerberos_mapfile + $use_krb_map_file = $htcondor::use_krb_map_file + $krb_map_file = $htcondor::krb_map_file + $krb_map_file_source = $htcondor::kerberos_mapfile # /etc/condor/config.d/10_security.config - $default_domain_name = $htcondor::default_domain_name - $filesystem_domain = $htcondor::filesystem_domain - $is_worker = $htcondor::is_worker - $machine_list_prefix = $htcondor::machine_list_prefix - $uid_domain = $htcondor::uid_domain + $default_domain_name = $htcondor::default_domain_name + $filesystem_domain = $htcondor::filesystem_domain + $is_worker = $htcondor::is_worker + $machine_list_prefix = $htcondor::machine_list_prefix + $uid_domain = $htcondor::uid_domain + + # for private networks + $uses_connection_broker = $htcondor::uses_connection_broker + $private_network_name = $htcondor::private_network_name # template files - $template_security = 
$htcondor::template_security + $template_security = $htcondor::template_security - $auth_string = construct_auth_string($use_fs_auth, + $auth_string = construct_auth_string($use_fs_auth, $use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth) # because HTCondor uses user 'condor_pool' for remote access # and user 'condor' for local the variables below need to include # both users in case a machine has more than one role (i.e. manager + CE) - $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" + $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" - $manager_string_remote = join_machine_list($machine_list_prefix, $managers) - $manager_string_local = join_machine_list($machine_prefix_local, $managers) - $manager_string = join([$manager_string_remote, $manager_string_local], ', ' - ) + $manager_string_remote = join_machine_list($machine_list_prefix, $managers) + $manager_string_local = join_machine_list($machine_prefix_local, $managers) + $manager_string = join([ + $manager_string_remote, + $manager_string_local], ', ') - $sched_string_remote = join_machine_list($machine_list_prefix, $schedulers) - $sched_string_local = join_machine_list($machine_prefix_local, $schedulers) - $sched_string = join([$sched_string_remote, $sched_string_local], ', ' + $sched_string_remote = join_machine_list($machine_list_prefix, $schedulers) + $sched_string_local = join_machine_list($machine_prefix_local, $schedulers + ) + $sched_string = join([$sched_string_remote, $sched_string_local], ', ' ) - $wn_string_remote = join_machine_list($machine_list_prefix, $workers) - $wn_string_local = join_machine_list($machine_prefix_local, $workers) - $wn_string = join([$wn_string_remote, $wn_string_local], ', ') + $wn_string_remote = join_machine_list($machine_list_prefix, $workers) + $wn_string_local = join_machine_list($machine_prefix_local, $workers) + $wn_string = join([$wn_string_remote, $wn_string_local], ', ') file { '/etc/condor/config.d/10_security.config': content => 
template($template_security), diff --git a/manifests/init.pp b/manifests/init.pp index 9c573fb..742db0a 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -181,6 +181,8 @@ $use_cert_map_file = $htcondor::params::use_cert_map_file, $use_krb_map_file = $htcondor::params::use_krb_map_file, $use_pid_namespaces = $htcondor::params::use_pid_namespaces, + $uses_connection_broker = $htcondor::params::uses_connection_broker, + $private_network_name = $htcondor::params::private_network_name, $cert_map_file = $htcondor::params::cert_map_file, $krb_map_file = $htcondor::params::krb_map_file, $machine_list_prefix = $htcondor::params::machine_list_prefix, diff --git a/manifests/params.pp b/manifests/params.pp index 091ca6a..7b215e5 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -103,6 +103,10 @@ $pool_password_file = hiera('pool_password_file', "puppet:///modules/${module_name}/pool_password" ) + # for private networks + $uses_connection_broker = hiera('uses_connection_broker', false) + $private_network_name = hiera('private_network_name', $::domain) + # notification settings $admin_email = hiera('admin_email', 'localhost') $email_domain = hiera('email_domain', 'localhost') diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index 3bcb03f..d2f518b 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -10,6 +10,11 @@ FILESYSTEM_DOMAIN = <%= @filesystem_domain %> COLLECTOR_HOST = <%= @managers.flatten.join(', ') %> +<% if @uses_connection_broker == true then -%> +CCB_ADDRESS = $(COLLECTOR_HOST) +PRIVATE_NETWORK_NAME = <%= @private_network_name -%> +<% end -%> + <% if @cluster_has_multiple_domains == true then -%> TRUST_UID_DOMAIN = True <% end -%> From e2e48bbcff2f86214ffc4970c8a147b5be8fabff Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 15:55:33 +0000 Subject: [PATCH 31/54] workers should not be able to write to scheduler --- templates/10_security.config.erb | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index d2f518b..226409e 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -46,7 +46,7 @@ COLLECTOR.ALLOW_ADVERTISE_MASTER = $(CES), $(CMS), $(WNS) COLLECTOR.ALLOW_ADVERTISE_SCHEDD = $(CES) COLLECTOR.ALLOW_ADVERTISE_STARTD = $(WNS) -SCHEDD.ALLOW_WRITE = $(USERS), $(CES), $(WNS) +SCHEDD.ALLOW_WRITE = $(USERS), $(CES) ALLOW_DAEMON = condor@$(UID_DOMAIN), \ condor@$(UID_DOMAIN)/*.$(UID_DOMAIN), \ From 04a2a9f3ecaf275f755c9b9fce923fec108c6fe3 Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 16:13:57 +0000 Subject: [PATCH 32/54] added missing cluster_has_multiple_domains --- manifests/config/security.pp | 82 ++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/manifests/config/security.pp b/manifests/config/security.pp index 3652f31..6068e90 100644 --- a/manifests/config/security.pp +++ b/manifests/config/security.pp @@ -1,63 +1,71 @@ class htcondor::config::security { # general - manifest or 1 or more configs - $condor_user = $htcondor::condor_user - $condor_group = $htcondor::condor_group - $pool_password_file = $htcondor::pool_password + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + $pool_password_file = $htcondor::pool_password - $schedulers = $htcondor::schedulers - $managers = $htcondor::managers - $workers = $htcondor::workers + $schedulers = $htcondor::schedulers + $managers = $htcondor::managers + $workers = $htcondor::workers - $use_fs_auth = $htcondor::use_fs_auth - $use_password_auth = $htcondor::use_password_auth - $use_kerberos_auth = $htcondor::use_kerberos_auth - $use_claim_to_be_auth = $htcondor::use_claim_to_be_auth + $use_fs_auth = $htcondor::use_fs_auth + $use_password_auth = $htcondor::use_password_auth + $use_kerberos_auth = $htcondor::use_kerberos_auth + $use_claim_to_be_auth = $htcondor::use_claim_to_be_auth 
- $use_cert_map_file = $htcondor::use_cert_map_file - $cert_map_file = $htcondor::cert_map_file - $cert_map_file_source = $htcondor::certificate_mapfile + $use_cert_map_file = $htcondor::use_cert_map_file + $cert_map_file = $htcondor::cert_map_file + $cert_map_file_source = $htcondor::certificate_mapfile - $use_krb_map_file = $htcondor::use_krb_map_file - $krb_map_file = $htcondor::krb_map_file - $krb_map_file_source = $htcondor::kerberos_mapfile + $use_krb_map_file = $htcondor::use_krb_map_file + $krb_map_file = $htcondor::krb_map_file + $krb_map_file_source = $htcondor::kerberos_mapfile # /etc/condor/config.d/10_security.config - $default_domain_name = $htcondor::default_domain_name - $filesystem_domain = $htcondor::filesystem_domain - $is_worker = $htcondor::is_worker - $machine_list_prefix = $htcondor::machine_list_prefix - $uid_domain = $htcondor::uid_domain + $cluster_has_multiple_domains = $htcondor::cluster_has_multiple_domains + $default_domain_name = $htcondor::default_domain_name + $filesystem_domain = $htcondor::filesystem_domain + $is_worker = $htcondor::is_worker + $machine_list_prefix = $htcondor::machine_list_prefix + $uid_domain = $htcondor::uid_domain # for private networks - $uses_connection_broker = $htcondor::uses_connection_broker - $private_network_name = $htcondor::private_network_name + $uses_connection_broker = $htcondor::uses_connection_broker + $private_network_name = $htcondor::private_network_name # template files - $template_security = $htcondor::template_security + $template_security = $htcondor::template_security - $auth_string = construct_auth_string($use_fs_auth, + $auth_string = construct_auth_string($use_fs_auth, $use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth) # because HTCondor uses user 'condor_pool' for remote access # and user 'condor' for local the variables below need to include # both users in case a machine has more than one role (i.e. 
manager + CE) - $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" + $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" - $manager_string_remote = join_machine_list($machine_list_prefix, $managers) - $manager_string_local = join_machine_list($machine_prefix_local, $managers) - $manager_string = join([ + $manager_string_remote = join_machine_list($machine_list_prefix, + $managers) + $manager_string_local = join_machine_list($machine_prefix_local, + $managers) + $manager_string = join([ $manager_string_remote, $manager_string_local], ', ') - $sched_string_remote = join_machine_list($machine_list_prefix, $schedulers) - $sched_string_local = join_machine_list($machine_prefix_local, $schedulers + $sched_string_remote = join_machine_list($machine_list_prefix, + $schedulers) + $sched_string_local = join_machine_list($machine_prefix_local, + $schedulers) + $sched_string = join([ + $sched_string_remote, + $sched_string_local], ', ') + + $wn_string_remote = join_machine_list($machine_list_prefix, + $workers) + $wn_string_local = join_machine_list($machine_prefix_local, + $workers) + $wn_string = join([$wn_string_remote, $wn_string_local], ', ' ) - $sched_string = join([$sched_string_remote, $sched_string_local], ', ' - ) - - $wn_string_remote = join_machine_list($machine_list_prefix, $workers) - $wn_string_local = join_machine_list($machine_prefix_local, $workers) - $wn_string = join([$wn_string_remote, $wn_string_local], ', ') file { '/etc/condor/config.d/10_security.config': content => template($template_security), From 680e0a81295a2e9c32e30ad23c7255f07bbff0be Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 16:16:03 +0000 Subject: [PATCH 33/54] ensuring that configs end on a new line --- templates/11_fairshares.config.erb | 2 +- templates/20_workernode.config.erb | 2 +- templates/21_schedd.config.erb | 2 +- templates/22_manager.config.erb | 2 +- templates/30_highavailability.config.erb | 2 +- templates/condor_config.local.erb | 2 +- 6 files changed, 
6 insertions(+), 6 deletions(-) diff --git a/templates/11_fairshares.config.erb b/templates/11_fairshares.config.erb index c5e081a..b0c24ad 100644 --- a/templates/11_fairshares.config.erb +++ b/templates/11_fairshares.config.erb @@ -44,4 +44,4 @@ GROUP_PRIO_FACTOR_group_<%= key %> = <%= value['priority_factor'] %> <% if value.has_key?('dynamic_quota') -%> GROUP_QUOTA_DYNAMIC_group_<%= key %> = <%= value['dynamic_quota'] %> <% end -%> -<% end -%> +<% end %> diff --git a/templates/20_workernode.config.erb b/templates/20_workernode.config.erb index 0471a18..10187fd 100644 --- a/templates/20_workernode.config.erb +++ b/templates/20_workernode.config.erb @@ -98,4 +98,4 @@ CGROUP_MEMORY_LIMIT = soft ## This macro determines what daemons the condor_master will start and keep its watchful eyes on. ## The list is a comma or space separated list of subsystem names -DAEMON_LIST = <%= @daemon_list -%> +DAEMON_LIST = <%= @daemon_list %> diff --git a/templates/21_schedd.config.erb b/templates/21_schedd.config.erb index d358094..0b7b9e2 100644 --- a/templates/21_schedd.config.erb +++ b/templates/21_schedd.config.erb @@ -49,4 +49,4 @@ MAX_NUM_MASTER_LOG = 10 ## This macro determines what daemons the condor_master will start and keep its watchful eyes on. ## The list is a comma or space separated list of subsystem names -DAEMON_LIST = <%= @daemon_list -%> +DAEMON_LIST = <%= @daemon_list %> diff --git a/templates/22_manager.config.erb b/templates/22_manager.config.erb index 2478436..43d653f 100644 --- a/templates/22_manager.config.erb +++ b/templates/22_manager.config.erb @@ -40,4 +40,4 @@ MAX_NUM_MASTER_LOG = 10 ## This macro determines what daemons the condor_master will start and keep its watchful eyes on. 
## The list is a comma or space separated list of subsystem names -DAEMON_LIST = <%= @daemon_list -%> +DAEMON_LIST = <%= @daemon_list %> diff --git a/templates/30_highavailability.config.erb b/templates/30_highavailability.config.erb index db99975..bb7274e 100644 --- a/templates/30_highavailability.config.erb +++ b/templates/30_highavailability.config.erb @@ -51,7 +51,7 @@ HAD = $(SBIN)/condor_had REPLICATION = $(SBIN)/condor_replication TRANSFERER = $(LIBEXEC)/condor_transferer -DAEMON_LIST = <%= @daemon_list -%> +DAEMON_LIST = <%= @daemon_list %> DC_DAEMON_LIST = +HAD, REPLICATION ## Enables/disables the replication feature of HAD daemon diff --git a/templates/condor_config.local.erb b/templates/condor_config.local.erb index 1c047bb..be3e032 100644 --- a/templates/condor_config.local.erb +++ b/templates/condor_config.local.erb @@ -41,4 +41,4 @@ ConcurrencyLimits = strcat(AcctGroup, ",", AcctSubGroup, ",", Owner) SUBMIT_EXPRS = $(SUBMIT_EXPRS) AcctGroup, AcctSubGroup, AccountingGroup, ConcurrencyLimits <% else -%> -<% end -%> +<% end %> From 683ec35e67feca4fbfbdc048e89a14b59ce0cde5 Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 16:29:39 +0000 Subject: [PATCH 34/54] fixed string treated as bool in daemon list --- lib/puppet/parser/functions/create_daemon_list.rb | 2 +- .../unit/puppet/parser/functions/create_daemon_list_spec.rb | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/puppet/parser/functions/create_daemon_list.rb b/lib/puppet/parser/functions/create_daemon_list.rb index c2fe9e1..0da08b6 100644 --- a/lib/puppet/parser/functions/create_daemon_list.rb +++ b/lib/puppet/parser/functions/create_daemon_list.rb @@ -26,7 +26,7 @@ module Puppet::Parser::Functions if defrag == true and is_manager == true daemon_list.push 'DEFRAG' end - if ganglia == true and (is_manager == true or is_scheduler == true) + if ganglia and (is_manager == true or is_scheduler == true) daemon_list.push 'GANGLIAD' end if high_availability == true 
and is_manager == true diff --git a/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb b/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb index 840ae7c..22fd5ee 100644 --- a/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb +++ b/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb @@ -17,9 +17,13 @@ expect(result).to eq('MASTER, SCHEDD') end it "get scheduler with ganglia right" do - result = scope.function_create_daemon_list([false, true, false, false, true, false]) + result = scope.function_create_daemon_list([false, true, false, false, 'DICE HTCondor', false]) expect(result).to eq('MASTER, SCHEDD, GANGLIAD') end + it "ganglia can be undefined" do + result = scope.function_create_daemon_list([false, true, false, false, nil, false]) + expect(result).to eq('MASTER, SCHEDD') + end it "get manager right" do result = scope.function_create_daemon_list([false, false, true, false, false, false]) expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR') From 9387754f584181903c99855cb1937000600c2155 Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 16 Feb 2016 16:39:32 +0000 Subject: [PATCH 35/54] enabling schedulers for ganglia --- manifests/config/scheduler.pp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/manifests/config/scheduler.pp b/manifests/config/scheduler.pp index a13f0ef..d79dfa3 100644 --- a/manifests/config/scheduler.pp +++ b/manifests/config/scheduler.pp @@ -5,6 +5,7 @@ # general - manifest or 1 or more configs $condor_user = $htcondor::condor_user $condor_group = $htcondor::condor_group + $ganglia_cluster_name = $htcondor::ganglia_cluster_name # 12_resourcelimits.config $max_walltime = $htcondor::max_walltime $max_cputime = $htcondor::max_cputime @@ -16,6 +17,7 @@ # /etc/condor/config.d/21_schedd.config $daemon_list = $htcondor::config::daemon_list # template files + $template_ganglia = $htcondor::template_ganglia $template_queues = $htcondor::template_queues $template_resourcelimits = 
$htcondor::template_resourcelimits $template_schedd = $htcondor::template_schedd @@ -48,4 +50,15 @@ mode => '0644', notify => Exec['/usr/sbin/condor_reconfig'], } + + if $ganglia_cluster_name { + file { '/etc/condor/config.d/23_ganglia.config': + content => template($template_ganglia), + require => Package['condor'], + owner => $condor_user, + group => $condor_group, + mode => '0644', + notify => Exec['/usr/sbin/condor_reconfig'], + } + } } From 3bd85e3aba930a0308c07bcee541f0b895615ac8 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 17 Feb 2016 10:55:39 +0000 Subject: [PATCH 36/54] simplified daemon list creation and fixed incorrect "or/and" usage in ruby --- .../parser/functions/create_daemon_list.rb | 12 +++++------ manifests/config.pp | 21 ++++++++++++++++--- .../functions/create_daemon_list_spec.rb | 12 +++++++++-- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/lib/puppet/parser/functions/create_daemon_list.rb b/lib/puppet/parser/functions/create_daemon_list.rb index 0da08b6..732d708 100644 --- a/lib/puppet/parser/functions/create_daemon_list.rb +++ b/lib/puppet/parser/functions/create_daemon_list.rb @@ -5,9 +5,9 @@ module Puppet::Parser::Functions is_scheduler = args[1] is_manager = args[2] - defrag = args[3] - ganglia = args[4] - high_availability= args[5] + enable_multicore = args[3] + run_ganglia = args[4] + more_than_two_managers = args[5] daemon_list = Array.new # all nodes have master @@ -23,13 +23,13 @@ module Puppet::Parser::Functions daemon_list.push 'COLLECTOR' daemon_list.push 'NEGOTIATOR' end - if defrag == true and is_manager == true + if enable_multicore == true && is_manager == true daemon_list.push 'DEFRAG' end - if ganglia and (is_manager == true or is_scheduler == true) + if run_ganglia == true && (is_manager == true || is_scheduler == true) daemon_list.push 'GANGLIAD' end - if high_availability == true and is_manager == true + if more_than_two_managers == true && is_manager == true daemon_list.push 'HAD' daemon_list.push 
'REPLICATION' end diff --git a/manifests/config.pp b/manifests/config.pp index eda1e51..5f88a1e 100644 --- a/manifests/config.pp +++ b/manifests/config.pp @@ -18,9 +18,24 @@ $common_class = 'htcondor::config::common' class { $common_class: } - - $daemon_list = create_daemon_list($is_worker, $is_scheduler, $is_manager, - $enable_multicore, $ganglia_cluster_name, size($managers) > 1) + $more_than_two_managers = size($managers) > 1 + $run_ganglia = $ganglia_cluster_name != undef + + $daemon_list = create_daemon_list($is_worker, $is_scheduler, + $is_manager, $enable_multicore, $run_ganglia, $more_than_two_managers) + + $debug_msg = "constructing daemon list from \n \ + -is_worker: ${is_worker}\n \ + -is_scheduler: ${is_scheduler}\n \ + -is_manager: ${is_manager}\n \ + -enable_multicore: ${enable_multicore}\n \ + -run_ganglia: ${run_ganglia} \n \ + -more_than_two_managers: ${more_than_two_managers} \n\ + resulting in ${daemon_list}" + notify { 'checking daemon list:': + withpath => true, + name => $debug_msg, + } if $is_scheduler { class { 'htcondor::config::scheduler': require => Class[$common_class], } diff --git a/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb b/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb index 22fd5ee..7b485b3 100644 --- a/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb +++ b/spec/unit/puppet/parser/functions/create_daemon_list_spec.rb @@ -17,17 +17,21 @@ expect(result).to eq('MASTER, SCHEDD') end it "get scheduler with ganglia right" do - result = scope.function_create_daemon_list([false, true, false, false, 'DICE HTCondor', false]) + result = scope.function_create_daemon_list([false, true, false, false, true, false]) expect(result).to eq('MASTER, SCHEDD, GANGLIAD') end it "ganglia can be undefined" do - result = scope.function_create_daemon_list([false, true, false, false, nil, false]) + result = scope.function_create_daemon_list([false, true, false, false, false, false]) expect(result).to eq('MASTER, 
SCHEDD') end it "get manager right" do result = scope.function_create_daemon_list([false, false, true, false, false, false]) expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR') end + it "get manager with ganglia right" do + result = scope.function_create_daemon_list([false, false, true, false, true, false]) + expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR, GANGLIAD') + end it "get multicore manager right" do result = scope.function_create_daemon_list([false, false, true, true, false, false]) expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR, DEFRAG') @@ -36,6 +40,10 @@ result = scope.function_create_daemon_list([false, false, true, false, false, true]) expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR, HAD, REPLICATION') end + it "multiple managers and ganglia" do + result = scope.function_create_daemon_list([false, false, true, false, true, true]) + expect(result).to eq('MASTER, COLLECTOR, NEGOTIATOR, GANGLIAD, HAD, REPLICATION') + end it "all the daemons" do result = scope.function_create_daemon_list([true, true, true, true, true, true]) expect(result).to eq('MASTER, STARTD, SCHEDD, COLLECTOR, NEGOTIATOR, DEFRAG, GANGLIAD, HAD, REPLICATION') From c8285f2673225ceb342d1222e706a2824215df19 Mon Sep 17 00:00:00 2001 From: kreczko Date: Wed, 17 Feb 2016 15:55:45 +0000 Subject: [PATCH 37/54] replacing machine list creation with stdlib functions and splitting 1 item per line in config --- .../parser/functions/join_machine_list.rb | 15 -------- manifests/config/security.pp | 37 +++++++------------ .../functions/join_machine_list_spec.rb | 27 -------------- templates/10_security.config.erb | 15 ++++++-- 4 files changed, 26 insertions(+), 68 deletions(-) delete mode 100644 lib/puppet/parser/functions/join_machine_list.rb delete mode 100644 spec/unit/puppet/parser/functions/join_machine_list_spec.rb diff --git a/lib/puppet/parser/functions/join_machine_list.rb b/lib/puppet/parser/functions/join_machine_list.rb deleted file mode 100644 index dd245b0..0000000 --- 
a/lib/puppet/parser/functions/join_machine_list.rb +++ /dev/null @@ -1,15 +0,0 @@ -module Puppet::Parser::Functions - newfunction(:join_machine_list, :type => :rvalue) do |args| - raise(Puppet::ParseError, "join_machine_list() wrong number of arguments. Given: #{args.size} for 2)") if args.size !=2 - prefix = args[0] - machine_list = args[1] - new_machine_list = Array.new - - machine_list.each do |item| - machine = prefix + item - new_machine_list.push machine - end - - return new_machine_list.join(", ") - end -end diff --git a/manifests/config/security.pp b/manifests/config/security.pp index 6068e90..ae437b5 100644 --- a/manifests/config/security.pp +++ b/manifests/config/security.pp @@ -26,7 +26,7 @@ $default_domain_name = $htcondor::default_domain_name $filesystem_domain = $htcondor::filesystem_domain $is_worker = $htcondor::is_worker - $machine_list_prefix = $htcondor::machine_list_prefix + $machine_prefix_remote = $htcondor::machine_list_prefix $uid_domain = $htcondor::uid_domain # for private networks @@ -44,28 +44,19 @@ # both users in case a machine has more than one role (i.e. 
manager + CE) $machine_prefix_local = "${condor_user}@$(UID_DOMAIN)/" - $manager_string_remote = join_machine_list($machine_list_prefix, - $managers) - $manager_string_local = join_machine_list($machine_prefix_local, - $managers) - $manager_string = join([ - $manager_string_remote, - $manager_string_local], ', ') - - $sched_string_remote = join_machine_list($machine_list_prefix, - $schedulers) - $sched_string_local = join_machine_list($machine_prefix_local, - $schedulers) - $sched_string = join([ - $sched_string_remote, - $sched_string_local], ', ') - - $wn_string_remote = join_machine_list($machine_list_prefix, - $workers) - $wn_string_local = join_machine_list($machine_prefix_local, - $workers) - $wn_string = join([$wn_string_remote, $wn_string_local], ', ' - ) + $manager_list_local = prefix($managers, $machine_prefix_local) + $manager_list_remote = prefix($managers, $machine_prefix_remote) + $manager_list = union($manager_list_local, + $manager_list_remote) + + $scheduler_list_local = prefix($schedulers, $machine_prefix_local) + $scheduler_list_remote = prefix($schedulers, $machine_prefix_remote) + $scheduler_list = union($scheduler_list_local, + $scheduler_list_remote) + + $worker_list_local = prefix($workers, $machine_prefix_local) + $worker_list_remote = prefix($workers, $machine_prefix_remote) + $worker_list = union($worker_list_local, $worker_list_remote) file { '/etc/condor/config.d/10_security.config': content => template($template_security), diff --git a/spec/unit/puppet/parser/functions/join_machine_list_spec.rb b/spec/unit/puppet/parser/functions/join_machine_list_spec.rb deleted file mode 100644 index 046a33e..0000000 --- a/spec/unit/puppet/parser/functions/join_machine_list_spec.rb +++ /dev/null @@ -1,27 +0,0 @@ -require 'spec_helper' -require 'puppetlabs_spec_helper/puppetlabs_spec/puppet_internals' - -describe "join_machine_list function" do - let(:scope) { PuppetlabsSpec::PuppetInternals.scope } - it "should exist" do - 
expect(Puppet::Parser::Functions.function("join_machine_list")).to eq("function_join_machine_list") - end - machine_prefix = 'condor_pool@$(UID_DOMAIN)/' - - context 'join_machine_list tests' do - it "single machine" do - result = scope.function_join_machine_list([machine_prefix,['test1.example.com']]) - expect(result).to eq(machine_prefix + 'test1.example.com') - end - it "single machine different prefix" do - prefix = 'root@$(UID_DOMAIN)/' - result = scope.function_join_machine_list([prefix, ['test1.example.com']]) - expect(result).to eq(prefix + 'test1.example.com') - end - it "multiple machines" do - result = scope.function_join_machine_list([machine_prefix,['test1.example.com', 'test2.example.com', 'test3.example.com']]) - expect(result).to eq('condor_pool@$(UID_DOMAIN)/test1.example.com, condor_pool@$(UID_DOMAIN)/test2.example.com, condor_pool@$(UID_DOMAIN)/test3.example.com') - end - end -end - diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index 226409e..321e18f 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -20,9 +20,18 @@ TRUST_UID_DOMAIN = True <% end -%> # Machines & users -CMS = <%= @manager_string %> -CES = <%= @sched_string %> -WNS = <%= @wn_string %> +<% if @manager_list then -%> +CMS = \ +<%= @manager_list.sort.map { |k| " #{k}" }.join(", \\\n") -%> +<% end %> +<% if @scheduler_list then -%> +CES = \ +<%= @scheduler_list.sort.map { |k| " #{k}" }.join(", \\\n") -%> +<% end %> +<% if @worker_list then -%> +WNS = \ +<%= @worker_list.sort.map { |k| " #{k}" }.join(", \\\n") -%> +<% end %> USERS = *@$(UID_DOMAIN) From 52b75179435b3dbb56524b0f1479712b448b74bb Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 Feb 2016 10:38:47 +0000 Subject: [PATCH 38/54] added flag to enable [condor reporting](http://research.cs.wisc.edu/htcondor/privacy.html), enabled by default --- manifests/config/common.pp | 1 + manifests/init.pp | 1 + manifests/params.pp | 3 +++ 
templates/condor_config.local.erb | 6 ++++++ 4 files changed, 11 insertions(+) diff --git a/manifests/config/common.pp b/manifests/config/common.pp index 6e7d0a2..deb7c3c 100644 --- a/manifests/config/common.pp +++ b/manifests/config/common.pp @@ -5,6 +5,7 @@ $condor_group = $htcondor::condor_group # /etc/condor/condor_config.local $admin_email = $htcondor::admin_email + $enable_condor_reporting = $htcondor::enable_condor_reporting $email_domain = $htcondor::email_domain $condor_uid = $htcondor::condor_uid $condor_gid = $htcondor::condor_gid diff --git a/manifests/init.pp b/manifests/init.pp index 742db0a..bc3941b 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -114,6 +114,7 @@ $condor_priority = $htcondor::params::repo_priority, $condor_version = $htcondor::params::condor_version, $custom_attribute = $htcondor::params::custom_attribute, + $enable_condor_reporting = $htcondor::params::enable_condor_reporting, $enable_cgroup = $htcondor::params::enable_cgroup, $enable_multicore = $htcondor::params::enable_multicore, $enable_healthcheck = $htcondor::params::enable_healthcheck, diff --git a/manifests/params.pp b/manifests/params.pp index 7b215e5..42fc874 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -15,6 +15,9 @@ $condor_version = hiera('condor_version', 'present') $custom_attribute = hiera('custom_attribute', 'NORDUGRID_QUEUE') + # this is one of the funding requirements for HTCondor + # for more information see https://research.cs.wisc.edu/htcondor/privacy.html + $enable_condor_reporting = hiera('enable_condor_reporting', true) $enable_cgroup = hiera('enable_cgroup', false) $enable_multicore = hiera('enable_multicore', false) $enable_healthcheck = hiera('enable_healthcheck', false) diff --git a/templates/condor_config.local.erb b/templates/condor_config.local.erb index be3e032..98a0dc7 100644 --- a/templates/condor_config.local.erb +++ b/templates/condor_config.local.erb @@ -42,3 +42,9 @@ SUBMIT_EXPRS = $(SUBMIT_EXPRS) AcctGroup, 
AcctSubGroup, AccountingGroup, Concurr <% else -%> <% end %> + +<% unless @enable_condor_reporting -%> +# disable reports to condor-admin@cs.wisc.edu (http://research.cs.wisc.edu/htcondor/privacy.html) +CONDOR_DEVELOPERS = NONE +CONDOR_DEVELOPERS_COLLECTOR = NONE +<% end %> From e8c1faed1ca5980b123146c1c9dc3b929f311e9e Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 Feb 2016 10:39:03 +0000 Subject: [PATCH 39/54] added CHANGELOG --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..fa0048a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,26 @@ +# Version 2.0.0 +Version 2.0.0 brought big changes to the module. The biggest change is a structural one. +`htcondor::params.pp` was added to set defaults for all the parameters. +In addition, parameters are attempted to be read via `hiera` first. Full merge +support for hashes and arrays is provided. +With these changes the `htcondor::config.pp` was split into six pieces: + - the main config setting up the rest + - a common config part + - the security configuration + - separate configs for manager, scheduler & worker +The full detail of these changes can be seen in [PR 53](https://github.com/HEP-Puppet/htcondor/pull/53). + +## New features +- configure connection broker for private workers (i.e. workers that cannot be reached from the manager or scheduler but can reach the manager). +- enabled `ganglia` daemon for schedulers (previously only possible on managers) +- flag to enable [condor reporting](http://research.cs.wisc.edu/htcondor/privacy.html), disabed by default + +## Bug fixes +- daemon list would be incorrect for some versions of Ruby. This was due to the use of `and` and `or` operators which is incorrect for boolean comparisons.
+- added missing `cluster_has_multiple_domains` parameter (w.r.t to 2.0.0 beta) +- removed repository dependency if it is disabled + +## Other +- changed config templates to ensure new line at the end of the file and reduced the use of `-%>` +- workers are no longer able to write to schedulers by default +- new formatting for the security config: one line per entry for manager/scheduler/worker \ No newline at end of file From e852adb111526bdf5484c7ff8bf7f54b4aef0bd8 Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 Feb 2016 14:42:13 +0000 Subject: [PATCH 40/54] more information on examples --- README.md | 9 +++++ examples/hiera_example.yaml | 62 -------------------------------- examples/htcondor_common.yaml | 36 +++++++++++++++++++ examples/htcondor_manager.yaml | 38 ++++++++++++++++++++ examples/htcondor_scheduler.yaml | 8 +++++ examples/htcondor_worker.yaml | 5 +++ 6 files changed, 96 insertions(+), 62 deletions(-) delete mode 100644 examples/hiera_example.yaml create mode 100644 examples/htcondor_common.yaml create mode 100644 examples/htcondor_manager.yaml create mode 100644 examples/htcondor_scheduler.yaml create mode 100644 examples/htcondor_worker.yaml diff --git a/README.md b/README.md index 3cbb8a6..a203b10 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,15 @@ yum install -y https://research.cs.wisc.edu/htcondor/yum/repo.d/htcondor-stable- ``` If you wish to use a [pool password for authentication](http://research.cs.wisc.edu/htcondor/manual/latest/3_6Security.html#SECTION00463400000000000000) you will need to create one first: ```condor_store_cred -f /files/pool_password```. +### Examples +`hiera` config examples can be found in the examples folder. 
They describe a minimal example of + - settings shared across different node types: `htcondor_common.yaml` + - settings for managers (nodes that run collector & negotiator daemons): `htcondor_manager.yaml` + - settings for schedulers: `htcondor_scheduler.yaml` + - settings for worker nodes: `htcondor_worker.yaml` +The examples assume class management in hiere by adding `hiera_include('classes')` to the `site.pp`. +Real life examples can be found in https://github.com/uobdic/UKI-SOUTHGRID-BRIS-HEP. + ##Limitations ###General diff --git a/examples/hiera_example.yaml b/examples/hiera_example.yaml deleted file mode 100644 index 0f1ee9b..0000000 --- a/examples/hiera_example.yaml +++ /dev/null @@ -1,62 +0,0 @@ ---- -htcondor::managers: - - collector1.example.org - - collector2.example.org - -htcondor::schedulers: - - sched1.example.org - - sched2.example.org - -htcondor::workers: - - '*.example.org' - -htcondor::uid_domain: 'example.org' -htcondor::collector_name: 'Example-HTCondor-Cluster' -htcondor::cluster_has_multiple_domains: true -htcondor::admin_email: localhost -htcondor::run_as_user: condor -htcondor::run_as_group: condor -htcondor::machine_owner: condor -htcondor::ganglia_cluster_name: 'Example HTCondor' -htcondor::include_username_in_accounting: true - -# max CPU time in seconds - for schedulers only -htcondor::max_cputime: '80 * 60 * 60' -htcondor::max_walltime: '80 * 60 * 60' - -htcondor::per_job_history_dir: '/var/spool/condor/history' -htcondor::pool_home: '/condor' -htcondor::use_pkg_condor_config: true - -htcondor::high_priority_groups: - dteam: -10 - cms.admin: -30 - ops: -20 - ops.admin: -21 - -htcondor::use_accounting_groups: true -htcondor::accounting_groups: - alice: - dynamic_quota: 0.01 - priority_factor: 10000.0 - atlas: - dynamic_quota: 0.01 - priority_factor: 10000.0 - cms: - dynamic_quota: 0.15 - priority_factor: 10000.0 - dteam: - dynamic_quota: 0.01 - priority_factor: 100.0 - ilc: - dynamic_quota: 0.01 - priority_factor: 10000.0 - lhcb: -
dynamic_quota: 0.05 - priority_factor: 10000.0 - ops: - dynamic_quota: 0.01 - priority_factor: 1.0 - cms.admin: - dynamic_quota: 0.01 - priority_factor: 1.0 diff --git a/examples/htcondor_common.yaml b/examples/htcondor_common.yaml new file mode 100644 index 0000000..2644fbd --- /dev/null +++ b/examples/htcondor_common.yaml @@ -0,0 +1,36 @@ +--- +message: "This is condor!" +# common htcondor bits +# all install condor +classes: + - htcondor +# common parameters +htcondor::managers: + - collector1.example.org + - collector2.example.org + +htcondor::schedulers: + - sched1.example.org + - sched2.example.org + +htcondor::workers: + - '*.example.org' + +htcondor::uid_domain: 'example.org' +htcondor::default_domain_name: 'example.org' +htcondor::cluster_has_multiple_domains: true +htcondor::collector_name: 'Example-HTCondor-Cluster' +htcondor::cluster_has_multiple_domains: true +htcondor::admin_email: localhost +htcondor::condor_user: condor +htcondor::condor_group: condor +htcondor::machine_owner: condor +htcondor::include_username_in_accounting: true + +htcondor::pool_home: '/condor' +htcondor::use_pkg_condor_config: true +# this refers to a custom puppet mount called 'secrets' +htcondor::pool_password: 'puppet:///secrets/pool_password' + +# should be off for a production system (only true for fresh install or update is needed) +htcondor::install_repositories: false diff --git a/examples/htcondor_manager.yaml b/examples/htcondor_manager.yaml new file mode 100644 index 0000000..289ba89 --- /dev/null +++ b/examples/htcondor_manager.yaml @@ -0,0 +1,38 @@ +--- +classes: + - htcondor + +htcondor::is_manager: true + +htcondor::high_priority_groups: + dteam: -10 + cms.admin: -30 + ops: -20 + ops.admin: -21 +htcondor::use_accounting_groups: true +htcondor::ganglia_cluster_name: 'My HTCondor Cluster' +htcondor::accounting_groups: + alice: + dynamic_quota: 0.01 + priority_factor: 10000.0 + atlas: + dynamic_quota: 0.01 + priority_factor: 10000.0 + cms: + dynamic_quota: 0.15 + 
priority_factor: 10000.0 + dteam: + dynamic_quota: 0.01 + priority_factor: 100.0 + ilc: + dynamic_quota: 0.01 + priority_factor: 10000.0 + lhcb: + dynamic_quota: 0.05 + priority_factor: 10000.0 + ops: + dynamic_quota: 0.01 + priority_factor: 1.0 + cms.admin: + dynamic_quota: 0.01 + priority_factor: 1.0 diff --git a/examples/htcondor_scheduler.yaml b/examples/htcondor_scheduler.yaml new file mode 100644 index 0000000..6b06736 --- /dev/null +++ b/examples/htcondor_scheduler.yaml @@ -0,0 +1,8 @@ +--- +classes: + - htcondor + +htcondor::is_scheduler: true +# max CPU time in seconds +htcondor::max_cputime: '80 * 60 * 60' +htcondor::max_walltime: '80 * 60 * 60' \ No newline at end of file diff --git a/examples/htcondor_worker.yaml b/examples/htcondor_worker.yaml new file mode 100644 index 0000000..5575545 --- /dev/null +++ b/examples/htcondor_worker.yaml @@ -0,0 +1,5 @@ +--- +classes: + - htcondor + +htcondor::is_worker: true From f5ec02177494442a95dc26911b703ce055785e04 Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 8 Mar 2016 17:02:51 +0000 Subject: [PATCH 41/54] replaced custom_attribute with hash of attributes and added custom_job_attributes --- README.md | 19 ++++++++++++++++++ manifests/config/worker.pp | 32 +++++++++++++++--------------- manifests/init.pp | 3 ++- manifests/params.pp | 3 ++- templates/20_workernode.config.erb | 23 +++++++++++++++++---- 5 files changed, 58 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index a203b10..24bc4f3 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,25 @@ If you wish to use a [pool password for authentication](http://research.cs.wisc. The examples assume class management in hiere by adding `hiera_include('classes')` to the `site.pp`. Real life examples can be found in https://github.com/uobdic/UKI-SOUTHGRID-BRIS-HEP. +## Custom machine/job attributes +Sometimes it is necessary to create custom attributes for condor. Machine attributes can be used +in job requirements (e.g. 
`HasMatLab = True`) and job attributes for job reporting/monitoring (e.g. `HEPSPEC06 = 14.00`). +To specify the attributes in hiera simply add +``` +htcondor::custom_attributes: + - HasMatLab: True + ... +``` +and for job attributes +``` +htcondor::custom_job_attributes: + - HEPSPEC06: 14.00 + - CPUScaling: 1.04 + ... +``` +Although the use is identical, they are put into different places. `custom_attributes` end up added to the `STARTD_ATTRS` +and `custom_job_attributes` are added to `STARTD_JOB_ATTRS`. + ##Limitations ###General diff --git a/manifests/config/worker.pp b/manifests/config/worker.pp index 6d06ffc..6370f35 100644 --- a/manifests/config/worker.pp +++ b/manifests/config/worker.pp @@ -3,24 +3,24 @@ include htcondor::config::security # general - manifest or 1 or more configs - $condor_user = $htcondor::condor_user - $condor_group = $htcondor::condor_group - $health_check_script = $htcondor::health_check_script + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + $health_check_script = $htcondor::health_check_script # /etc/condor/config.d/20_workernode.config - $custom_attribute = $htcondor::custom_attribute - $daemon_list = $htcondor::config::daemon_list - $enable_cgroup = $htcondor::enable_cgroup - $enable_healthcheck = $htcondor::enable_healthcheck - $machine_owner = $htcondor::machine_owner - $memory_overcommit = $htcondor::memory_overcommit - $number_of_cpus = $htcondor::number_of_cpus - $partitionable_slots = $htcondor::partitionable_slots - $pool_create = $htcondor::pool_create - $pool_home = $htcondor::pool_home - $use_pid_namespaces = $htcondor::use_pid_namespaces + $custom_attributes = $htcondor::custom_attributes + $custom_job_attributes = $htcondor::custom_job_attributes + $daemon_list = $htcondor::config::daemon_list + $enable_cgroup = $htcondor::enable_cgroup + $enable_healthcheck = $htcondor::enable_healthcheck + $machine_owner = $htcondor::machine_owner + $memory_overcommit = $htcondor::memory_overcommit + 
$number_of_cpus = $htcondor::number_of_cpus + $partitionable_slots = $htcondor::partitionable_slots + $pool_create = $htcondor::pool_create + $pool_home = $htcondor::pool_home + $use_pid_namespaces = $htcondor::use_pid_namespaces # template files - $template_workernode = $htcondor::template_workernode - + $template_workernode = $htcondor::template_workernode file { '/etc/condor/config.d/20_workernode.config': content => template($template_workernode), diff --git a/manifests/init.pp b/manifests/init.pp index bc3941b..496d653 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -113,7 +113,8 @@ $admin_email = $htcondor::params::admin_email, $condor_priority = $htcondor::params::repo_priority, $condor_version = $htcondor::params::condor_version, - $custom_attribute = $htcondor::params::custom_attribute, + $custom_attributes = $htcondor::params::custom_attributes, + $custom_job_attributes = $htcondor::params::custom_job_attributes, $enable_condor_reporting = $htcondor::params::enable_condor_reporting, $enable_cgroup = $htcondor::params::enable_cgroup, $enable_multicore = $htcondor::params::enable_multicore, diff --git a/manifests/params.pp b/manifests/params.pp index 42fc874..19ef2b5 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -13,7 +13,8 @@ ) $repo_priority = hiera('repo_priority', '99') $condor_version = hiera('condor_version', 'present') - $custom_attribute = hiera('custom_attribute', 'NORDUGRID_QUEUE') + $custom_attributes = hiera_hash('custom_attribute', {}) + $custom_job_attributes = hiera_hash('custom_job_attributes', {}) # this is one of the funding requirements for HTCondor # for more information see https://research.cs.wisc.edu/htcondor/privacy.html diff --git a/templates/20_workernode.config.erb b/templates/20_workernode.config.erb index 10187fd..2320011 100644 --- a/templates/20_workernode.config.erb +++ b/templates/20_workernode.config.erb @@ -5,11 +5,26 @@ NUM_CPUS = <%= @number_of_cpus %> DETECTED_CPUS = $NUM_CPUS -# custom 
attribute for easier splitting into queues -# with ARC CE (using condor_requirements=" && custom_attribute" -<%= @custom_attribute %> = True +# custom machine attributes for job matching +# e.g. queues with ARC CE (using condor_requirements=" && custom_attribute)" +<% if @custom_attributes.any? -%> +<% @custom_attributes.each do |k, v| -%> +<%=k -%> = <%=v %> +<% end -%> +STARTD_ATTRS = <%= @custom_attributes.keys.join(', ') -%>, MachineOwner, StartJobs +<% else -%> +STARTD_ATTRS = MachineOwner, StartJobs +<% end -%> -STARTD_ATTRS = <%= @custom_attribute %>, $(STARTD_ATTRS) MachineOwner, StartJobs +# custom job attributes for job reporting/monitoring +<% if @custom_job_attributes.any? -%> +<% @custom_job_attributes.each do |k, v| -%> +<%=k -%> = <%=v %> +<% end -%> +STARTD_JOB_ATTRS = <%= @custom_job_attributes.keys.join(', ') -%>, MemoryUsage +<% else -%> +STARTD_JOB_ATTRS = MemoryUsage +<% end -%> ## Permanent way of stopping jobs from starting STARTD.SETTABLE_ATTRS_ADMINISTRATOR = StartJobs From 2713641462c9e1a1ee71ac51933a8548a17a4a0b Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 6 Jun 2016 11:27:21 +0100 Subject: [PATCH 42/54] fixed security for ANONYMOUS --- templates/10_security.config.erb | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index 321e18f..3bb4605 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -76,7 +76,11 @@ SCHEDD.DENY_WRITE = nobody@$(UID_DOMAIN) # Authentication SEC_DEFAULT_AUTHENTICATION = REQUIRED SEC_READ_AUTHENTICATION = OPTIONAL +<% if @auth_string.include? 
"ANONYMOUS" -%> +SEC_CLIENT_AUTHENTICATION = OPTIONAL +<% else -%> SEC_CLIENT_AUTHENTICATION = REQUIRED +<% end -%> SEC_DEFAULT_AUTHENTICATION_METHODS = <%= @auth_string %> SEC_CLIENT_AUTHENTICATION_METHODS = <%= @auth_string %> SEC_READ_AUTHENTICATION_METHODS = <%= @auth_string %> @@ -96,13 +100,20 @@ KERBEROS_MAP_FILE = <%= @krb_map_file %> SEC_PASSWORD_FILE = /etc/condor/pool_password <% end -%> -# Integrity -SEC_DEFAULT_INTEGRITY = REQUIRED # Encryption SEC_DEFAULT_ENCRYPTION = REQUIRED SEC_DEFAULT_CRYPTO_METHODS = BLOWFISH +<% if @auth_string.include? "ANONYMOUS" -%> +SEC_READ_ENCRYPTION = OPTIONAL +SEC_CLIENT_ENCRYPTION = OPTIONAL +<% end -%> # Integrity +<% if @auth_string.include? "ANONYMOUS" -%> +SEC_READ_INTEGRITY = OPTIONAL +SEC_CLIENT_INTEGRITY = OPTIONAL +<% else -%> SEC_DEFAULT_INTEGRITY = REQUIRED +<% end -%> SEC_DAEMON_INTEGRITY = REQUIRED SEC_NEGOTIATOR_INTEGRITY = REQUIRED From 5a007b0fddc54bc64b394757be4de3cafb5c394e Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 6 Jun 2016 11:34:01 +0100 Subject: [PATCH 43/54] should not touch condor_config or condor_config.local --- manifests/config/common.pp | 15 +-------------- manifests/init.pp | 1 - manifests/params.pp | 1 - 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/manifests/config/common.pp b/manifests/config/common.pp index deb7c3c..f7574d2 100644 --- a/manifests/config/common.pp +++ b/manifests/config/common.pp @@ -16,26 +16,13 @@ $leave_job_in_queue = $htcondor::leave_job_in_queue $request_memory = $htcondor::request_memory - $use_pkg_condor_config = $htcondor::use_pkg_condor_config $template_config_local = $htcondor::template_config_local $now = strftime('%d.%m.%Y_%H.%M') # files common between machines - unless $use_pkg_condor_config { - file { '/etc/condor/condor_config': - backup => ".bak.${now}", - source => "puppet:///modules/${module_name}/condor_config", - require => Package['condor'], - owner => $condor_user, - group => $condor_group, - mode => '0644', - notify => 
Exec['/usr/sbin/condor_reconfig'], - } - } - - file { '/etc/condor/condor_config.local': + file { '/etc/condor/config.d/00_config_local.config': backup => ".bak.${now}", content => template($template_config_local), require => Package['condor'], diff --git a/manifests/init.pp b/manifests/init.pp index 496d653..98f4bb8 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -127,7 +127,6 @@ $health_check_script = $htcondor::params::health_check_script, $include_username_in_accounting = $htcondor::params::include_username_in_accounting, - $use_pkg_condor_config = $htcondor::params::use_pkg_condor_config, $install_repositories = $htcondor::params::install_repositories, $dev_repositories = $htcondor::params::dev_repositories, $is_scheduler = $htcondor::params::is_scheduler, diff --git a/manifests/params.pp b/manifests/params.pp index 19ef2b5..2617c0d 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -48,7 +48,6 @@ ) $include_username_in_accounting = hiera('include_username_in_accounting', false) - $use_pkg_condor_config = hiera('use_pkg_condor_config', false) $install_repositories = hiera('install_repositories', true) $dev_repositories = hiera('dev_repositories', false) From 464fd7116ad20a834a98ed68ff5836ea19201554 Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 6 Jun 2016 11:40:54 +0100 Subject: [PATCH 44/54] added anonymous auth --- lib/puppet/parser/functions/construct_auth_string.rb | 7 ++++++- manifests/config/security.pp | 4 +++- manifests/init.pp | 1 + manifests/params.pp | 1 + 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/puppet/parser/functions/construct_auth_string.rb b/lib/puppet/parser/functions/construct_auth_string.rb index 823c15e..dc9ca1a 100644 --- a/lib/puppet/parser/functions/construct_auth_string.rb +++ b/lib/puppet/parser/functions/construct_auth_string.rb @@ -1,15 +1,17 @@ module Puppet::Parser::Functions newfunction(:construct_auth_string, :type => :rvalue) do |args| - raise(Puppet::ParseError, 
"construct_auth_string() wrong number of arguments. Given: #{args.size} for 4)") if args.size != 4 + raise(Puppet::ParseError, "construct_auth_string() wrong number of arguments. Given: #{args.size} for 5)") if args.size != 5 use_fs_auth = args[0] use_password_auth = args[1] use_kerberos_auth = args[2] use_claim_to_be_auth = args[3] + use_anonymous_auth = args[4] fs_string = 'FS' pw_string = 'PASSWORD' krb_string = 'KERBEROS' ctb_string = 'CLAIMTOBE' + anon_string = 'ANONYMOUS' auth_methods = Array.new if use_fs_auth == true @@ -24,6 +26,9 @@ module Puppet::Parser::Functions if use_claim_to_be_auth == true auth_methods.push ctb_string end + if use_anonymous_auth == true + auth_methods.push anon_string + end return auth_methods.join(",") end diff --git a/manifests/config/security.pp b/manifests/config/security.pp index ae437b5..c704627 100644 --- a/manifests/config/security.pp +++ b/manifests/config/security.pp @@ -8,6 +8,7 @@ $managers = $htcondor::managers $workers = $htcondor::workers + $use_anonymous_auth = $htcondor::use_anonymous_auth $use_fs_auth = $htcondor::use_fs_auth $use_password_auth = $htcondor::use_password_auth $use_kerberos_auth = $htcondor::use_kerberos_auth @@ -37,7 +38,8 @@ $template_security = $htcondor::template_security $auth_string = construct_auth_string($use_fs_auth, - $use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth) + $use_password_auth, $use_kerberos_auth, $use_claim_to_be_auth, + $use_anonymous_auth) # because HTCondor uses user 'condor_pool' for remote access # and user 'condor' for local the variables below need to include diff --git a/manifests/init.pp b/manifests/init.pp index 98f4bb8..afbfb90 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -175,6 +175,7 @@ $htcondor::params::template_highavailability, $use_htcondor_account_mapping = $htcondor::params::use_htcondor_account_mapping, + $use_anonymous_auth = $htcondor::params::use_anonymous_auth, $use_fs_auth = $htcondor::params::use_fs_auth, $use_password_auth = 
$htcondor::params::use_password_auth, $use_kerberos_auth = $htcondor::params::use_kerberos_auth, diff --git a/manifests/params.pp b/manifests/params.pp index 2617c0d..3392253 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -86,6 +86,7 @@ $condor_gid = hiera('condor_gid', 0) # authentication + $use_anonymous_auth = hiera('use_anonymous_auth', false) $use_fs_auth = hiera('use_fs_auth', true) $use_password_auth = hiera('use_password_auth', true) $use_kerberos_auth = hiera('use_kerberos_auth', false) From 25a79a902298e5ee0ea5007bd11722593b67f813 Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 6 Jun 2016 11:56:14 +0100 Subject: [PATCH 45/54] setting SEC_DEFAULT_INTEGRITY = REQUIRED --- templates/10_security.config.erb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/templates/10_security.config.erb b/templates/10_security.config.erb index 3bb4605..cc857a9 100644 --- a/templates/10_security.config.erb +++ b/templates/10_security.config.erb @@ -109,11 +109,10 @@ SEC_CLIENT_ENCRYPTION = OPTIONAL <% end -%> # Integrity +SEC_DEFAULT_INTEGRITY = REQUIRED <% if @auth_string.include?
"ANONYMOUS" -%> SEC_READ_INTEGRITY = OPTIONAL SEC_CLIENT_INTEGRITY = OPTIONAL -<% else -%> -SEC_DEFAULT_INTEGRITY = REQUIRED <% end -%> SEC_DAEMON_INTEGRITY = REQUIRED SEC_NEGOTIATOR_INTEGRITY = REQUIRED From e279a5e3e500f646d9311e4b904e07f0656d7755 Mon Sep 17 00:00:00 2001 From: kreczko Date: Mon, 6 Jun 2016 12:13:06 +0100 Subject: [PATCH 46/54] renamed custom_attributes to custom_machine_attributes --- CHANGELOG.md | 7 ++++++- manifests/config/worker.pp | 32 +++++++++++++++--------------- manifests/init.pp | 7 +------ manifests/params.pp | 2 +- templates/20_workernode.config.erb | 8 ++++---- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa0048a..4d1d20f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ The full detail of these changes can be seen in [PR 53](https://github.com/HEP-P - configure connection broker for private workers (i.e. workers that cannot be reached from the manager or scheduler but can reach the manager). - enabled `ganglia` daemon for schedulers (previously only possible on managers) - flag to enable [condor reporting](http://research.cs.wisc.edu/htcondor/privacy.html), disabed by default +- added `use_anonymous_auth` +- added `custom_machine_attributes` and `custom_job_attributes` which can be used to add classads for `STARTD_ATTRS` and `STARTD_JOB_ATTRS` ## Bug fixes - daemon list would be incorrect for some versions of Ruby. This was due to the use of `and` and `or` operators which is incorrect for boolean comparisons.
@@ -23,4 +25,7 @@ The full detail of these changes can be seen in [PR 53](https://github.com/HEP-P ## Other - changed config templates to ensure new line at the end of the file and reduced the use of `-%>` - workers are no longer able to write to schedulers by default -- new formatting for the security config: one line per entry for manager/scheduler/worker \ No newline at end of file +- new formatting for the security config: one line per entry for manager/scheduler/worker +- removed `use_pkg_config` parameter. +- no longer changing `/etc/condor/condor_config` nor `/etc/condor/condor_config.local` as recommended by the HTCondor team +- content previously in `/etc/condor/condor_config.local` now in `/etc/condor/config.d/00_config_local.config` \ No newline at end of file diff --git a/manifests/config/worker.pp b/manifests/config/worker.pp index 6370f35..152c6cf 100644 --- a/manifests/config/worker.pp +++ b/manifests/config/worker.pp @@ -3,24 +3,24 @@ include htcondor::config::security # general - manifest or 1 or more configs - $condor_user = $htcondor::condor_user - $condor_group = $htcondor::condor_group - $health_check_script = $htcondor::health_check_script + $condor_user = $htcondor::condor_user + $condor_group = $htcondor::condor_group + $health_check_script = $htcondor::health_check_script # /etc/condor/config.d/20_workernode.config - $custom_attributes = $htcondor::custom_attributes - $custom_job_attributes = $htcondor::custom_job_attributes - $daemon_list = $htcondor::config::daemon_list - $enable_cgroup = $htcondor::enable_cgroup - $enable_healthcheck = $htcondor::enable_healthcheck - $machine_owner = $htcondor::machine_owner - $memory_overcommit = $htcondor::memory_overcommit - $number_of_cpus = $htcondor::number_of_cpus - $partitionable_slots = $htcondor::partitionable_slots - $pool_create = $htcondor::pool_create - $pool_home = $htcondor::pool_home - $use_pid_namespaces = $htcondor::use_pid_namespaces + $custom_machine_attributes = 
$htcondor::custom_machine_attributes + $custom_job_attributes = $htcondor::custom_job_attributes + $daemon_list = $htcondor::config::daemon_list + $enable_cgroup = $htcondor::enable_cgroup + $enable_healthcheck = $htcondor::enable_healthcheck + $machine_owner = $htcondor::machine_owner + $memory_overcommit = $htcondor::memory_overcommit + $number_of_cpus = $htcondor::number_of_cpus + $partitionable_slots = $htcondor::partitionable_slots + $pool_create = $htcondor::pool_create + $pool_home = $htcondor::pool_home + $use_pid_namespaces = $htcondor::use_pid_namespaces # template files - $template_workernode = $htcondor::template_workernode + $template_workernode = $htcondor::template_workernode file { '/etc/condor/config.d/20_workernode.config': content => template($template_workernode), diff --git a/manifests/init.pp b/manifests/init.pp index afbfb90..4f4c33c 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -29,11 +29,6 @@ # Sets CONDOR_ADMIN # (http://research.cs.wisc.edu/htcondor/manual/latest/3_3Configuration.html). # -# [*custom_attribute*] -# Can be used to specify a ClassAd via custom_attribute = True. This is useful -# when creating queues with ARC CEs -# Default: NORDUGRID_QUEUE -# # [*high_priority_groups*] # A hash of groups with high priority. It is used for the group sorting # expression for condor. Groups with lower value are considered first. 
@@ -113,7 +108,7 @@ $admin_email = $htcondor::params::admin_email, $condor_priority = $htcondor::params::repo_priority, $condor_version = $htcondor::params::condor_version, - $custom_attributes = $htcondor::params::custom_attributes, + $custom_machine_attributes = $htcondor::params::custom_machine_attributes, $custom_job_attributes = $htcondor::params::custom_job_attributes, $enable_condor_reporting = $htcondor::params::enable_condor_reporting, $enable_cgroup = $htcondor::params::enable_cgroup, diff --git a/manifests/params.pp b/manifests/params.pp index 3392253..d21874d 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -13,7 +13,7 @@ ) $repo_priority = hiera('repo_priority', '99') $condor_version = hiera('condor_version', 'present') - $custom_attributes = hiera_hash('custom_attribute', {}) + $custom_machine_attributes = hiera_hash('custom_machine_attribute', {}) $custom_job_attributes = hiera_hash('custom_job_attributes', {}) # this is one of the funding requirements for HTCondor diff --git a/templates/20_workernode.config.erb b/templates/20_workernode.config.erb index 2320011..ea61332 100644 --- a/templates/20_workernode.config.erb +++ b/templates/20_workernode.config.erb @@ -6,12 +6,12 @@ NUM_CPUS = <%= @number_of_cpus %> DETECTED_CPUS = $NUM_CPUS # custom machine attributes for job matching -# e.g. queues with ARC CE (using condor_requirements=" && custom_attribute)" -<% if @custom_attributes.any? -%> -<% @custom_attributes.each do |k, v| -%> +# e.g. queues with ARC CE (using condor_requirements=" && NORDUGRID_QUEUE)" +<% if @custom_machine_attributes.any? 
-%> +<% @custom_machine_attributes.each do |k, v| -%> <%=k -%> = <%=v %> <% end -%> -STARTD_ATTRS = <%= @custom_attributes.keys.join(', ') -%>, MachineOwner, StartJobs +STARTD_ATTRS = <%= @custom_machine_attributes.keys.join(', ') -%>, MachineOwner, StartJobs <% else -%> STARTD_ATTRS = MachineOwner, StartJobs <% end -%> From 19d9ade53e9b836f179a4f00c49f0c9415da8019 Mon Sep 17 00:00:00 2001 From: kreczko Date: Tue, 25 Apr 2017 21:00:59 +0100 Subject: [PATCH 47/54] changed pool_password to 0600 to work with HTCondor 8.6 --- manifests/config/security.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manifests/config/security.pp b/manifests/config/security.pp index c704627..e0a35fd 100644 --- a/manifests/config/security.pp +++ b/manifests/config/security.pp @@ -82,7 +82,7 @@ source => $pool_password_file, owner => root, group => root, - mode => '0640', + mode => '0600', } } From 8d107c238f54cbdf6b5447d210c7a0bb471be5ab Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 May 2017 12:08:22 +0100 Subject: [PATCH 48/54] Added Alessandra's cgroup changes (issue #59) --- manifests/config/worker.pp | 1 + manifests/init.pp | 1 + manifests/params.pp | 10 ++++++++++ templates/20_workernode.config.erb | 4 ++-- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/manifests/config/worker.pp b/manifests/config/worker.pp index 152c6cf..32ba25c 100644 --- a/manifests/config/worker.pp +++ b/manifests/config/worker.pp @@ -11,6 +11,7 @@ $custom_job_attributes = $htcondor::custom_job_attributes $daemon_list = $htcondor::config::daemon_list $enable_cgroup = $htcondor::enable_cgroup + $htcondor_cgroup = $htcondor::htcondor_cgroup $enable_healthcheck = $htcondor::enable_healthcheck $machine_owner = $htcondor::machine_owner $memory_overcommit = $htcondor::memory_overcommit diff --git a/manifests/init.pp b/manifests/init.pp index 4f4c33c..2344006 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -114,6 +114,7 @@ $enable_cgroup = 
$htcondor::params::enable_cgroup, $enable_multicore = $htcondor::params::enable_multicore, $enable_healthcheck = $htcondor::params::enable_healthcheck, + $htcondor_cgroup = $htcondor::params::htcondor_cgroup, $high_priority_groups = $htcondor::params::high_priority_groups, $priority_halflife = $htcondor::params::priority_halflife, $default_prio_factor = $htcondor::params::default_prio_factor, diff --git a/manifests/params.pp b/manifests/params.pp index d21874d..211d729 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -23,6 +23,16 @@ $enable_multicore = hiera('enable_multicore', false) $enable_healthcheck = hiera('enable_healthcheck', false) + + if $facts['os']['family'] == 'RedHat' and $facts['os']['release']['major'] == '7' { + $htcondor_cgroup_default = '/system.slice/condor.service' + } + else{ + $htcondor_cgroup_default = 'htcondor' + } + $htcondor_cgroup = hiera('htcondor_cgroup', $htcondor_cgroup_default) + + $high_priority_groups = hiera_hash('high_priority_groups', undef) $default_accounting_groups = { diff --git a/templates/20_workernode.config.erb b/templates/20_workernode.config.erb index ea61332..f16862d 100644 --- a/templates/20_workernode.config.erb +++ b/templates/20_workernode.config.erb @@ -31,7 +31,7 @@ STARTD.SETTABLE_ATTRS_ADMINISTRATOR = StartJobs ENABLE_PERSISTENT_CONFIG = TRUE PERSISTENT_CONFIG_DIR = /etc/condor/persistent -## Healthcheck +## Healthcheck <% if @enable_healthcheck == true -%> STARTD_CRON_JOBLIST = $(STARTD_CRON_JOBLIST) WN_HEALTHCHECK STARTD_CRON_WN_HEALTHCHECK_EXECUTABLE = /usr/local/bin/healhcheck_wn_condor @@ -104,7 +104,7 @@ MAX_NUM_STARTD_LOG = 10 <%- if @enable_cgroup -%> # Enable CGROUP -BASE_CGROUP = htcondor +BASE_CGROUP = <%= @htcondor_cgroup %> CGROUP_MEMORY_LIMIT = soft <%- end -%> From 7c40e56f02e43f3b61ea2ca0ab012ae11636f8ac Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 May 2017 12:19:46 +0100 Subject: [PATCH 49/54] Added Alessandra's memory_factor (issue #59) --- manifests/config/scheduler.pp 
| 1 + manifests/init.pp | 3 ++- manifests/params.pp | 1 + templates/12_resourcelimits.config.erb | 3 +-- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/manifests/config/scheduler.pp b/manifests/config/scheduler.pp index d79dfa3..fe636ef 100644 --- a/manifests/config/scheduler.pp +++ b/manifests/config/scheduler.pp @@ -9,6 +9,7 @@ # 12_resourcelimits.config $max_walltime = $htcondor::max_walltime $max_cputime = $htcondor::max_cputime + $memory_factor = $htcondor::memory_factor # /etc/condor/config.d/13_queues.config $queues = $htcondor::queues $periodic_expr_interval = $htcondor::periodic_expr_interval diff --git a/manifests/init.pp b/manifests/init.pp index 2344006..18cef62 100644 --- a/manifests/init.pp +++ b/manifests/init.pp @@ -185,7 +185,8 @@ $krb_map_file = $htcondor::params::krb_map_file, $machine_list_prefix = $htcondor::params::machine_list_prefix, $max_walltime = $htcondor::params::max_walltime, - $max_cputime = $htcondor::params::max_cputime,) inherits + $max_cputime = $htcondor::params::max_cputime, + $memory_factor = $htcondor::params::memory_factor,) inherits ::htcondor::params { if $install_repositories { class { 'htcondor::repositories': } diff --git a/manifests/params.pp b/manifests/params.pp index 211d729..5a4ebc4 100644 --- a/manifests/params.pp +++ b/manifests/params.pp @@ -79,6 +79,7 @@ $leave_job_in_queue = hiera('leave_job_in_queue', undef) $max_walltime = hiera('max_walltime', '80 * 60 * 60') $max_cputime = hiera('max_cputime', '80 * 60 * 60') + $memory_factor = hiera('memory_factor', '1000') $ganglia_cluster_name = hiera('ganglia_cluster_name', undef) diff --git a/templates/12_resourcelimits.config.erb b/templates/12_resourcelimits.config.erb index d0757aa..c069bc8 100644 --- a/templates/12_resourcelimits.config.erb +++ b/templates/12_resourcelimits.config.erb @@ -6,7 +6,7 @@ RemoveDefaultJobWallTime = ( RemoteWallClockTime > <%= @max_walltime %> ) RemoveDefaultJobCpuTime = ( RemoteSysCpu + RemoteUserCpu > <%= @max_cputime 
%> ) ## Memory usage limit -RemoveMemoryUsage = ( ResidentSetSize_RAW > 1000*RequestMemory ) +RemoveMemoryUsage = ( ResidentSetSize_RAW > <%= @memory_factor %>*RequestMemory ) ## Held jobs - don't want them to stay in the system forever RemoveHeldJobs = ( (JobStatus==5 && (CurrentTime - EnteredCurrentStatus) > 30 * 60) ) @@ -20,4 +20,3 @@ SYSTEM_PERIODIC_REMOVE = $(RemoveDefaultJobWallTime) || \ $(RemoveHeldJobs) || \ $(RemoveMemoryUsage) || \ $(RemoveMultipleRunJobs) - From ffa66ff9538116bb12de01d2f125bbd59e869dd8 Mon Sep 17 00:00:00 2001 From: Alessandra Forti Date: Sun, 19 Mar 2017 10:39:58 +0000 Subject: [PATCH 50/54] Removed CONDOR_IDS settings from condor_config.local.erb template when owner is ROOT --- templates/condor_config.local.erb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/templates/condor_config.local.erb b/templates/condor_config.local.erb index 98a0dc7..6cba44c 100644 --- a/templates/condor_config.local.erb +++ b/templates/condor_config.local.erb @@ -1,6 +1,4 @@ -<% if @condor_user == 'root' and @condor_group == 'root' -%> -CONDOR_IDS = 0.0 -<% elsif @condor_uid.to_i > 0 and @condor_gid.to_i > 0 -%> +<% if @condor_uid.to_i > 0 and @condor_gid.to_i > 0 -%> CONDOR_IDS = <%= @condor_uid %>.<%= @condor_gid %> <% end -%> CONDOR_ADMIN = <%= @admin_email %> From 3895150dc3a16054e43a74f6c26d59046c2290bd Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 May 2017 16:03:04 +0100 Subject: [PATCH 51/54] updated rspec --- Gemfile | 4 ++-- metadata.json | 2 +- spec/spec_helper.rb | 12 ++++++++++++ .../parser/functions/construct_auth_string_spec.rb | 14 +++++++++----- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Gemfile b/Gemfile index 3508cee..487af84 100644 --- a/Gemfile +++ b/Gemfile @@ -2,5 +2,5 @@ source 'https://rubygems.org' puppetversion = ENV.key?('PUPPET_VERSION') ? 
"= #{ENV['PUPPET_VERSION']}" : ['>= 2.7'] gem 'puppet', puppetversion -gem 'puppet-lint', '>= 0.3.2' -gem 'puppetlabs_spec_helper', '>= 0.1.0' \ No newline at end of file +gem 'puppet-lint', '~> 2.0' +gem 'puppetlabs_spec_helper', '~> 2.1' diff --git a/metadata.json b/metadata.json index 9799a5d..fcc5199 100644 --- a/metadata.json +++ b/metadata.json @@ -1,6 +1,6 @@ { "name": "HEPPuppet-htcondor", - "version": "2.0.0", + "version": "2.0.1", "summary": "Puppet module for HTCondor batch system", "author": "HEPPuppet", "dependencies": [ diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index e02fc4c..2523411 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,7 +1,13 @@ require 'puppetlabs_spec_helper/module_spec_helper' +# hack to enable all the expect syntax (like allow_any_instance_of) in rspec-puppet examples +RSpec::Mocks::Syntax.enable_expect(RSpec::Puppet::ManifestMatchers) + RSpec.configure do |c| + c.add_setting :puppet_future + c.puppet_future = Puppet.version.to_f >= 4.0 c.treat_symbols_as_metadata_keys_with_true_values = true + c.before :each do # Ensure that we don't accidentally cache facts and environment # between test cases. 
@@ -14,6 +20,12 @@ if ENV['STRICT_VARIABLES'] == 'yes' Puppet.settings[:strict_variables]=true end + RSpec::Mocks.setup + end + + c.after :each do + RSpec::Mocks.verify + RSpec::Mocks.teardown end end shared_examples :compile, :compile => true do diff --git a/spec/unit/puppet/parser/functions/construct_auth_string_spec.rb b/spec/unit/puppet/parser/functions/construct_auth_string_spec.rb index d4d0955..c298fc8 100644 --- a/spec/unit/puppet/parser/functions/construct_auth_string_spec.rb +++ b/spec/unit/puppet/parser/functions/construct_auth_string_spec.rb @@ -9,23 +9,27 @@ context 'construct_auth_string test' do it "get FS right" do - result = scope.function_construct_auth_string([true, false, false, false]) + result = scope.function_construct_auth_string([true, false, false, false, false]) expect(result).to eq('FS') end it "get FS,PASSWORD right" do - result = scope.function_construct_auth_string([true, true, false, false]) + result = scope.function_construct_auth_string([true, true, false, false, false]) expect(result).to eq('FS,PASSWORD') end it "get FS,PASSWORD,KERBEROS right" do - result = scope.function_construct_auth_string([true, true, true, false]) + result = scope.function_construct_auth_string([true, true, true, false, false]) expect(result).to eq('FS,PASSWORD,KERBEROS') end it "get FS,PASSWORD,KERBEROS,CLAIMTOBE right" do - result = scope.function_construct_auth_string([true, true, true, true]) + result = scope.function_construct_auth_string([true, true, true, true, false]) expect(result).to eq('FS,PASSWORD,KERBEROS,CLAIMTOBE') end + it "get FS,PASSWORD,KERBEROS,CLAIMTOBE,ANONYMOUS right" do + result = scope.function_construct_auth_string([true, true, true, true, true]) + expect(result).to eq('FS,PASSWORD,KERBEROS,CLAIMTOBE,ANONYMOUS') + end it "get FS,KERBEROS right" do - result = scope.function_construct_auth_string([true, false, true, false]) + result = scope.function_construct_auth_string([true, false, true, false, false]) expect(result).to 
eq('FS,KERBEROS') end end From d204a4c406d71dd69ee995d75d9ebe85e80ebc64 Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 May 2017 16:19:36 +0100 Subject: [PATCH 52/54] updated changelog --- .gitignore | 2 + CHANGELOG.md | 107 +++++++++++++++++++++++++++++++++++++++++++++++++-- Gemfile | 1 + HISTORY.md | 31 +++++++++++++++ README.md | 5 +++ 5 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 HISTORY.md diff --git a/.gitignore b/.gitignore index 926c1f3..d50087f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ files/pool_password +Gemfile.lock +pkg diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d1d20f..503e91e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,103 @@ +# Change Log + +## [v1.3.1](https://github.com/hep-puppet/htcondor/tree/v1.3.1) (2017-05-18) +[Full Changelog](https://github.com/hep-puppet/htcondor/compare/v1.3.0...v1.3.1) + +**Implemented enhancements:** + +- Add profiles [\#46](https://github.com/HEP-Puppet/htcondor/issues/46) +- Simplify parameters [\#44](https://github.com/HEP-Puppet/htcondor/issues/44) +- Repository clean up [\#43](https://github.com/HEP-Puppet/htcondor/issues/43) +- Towards version 2.0 - part 3 [\#55](https://github.com/HEP-Puppet/htcondor/pull/55) ([kreczko](https://github.com/kreczko)) +- Step 2 towards version 2.0 [\#54](https://github.com/HEP-Puppet/htcondor/pull/54) ([kreczko](https://github.com/kreczko)) +- Big simplifications [\#53](https://github.com/HEP-Puppet/htcondor/pull/53) ([kreczko](https://github.com/kreczko)) + +**Closed issues:** + +- security considerations ? 
[\#42](https://github.com/HEP-Puppet/htcondor/issues/42) +- wrong permissions if condor user != root [\#38](https://github.com/HEP-Puppet/htcondor/issues/38) + +**Merged pull requests:** + +- Version 1.3.1 [\#52](https://github.com/HEP-Puppet/htcondor/pull/52) ([kreczko](https://github.com/kreczko)) +- \[New feature\] high-availability deployment for multiple managers [\#51](https://github.com/HEP-Puppet/htcondor/pull/51) ([kreczko](https://github.com/kreczko)) +- 2016 spring clean [\#50](https://github.com/HEP-Puppet/htcondor/pull/50) ([kreczko](https://github.com/kreczko)) + +## [v1.3.0](https://github.com/hep-puppet/htcondor/tree/v1.3.0) (2016-01-29) +[Full Changelog](https://github.com/hep-puppet/htcondor/compare/v1.2.0...v1.3.0) + +**Implemented enhancements:** + +- bumping version to 1.2.0 + tidying up [\#40](https://github.com/HEP-Puppet/htcondor/pull/40) ([kreczko](https://github.com/kreczko)) + +**Closed issues:** + +- Publish on Puppet Forge [\#39](https://github.com/HEP-Puppet/htcondor/issues/39) +- Swap repositories for dependency on grid\_repos [\#3](https://github.com/HEP-Puppet/htcondor/issues/3) + +**Merged pull requests:** + +- Dropped Puppet 2.7 from tests and added Puppet 4.0 [\#47](https://github.com/HEP-Puppet/htcondor/pull/47) ([kreczko](https://github.com/kreczko)) +- enable cgroup [\#41](https://github.com/HEP-Puppet/htcondor/pull/41) ([kashif74](https://github.com/kashif74)) + +## [v1.2.0](https://github.com/hep-puppet/htcondor/tree/v1.2.0) (2014-11-12) +[Full Changelog](https://github.com/hep-puppet/htcondor/compare/v1.1.0...v1.2.0) + +**Merged pull requests:** + +- Added kerberos map file [\#37](https://github.com/HEP-Puppet/htcondor/pull/37) ([kreczko](https://github.com/kreczko)) +- setting default FILESYSTEM\_DOMAIN to FQDN \(not all WNs have shared FS\) [\#36](https://github.com/HEP-Puppet/htcondor/pull/36) ([kreczko](https://github.com/kreczko)) +- Improving auth method setting [\#35](https://github.com/HEP-Puppet/htcondor/pull/35) 
([kreczko](https://github.com/kreczko)) +- Condor id fix, better tests and puppet-lint fixes [\#33](https://github.com/HEP-Puppet/htcondor/pull/33) ([kreczko](https://github.com/kreczko)) + +## [v1.1.0](https://github.com/hep-puppet/htcondor/tree/v1.1.0) (2014-10-16) +[Full Changelog](https://github.com/hep-puppet/htcondor/compare/v1.0.0...v1.1.0) + +**Closed issues:** + +- htcondor files ownership [\#14](https://github.com/HEP-Puppet/htcondor/issues/14) + +**Merged pull requests:** + +- Exclude condor-i686 and few other bits [\#32](https://github.com/HEP-Puppet/htcondor/pull/32) ([kashif74](https://github.com/kashif74)) +- A few small changes [\#31](https://github.com/HEP-Puppet/htcondor/pull/31) ([kreczko](https://github.com/kreczko)) +- New feature: ganglia [\#30](https://github.com/HEP-Puppet/htcondor/pull/30) ([kreczko](https://github.com/kreczko)) +- New feature: kerberos [\#29](https://github.com/HEP-Puppet/htcondor/pull/29) ([kreczko](https://github.com/kreczko)) +- Queues with hiera or config [\#28](https://github.com/HEP-Puppet/htcondor/pull/28) ([kreczko](https://github.com/kreczko)) +- Fairshares updated [\#27](https://github.com/HEP-Puppet/htcondor/pull/27) ([kreczko](https://github.com/kreczko)) +- email, default and filesystem domains [\#26](https://github.com/HEP-Puppet/htcondor/pull/26) ([kreczko](https://github.com/kreczko)) +- Request memory [\#25](https://github.com/HEP-Puppet/htcondor/pull/25) ([kreczko](https://github.com/kreczko)) +- adding option for DEV repositories [\#24](https://github.com/HEP-Puppet/htcondor/pull/24) ([kreczko](https://github.com/kreczko)) +- Defrag and partitionable slots [\#23](https://github.com/HEP-Puppet/htcondor/pull/23) ([kreczko](https://github.com/kreczko)) +- fix ":" -\> "=" [\#22](https://github.com/HEP-Puppet/htcondor/pull/22) ([kreczko](https://github.com/kreczko)) + +## [v1.0.0](https://github.com/hep-puppet/htcondor/tree/v1.0.0) (2014-08-07) +[Full 
Changelog](https://github.com/hep-puppet/htcondor/compare/New features...v1.0.0) + +**Closed issues:** + +- Generic template for fair shares [\#8](https://github.com/HEP-Puppet/htcondor/issues/8) +- Fix fair shares and groups [\#4](https://github.com/HEP-Puppet/htcondor/issues/4) + +**Merged pull requests:** + +- Added defrag and healthcheck [\#19](https://github.com/HEP-Puppet/htcondor/pull/19) ([kashif74](https://github.com/kashif74)) +- make sure condor\_reconfig is not run before service is up [\#18](https://github.com/HEP-Puppet/htcondor/pull/18) ([fschaer](https://github.com/fschaer)) +- allow user-defined templates to be specified [\#17](https://github.com/HEP-Puppet/htcondor/pull/17) ([fschaer](https://github.com/fschaer)) +- Fix3 [\#16](https://github.com/HEP-Puppet/htcondor/pull/16) ([fschaer](https://github.com/fschaer)) +- specify file ownership and allow for user \(root\) override, as this is [\#15](https://github.com/HEP-Puppet/htcondor/pull/15) ([fschaer](https://github.com/fschaer)) +- be librarian-puppet friendly [\#13](https://github.com/HEP-Puppet/htcondor/pull/13) ([fschaer](https://github.com/fschaer)) +- Changes for seperate scheduler configuartion [\#12](https://github.com/HEP-Puppet/htcondor/pull/12) ([kashif74](https://github.com/kashif74)) +- Fixes for Nagios tests [\#11](https://github.com/HEP-Puppet/htcondor/pull/11) ([kreczko](https://github.com/kreczko)) +- Fairshare fixes [\#10](https://github.com/HEP-Puppet/htcondor/pull/10) ([kreczko](https://github.com/kreczko)) +- Fixes for issues \#4 and \#8 + other stuff [\#9](https://github.com/HEP-Puppet/htcondor/pull/9) ([kreczko](https://github.com/kreczko)) +- Updating things for productin [\#7](https://github.com/HEP-Puppet/htcondor/pull/7) ([kreczko](https://github.com/kreczko)) +- new version [\#6](https://github.com/HEP-Puppet/htcondor/pull/6) ([kashif74](https://github.com/kashif74)) +- Added priority to repo [\#5](https://github.com/HEP-Puppet/htcondor/pull/5) 
([kashif74](https://github.com/kashif74)) +- First working version of Puppet module for HTCondor [\#1](https://github.com/HEP-Puppet/htcondor/pull/1) ([kreczko](https://github.com/kreczko)) + # Version 2.0.0 -Version 2.0.0 brought big changes to the module. The bigest change is a structual one. +Version 2.0.0 brought big changes to the module. The biggest change is a structural one. `htcondor::params.pp` was added to set defaults for all the parameters. In addition, parameters are attempted to be read via `hiera` first. Full merge support for hashes and arrays is provided. @@ -23,9 +121,12 @@ The full detail of these changes can be seen in [PR 53](https://github.com/HEP-P - removed repository dependency if it is disabled ## Other -- changed config templates to ensure new line at the end of the file and reduced the use of `-%>` +- changed config templates to ensure new line at the end of the file and reduced the use of `-%>` - workers are no longer able to write to schedulers by default - new formatting for the security config: one line per entry for manager/scheduler/worker - removed `use_pkg_config` parameter. - no longer changing `/etc/condor/condor_config` nor `/etc/condor/condor_config.local` as recommended by the HTCondor team -- content previously in `/etc/condor/condor_config.local` now in `/etc/condor/config.d/00_config_local.config` \ No newline at end of file +- content previously in `/etc/condor/condor_config.local` now in `/etc/condor/config.d/00_config_local.config` + + +\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* \ No newline at end of file diff --git a/Gemfile b/Gemfile index 487af84..d56416a 100644 --- a/Gemfile +++ b/Gemfile @@ -4,3 +4,4 @@ puppetversion = ENV.key?('PUPPET_VERSION') ? 
"= #{ENV['PUPPET_VERSION']}" : ['>= gem 'puppet', puppetversion gem 'puppet-lint', '~> 2.0' gem 'puppetlabs_spec_helper', '~> 2.1' +gem 'github_changelog_generator', '<= 1.14.3' diff --git a/HISTORY.md b/HISTORY.md new file mode 100644 index 0000000..6b53361 --- /dev/null +++ b/HISTORY.md @@ -0,0 +1,31 @@ +# Version 2.0.0 +Version 2.0.0 brought big changes to the module. The biggest change is a structural one. +`htcondor::params.pp` was added to set defaults for all the parameters. +In addition, parameters are attempted to be read via `hiera` first. Full merge +support for hashes and arrays is provided. +With these changes the `htcondor::config.pp` was split into six pieces: + - the main config setting up the rest + - a common config part + - the security configuration + - separate configs for manager, scheduler & worker +The full detail of these changes can be seen in [PR 53](https://github.com/HEP-Puppet/htcondor/pull/53). + +## New features +- configure connection broker for private workers (i.e. workers that cannot be reached from the manager or scheduler but can reach the manager). +- enabled `ganglia` daemon for schedulers (previously only possible on managers) +- flag to enable [condor reporting](http://research.cs.wisc.edu/htcondor/privacy.html), disabled by default +- added `use_anonymous_auth` +- added `custom_machine_attributes` and `custom_job_attributes` which can be used to add classads for `STARTD_ATTRS` and `STARTD_JOB_ATTRS` + +## Bug fixes +- daemon list would be incorrect for some versions of Ruby. This was due to the use of `and` and `or` operators which is incorrect for boolean comparisons. 
+- added missing `cluster_has_multiple_domains` parameter (w.r.t to 2.0.0 beta) +- removed repository dependency if it is disabled + +## Other +- changed config templates to ensure new line at the end of the file and reduced the use of `-%>` +- workers are no longer able to write to schedulers by default +- new formatting for the security config: one line per entry for manager/scheduler/worker +- removed `use_pkg_config` parameter. +- no longer changing `/etc/condor/condor_config` nor `/etc/condor/condor_config.local` as recommended by the HTCondor team +- content previously in `/etc/condor/condor_config.local` now in `/etc/condor/config.d/00_config_local.config` diff --git a/README.md b/README.md index 24bc4f3..d12ef1e 100644 --- a/README.md +++ b/README.md @@ -86,3 +86,8 @@ and `custom_job_attributes` are added to `STARTD_JOB_ATTRS`. Please run ```bundle exec rake validate && bundle exec rake lint && bundle exec rake spec SPEC_OPTS='--format documentation'``` and make sure no errors are present when submitting code. 
+ +### Generating changelog +``` +github_changelog_generator -u hep-puppet -p htcondor +``` From 22c25b88777ab40e613b77245e50fa9b7989c8dc Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 May 2017 16:22:13 +0100 Subject: [PATCH 53/54] updated readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d12ef1e..76e81e7 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ #Puppet module for HTCondor batch system -Latest stable version: https://github.com/HEP-Puppet/htcondor/releases/tag/v1.3.1 +Latest stable version: https://github.com/HEP-Puppet/htcondor/releases/tag/v2.0.1 -Development branch (heading for 2.0.0): https://github.com/HEP-Puppet/htcondor/tree/development +Development branch: https://github.com/HEP-Puppet/htcondor/tree/development [![Build Status](https://travis-ci.org/HEP-Puppet/htcondor.svg?branch=master)](https://travis-ci.org/HEP-Puppet/htcondor) From a3e2a04d24e7500be442a010c6c74f76ac23c3d1 Mon Sep 17 00:00:00 2001 From: kreczko Date: Thu, 18 May 2017 17:01:01 +0100 Subject: [PATCH 54/54] fixing rspec warnings --- .travis.yml | 13 +++++++------ Gemfile | 23 +++++++++++++++++++++-- Rakefile | 5 +++-- manifests/service.pp | 6 ++++-- metadata.json | 6 +++--- spec/spec_helper.rb | 4 +++- 6 files changed, 41 insertions(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index 668fb85..d4e5c10 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,17 +1,18 @@ --- +sudo: false language: ruby +cache: bundler bundler_args: --without development script: - - puppet --version - "bundle exec rake validate && bundle exec rake lint && bundle exec rake spec SPEC_OPTS='--format documentation'" matrix: fast_finish: true include: - - rvm: 1.9.3 + - rvm: 2.1.7 env: PUPPET_GEM_VERSION="~> 3.0" - - rvm: 2.1.5 - env: PUPPET_GEM_VERSION="~> 3.0" - - rvm: 2.1.6 - env: PUPPET_GEM_VERSION="~> 4.0" STRICT_VARIABLES="yes" + - rvm: 2.1.7 + env: PUPPET_GEM_VERSION="~> 4.0" + - rvm: 2.3.1 + env: 
PUPPET_GEM_VERSION="~> 4.0" notifications: email: false diff --git a/Gemfile b/Gemfile index d56416a..93bbcca 100644 --- a/Gemfile +++ b/Gemfile @@ -1,7 +1,26 @@ source 'https://rubygems.org' -puppetversion = ENV.key?('PUPPET_VERSION') ? "= #{ENV['PUPPET_VERSION']}" : ['>= 2.7'] -gem 'puppet', puppetversion +# Find a location or specific version for a gem. place_or_version can be a +# version, which is most often used. It can also be git, which is specified as +# `git://somewhere.git#branch`. You can also use a file source location, which +# is specified as `file://some/location/on/disk`. +def location_for(place_or_version, fake_version = nil) + if place_or_version =~ /^(git[:@][^#]*)#(.*)/ + [fake_version, { :git => $1, :branch => $2, :require => false }].compact + elsif place_or_version =~ /^file:\/\/(.*)/ + ['>= 0', { :path => File.expand_path($1), :require => false }] + else + [place_or_version, { :require => false }] + end +end + +supports_windows = false + +gem 'puppet', *location_for(ENV['PUPPET_GEM_VERSION']) gem 'puppet-lint', '~> 2.0' gem 'puppetlabs_spec_helper', '~> 2.1' gem 'github_changelog_generator', '<= 1.14.3' +gem 'rspec-puppet', '~> 2.5' +gem 'rspec-puppet-facts' +gem 'rspec-puppet-utils' +gem 'metadata-json-lint' diff --git a/Rakefile b/Rakefile index 3c30d48..c7d19bd 100644 --- a/Rakefile +++ b/Rakefile @@ -2,11 +2,12 @@ require 'rubygems' require 'puppetlabs_spec_helper/rake_tasks' require 'puppet-lint/tasks/puppet-lint' -PuppetLint.configuration.fail_on_warnings +PuppetLint.configuration.fail_on_warnings = true PuppetLint.configuration.send('relative') PuppetLint.configuration.send('disable_80chars') PuppetLint.configuration.send('disable_class_inherits_from_params_class') PuppetLint.configuration.send('disable_documentation') PuppetLint.configuration.send('disable_single_quote_string_with_variables') PuppetLint.configuration.send('disable_only_variable_string') -PuppetLint.configuration.ignore_paths = ["spec/**/*.pp", "pkg/**/*.pp"] 
+PuppetLint.configuration.ignore_paths = ["spec/**/*", "pkg/**/*", 'vendor/**/*'] +PuppetLint.configuration.exclude_paths = ["spec/**/*", "pkg/**/*", 'vendor/**/*'] diff --git a/manifests/service.pp b/manifests/service.pp index 930df9a..de3d8a9 100644 --- a/manifests/service.pp +++ b/manifests/service.pp @@ -8,9 +8,11 @@ hasrestart => true, hasstatus => true, } - -> - #this exec is called from the config, but we can't run it if the condor service is not up. it's a RE-config command assuming something is already up. + + # this exec is called from the config, but we can't run it if the condor + # service is not up. it's a RE-config command assuming something is already up. exec{ '/usr/sbin/condor_reconfig': refreshonly => true, + require => Service['condor'] } } diff --git a/metadata.json b/metadata.json index fcc5199..9ec3486 100644 --- a/metadata.json +++ b/metadata.json @@ -6,14 +6,14 @@ "dependencies": [ { "name": "puppetlabs/firewall", - "version_requirement": "\u003e\u003d0.3.1" + "version_requirement": "~> 1.8" }, { "name": "puppetlabs/stdlib", - "version_requirement": "\u003e\u003d4.1.0" + "version_requirement": "~> 4.1" } ], "source": "git@github.com:HEP-Puppet/puppet-htcondor.git", "project_page": "https://github.com/HEP-Puppet", - "license": "Apache License, Version 2.0" + "license": "Apache-2.0" } diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 2523411..b8bdc06 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -1,4 +1,7 @@ require 'puppetlabs_spec_helper/module_spec_helper' +require 'rspec-puppet-utils' +require 'rspec/mocks' +require 'rspec-puppet-facts' # hack to enable all the expect syntax (like allow_any_instance_of) in rspec-puppet examples RSpec::Mocks::Syntax.enable_expect(RSpec::Puppet::ManifestMatchers) @@ -6,7 +9,6 @@ RSpec.configure do |c| c.add_setting :puppet_future c.puppet_future = Puppet.version.to_f >= 4.0 - c.treat_symbols_as_metadata_keys_with_true_values = true c.before :each do # Ensure that we don't 
accidentally cache facts and environment