Merge pull request #2619 from GoogleCloudPlatform/release-candidate
Release v1.34.0
harshthakkar01 committed May 24, 2024
2 parents: 146ebbe + 3d77821 · commit 5b360ae
Showing 291 changed files with 3,627 additions and 2,653 deletions.
47 changes: 45 additions & 2 deletions cmd/destroy.go
@@ -16,12 +16,15 @@
package cmd

import (
"bufio"
"fmt"
"hpc-toolkit/pkg/config"
"hpc-toolkit/pkg/logging"
"hpc-toolkit/pkg/modulewriter"
"hpc-toolkit/pkg/shell"
"os"
"path/filepath"
"strings"

"github.com/spf13/cobra"
)
@@ -62,6 +65,11 @@ func runDestroyCmd(cmd *cobra.Command, args []string) {
group := bp.Groups[i]
groupDir := filepath.Join(deplRoot, string(group.Name))

if err := shell.ImportInputs(groupDir, artifactsDir, bp); err != nil {
logging.Error("failed to import inputs for group %q: %v", group.Name, err)
// still proceed with destroying the group
}

var err error
switch group.Kind() {
case config.PackerKind:
@@ -72,9 +80,16 @@
case config.TerraformKind:
err = destroyTerraformGroup(groupDir)
default:
err = fmt.Errorf("group %s is an unsupported kind %s", groupDir, group.Kind().String())
err = fmt.Errorf("group %q is an unsupported kind %q", groupDir, group.Kind().String())
}

if err != nil {
logging.Error("failed to destroy group %q:\n%s", group.Name, renderError(err, *ctx))
if i == 0 || !destroyChoice(bp.Groups[i-1].Name) {
logging.Fatal("destruction of %q failed", deplRoot)
}
}
checkErr(err, ctx)

}

modulewriter.WritePackerDestroyInstructions(os.Stdout, packerManifests)
@@ -88,3 +103,31 @@ func destroyTerraformGroup(groupDir string) error {

return shell.Destroy(tf, getApplyBehavior())
}

func destroyChoice(nextGroup config.GroupName) bool {
switch getApplyBehavior() {
case shell.AutomaticApply:
return true
case shell.PromptBeforeApply:
// pass; proceed with prompt
default:
return false
}

reader := bufio.NewReader(os.Stdin)
for {
fmt.Printf("Do you want to delete the next group %q [y/n]?: ", nextGroup)

in, err := reader.ReadString('\n')
if err != nil {
logging.Fatal("%v", err)
}

switch strings.ToLower(strings.TrimSpace(in)) {
case "y":
return true
case "n":
return false
}
}
}
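
The new failure path above stops the teardown unless the user (or automatic apply) confirms moving on to the next group. For reference, the stdin confirmation loop it relies on can be sketched as a standalone program; the helper name `confirm` and the `main` driver below are illustrative assumptions, not part of the Toolkit:

```go
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// confirm repeatedly prompts on stdin until the user answers "y" or "n".
// It mirrors the loop added in destroyChoice above; the name and the
// error handling here are illustrative only.
func confirm(prompt string) (bool, error) {
	reader := bufio.NewReader(os.Stdin)
	for {
		fmt.Printf("%s [y/n]?: ", prompt)

		in, err := reader.ReadString('\n')
		if err != nil {
			return false, err
		}

		switch strings.ToLower(strings.TrimSpace(in)) {
		case "y":
			return true, nil
		case "n":
			return false, nil
		}
	}
}

func main() {
	ok, err := confirm(`Do you want to delete the next group "primary"`)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("continue:", ok)
}
```

Any input other than "y" or "n" simply re-prompts, which matches the behavior of the loop in `destroyChoice`.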
2 changes: 1 addition & 1 deletion cmd/root.go
@@ -52,7 +52,7 @@ HPC deployments on the Google Cloud Platform.`,
logging.Fatal("cmd.Help function failed: %s", err)
}
},
Version: "v1.33.0",
Version: "v1.34.0",
Annotations: annotation,
}
)
5 changes: 2 additions & 3 deletions community/examples/AMD/README.md
@@ -1,9 +1,8 @@
# AMD solutions for the HPC Toolkit

> [!NOTE]
> This document uses Slurm-GCP v5. A newer [blueprint](./hpc-amd-slurm-v6.yaml)
> using v6 has been published. This documentation will be updated and the v5
> blueprint deprecated.
> This document uses Slurm-GCP v6. If you want to use the Slurm-GCP v5 version,
> you can refer to the [blueprint](./hpc-amd-slurm-v5-legacy.yaml).
## AMD-Optimized Slurm Cluster

@@ -1,4 +1,4 @@
# Copyright 2024 Google LLC
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,36 +13,37 @@
# limitations under the License.

---
blueprint_name: hpc-amd-slurm-v6
blueprint_name: hpc-amd-slurm

vars:
project_id: ## Set GCP Project ID Here ##
deployment_name: amd-v6
deployment_name: amd-v5
region: us-east4
zone: us-east4-c

deployment_groups:
- group: primary
modules:
- id: network
- id: network1
source: modules/network/vpc

- id: homefs
source: modules/file-system/filestore
use: [network]
use: [network1]
settings:
local_mount: /home

- id: swfs
source: modules/file-system/filestore
use: [network]
use: [network1]
settings:
local_mount: /sw

- id: spack-setup
source: community/modules/scripts/spack-setup
settings:
install_dir: /sw/spack
spack_ref: v0.18.1

- id: spack-execute
source: community/modules/scripts/spack-execute
@@ -66,15 +67,15 @@ deployment_groups:
packages:
slurm:
externals:
- spec: slurm@23-11-3
- spec: slurm@22-05-8
prefix: /usr/local
buildable: False
- destination: /sw/spack/openfoam_env.yaml
content: |
spack:
definitions:
- compilers:
- gcc@13.1.0
- gcc@10.3.0
- mpis:
- openmpi@4.1.3+legacylaunchers+pmi fabrics=none schedulers=slurm
- packages:
@@ -101,8 +102,9 @@ deployment_groups:
spack config --scope site add concretizer:targets:host_compatible:false
# gcc 12.1.0 is known to have runtime failures with OpenFOAM 8
spack install gcc@13.1.0 %gcc@8.5.0 target=x86_64
spack load gcc@13.1.0 %gcc@8.5.0 target=x86_64
# gcc 10.3.0 is the earliest copy of gcc with Zen 3 support
spack install gcc@10.3.0 %gcc@4.8.5 target=x86_64
spack load gcc@10.3.0 %gcc@4.8.5 target=x86_64
spack compiler find --scope site
if ! spack env list | grep -q openfoam; then
@@ -160,68 +162,70 @@ deployment_groups:
- id: spack_builder
source: modules/compute/vm-instance
use: [network, swfs, spack-startup]
use: [network1, swfs, spack-startup]
settings:
name_prefix: spack-builder
machine_type: c2d-standard-16
disable_public_ips: true
instance_image:
# these images must match the images used by Slurm modules below because
# we are building OpenMPI with PMI support in libraries contained in
# Slurm installation
family: slurm-gcp-6-4-hpc-rocky-linux-8
family: slurm-gcp-5-11-hpc-centos-7
project: schedmd-slurm-public

- id: low_cost_nodeset
source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
use: [network]
- id: low_cost_node_group
source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
settings:
machine_type: c2d-standard-4
node_count_dynamic_max: 10
bandwidth_tier: gvnic_enabled
enable_placement: false

- id: low_cost_partition
source: community/modules/compute/schedmd-slurm-gcp-v6-partition
use: [low_cost_nodeset]
source: community/modules/compute/schedmd-slurm-gcp-v5-partition
use:
- network1
- low_cost_node_group
settings:
partition_name: lowcost
enable_placement: false

- id: compute_nodeset
source: community/modules/compute/schedmd-slurm-gcp-v6-nodeset
use: [network]
- id: compute_node_group
source: community/modules/compute/schedmd-slurm-gcp-v5-node-group
settings:
machine_type: c2d-standard-112
node_count_dynamic_max: 50
bandwidth_tier: gvnic_enabled
enable_placement: true

# Because is_default is set to true, jobs will run on this partition unless an
# because is_default is set to true, jobs will run on this partition unless an
# alternative partition is specified using, for example, "srun -p lowcost"
- id: compute_partition
source: community/modules/compute/schedmd-slurm-gcp-v6-partition
use: [compute_nodeset]
source: community/modules/compute/schedmd-slurm-gcp-v5-partition
use:
- network1
- compute_node_group
settings:
partition_name: compute
enable_placement: true
is_default: true

- id: slurm_login
source: community/modules/scheduler/schedmd-slurm-gcp-v6-login
use: [network]
settings:
# need at least 8 physical cores to run OpenFOAM test
machine_type: c2d-standard-16
name_prefix: login

- id: slurm_controller
source: community/modules/scheduler/schedmd-slurm-gcp-v6-controller
source: community/modules/scheduler/schedmd-slurm-gcp-v5-controller
use:
- network
- network1
- homefs
- swfs
- low_cost_partition
- compute_partition
- slurm_login
settings:
machine_type: c2d-standard-4
login_startup_script: $(slurm_startup.startup_script)
login_startup_scripts_timeout: 21600

- id: slurm_login
source: community/modules/scheduler/schedmd-slurm-gcp-v5-login
use:
- network1
- slurm_controller
- slurm_startup
settings:
# need at least 8 physical cores to run OpenFOAM test
machine_type: c2d-standard-16
