From ac842e099c477db97c12e19adf9c15b79efb3966 Mon Sep 17 00:00:00 2001
From: Richard Gooch
Date: Tue, 2 Apr 2019 07:37:05 -0700
Subject: [PATCH 1/3] Add hypervisor/manager.Manager.ShutdownVMsAndExit() method.

---
 hypervisor/manager/api.go  |  6 ++++
 hypervisor/manager/stop.go | 83 ++++++++++++++++++++++++++++++++++++++
 hypervisor/manager/vm.go   |  8 ++++++
 3 files changed, 97 insertions(+)
 create mode 100644 hypervisor/manager/stop.go

diff --git a/hypervisor/manager/api.go b/hypervisor/manager/api.go
index eedef534..98391513 100644
--- a/hypervisor/manager/api.go
+++ b/hypervisor/manager/api.go
@@ -309,6 +309,12 @@ func (m *Manager) RestoreVmUserData(ipAddr net.IP,
 	return m.restoreVmUserData(ipAddr, authInfo)
 }
 
+// ShutdownVMsAndExit shuts down all starting and running VMs and then exits
+// the process. It does not return.
+func (m *Manager) ShutdownVMsAndExit() {
+	m.shutdownVMsAndExit()
+}
+
 func (m *Manager) SnapshotVm(ipAddr net.IP, authInfo *srpc.AuthInformation,
 	forceIfNotStopped, snapshotRootOnly bool) error {
 	return m.snapshotVm(ipAddr, authInfo, forceIfNotStopped, snapshotRootOnly)
diff --git a/hypervisor/manager/stop.go b/hypervisor/manager/stop.go
new file mode 100644
index 00000000..2aba1afd
--- /dev/null
+++ b/hypervisor/manager/stop.go
@@ -0,0 +1,83 @@
+package manager
+
+import (
+	"os"
+	"sync"
+	"time"
+
+	proto "github.com/Symantec/Dominator/proto/hypervisor"
+)
+
+// flusher is implemented by loggers that buffer output and must be flushed
+// explicitly before the process exits.
+type flusher interface {
+	Flush() error
+}
+
+// shutdownVMsAndExit shuts down all VMs concurrently, waits for every
+// shutdown attempt to finish, logs a final message (flushing the logger if it
+// supports that) and exits the process with status 0. It never returns.
+func (m *Manager) shutdownVMsAndExit() {
+	var waitGroup sync.WaitGroup
+	// The read lock is never released: the process exits at the end of this
+	// function.
+	m.mutex.RLock()
+	for _, vm := range m.vms {
+		waitGroup.Add(1)
+		go func(vm *vmInfoType) {
+			defer waitGroup.Done()
+			vm.shutdown()
+		}(vm)
+	}
+	waitGroup.Wait()
+	m.Logger.Println("stopping cleanly after shutting down VMs")
+	if flusher, ok := m.Logger.(flusher); ok {
+		flusher.Flush()
+	}
+	os.Exit(0)
+}
+
+// shutdown asks a starting or running VM to power down and waits up to one
+// minute for processMonitorResponses to signal that it has stopped. If the
+// wait times out the "quit" command is sent to kill the VM. VMs in any other
+// state are left alone.
+func (vm *vmInfoType) shutdown() {
+	// Take the write lock: vm.stoppedNotifier is modified below.
+	vm.mutex.Lock()
+	switch vm.State {
+	case proto.StateStarting, proto.StateRunning:
+	default:
+		vm.mutex.Unlock()
+		return
+	}
+	stoppedNotifier := make(chan struct{}, 1)
+	vm.stoppedNotifier = stoppedNotifier
+	vm.mutex.Unlock()
+	// processMonitorResponses sets vm.commandChannel to nil when the monitor
+	// closes but leaves the state as Starting/Running. A send on a nil channel
+	// blocks forever and would stall the whole shutdown, so check first.
+	sendCommand := func(command string) bool {
+		vm.mutex.RLock()
+		defer vm.mutex.RUnlock()
+		if vm.commandChannel == nil {
+			return false
+		}
+		vm.commandChannel <- command
+		return true
+	}
+	if !sendCommand("system_powerdown") {
+		vm.logger.Println("monitor already closed: nothing to shut down")
+		return
+	}
+	timer := time.NewTimer(time.Minute)
+	select {
+	case <-stoppedNotifier:
+		if !timer.Stop() {
+			<-timer.C
+		}
+		vm.logger.Println("shut down cleanly for system shutdown")
+	case <-timer.C:
+		vm.logger.Println("shutdown timed out: killing VM")
+		sendCommand("quit")
+	}
+}
diff --git a/hypervisor/manager/vm.go b/hypervisor/manager/vm.go
index e6c9e4a7..bcbf7de7 100644
--- a/hypervisor/manager/vm.go
+++ b/hypervisor/manager/vm.go
@@ -2243,8 +2243,16 @@ func (vm *vmInfoType) processMonitorResponses(monitorSock net.Conn) {
 	vm.commandChannel = nil
 	switch vm.State {
 	case proto.StateStarting:
+		select {
+		case vm.stoppedNotifier <- struct{}{}:
+		default:
+		}
 		return
 	case proto.StateRunning:
+		select {
+		case vm.stoppedNotifier <- struct{}{}:
+		default:
+		}
 		return
 	case proto.StateFailedToStart:
 		return
From 3349a5f34819d49418e1374d62a2bce833c11fde Mon Sep 17 00:00:00 2001
From: Richard Gooch
Date: Wed, 3 Apr 2019 08:40:21 -0700
Subject: [PATCH 2/3] Add hypervisor subcommands to cleanly shut down VMs.

---
 cmd/hypervisor/control.go | 167 ++++++++++++++++++++++++++++++++++++++
 cmd/hypervisor/main.go    |  37 +++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 cmd/hypervisor/control.go

diff --git a/cmd/hypervisor/control.go b/cmd/hypervisor/control.go
new file mode 100644
index 00000000..271e0687
--- /dev/null
+++ b/cmd/hypervisor/control.go
@@ -0,0 +1,167 @@
+package main
+
+import (
+	"fmt"
+	"net"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/Symantec/Dominator/hypervisor/manager"
+	"github.com/Symantec/Dominator/lib/fsutil"
+	"github.com/Symantec/Dominator/lib/log"
+)
+
+// shutdownVMsOnNextStop is set by the "stop-vms-on-next-stop" control request
+// and consulted by the "stop" request.
+var shutdownVMsOnNextStop bool
+
+// flusher is implemented by loggers that buffer output and must be flushed
+// explicitly before the process exits.
+type flusher interface {
+	Flush() error
+}
+
+// acceptControlConnections accepts connections on listener forever and
+// processes them one at a time, logging any errors.
+func acceptControlConnections(m *manager.Manager, listener net.Listener,
+	logger log.DebugLogger) {
+	for {
+		conn, err := listener.Accept()
+		if err != nil {
+			logger.Println(err)
+			// Back off: a persistent failure must not become a
+			// busy loop which floods the log.
+			time.Sleep(time.Second)
+			continue
+		}
+		if err := processControlConnection(conn, m, logger); err != nil {
+			logger.Println(err)
+		}
+	}
+}
+
+// configureVMsToStopOnNextStop asks the running hypervisor to shut down its
+// VMs on the next "stop" request. It does not return: sendRequest exits.
+func configureVMsToStopOnNextStop() {
+	sendRequest(connectToControl(), "stop-vms-on-next-stop")
+}
+
+// connectToControl connects to the control socket in the state directory. On
+// failure it writes the error to standard error and exits with status 1.
+func connectToControl() net.Conn {
+	sockAddr := filepath.Join(*stateDir, "control")
+	conn, err := net.Dial("unix", sockAddr)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error connecting to: %s: %s\n", sockAddr, err)
+		os.Exit(1)
+	}
+	return conn
+}
+
+// listenForControl creates the control socket in the state directory,
+// restricts it to fsutil.PrivateFilePerms and starts a goroutine which serves
+// requests received on it.
+func listenForControl(m *manager.Manager, logger log.DebugLogger) error {
+	sockAddr := filepath.Join(*stateDir, "control")
+	os.Remove(sockAddr) // Remove any existing file; the error is ignored.
+	listener, err := net.Listen("unix", sockAddr)
+	if err != nil {
+		return err
+	}
+	if err := os.Chmod(sockAddr, fsutil.PrivateFilePerms); err != nil {
+		listener.Close() // Do not leak the listener on failure.
+		return err
+	}
+	go acceptControlConnections(m, listener, logger)
+	return nil
+}
+
+// processControlConnection reads one newline-terminated request from conn,
+// replies with "ok" or "bad request" and carries out the request. The
+// connection is closed when the function returns. A "stop" request exits the
+// process, first shutting down the VMs if "stop-vms-on-next-stop" was received
+// earlier.
+func processControlConnection(conn net.Conn, m *manager.Manager,
+	logger log.DebugLogger) error {
+	defer conn.Close()
+	buffer := make([]byte, 256)
+	nRead, err := conn.Read(buffer)
+	if err != nil {
+		return fmt.Errorf("error reading request: %s", err)
+	}
+	if nRead < 1 {
+		return fmt.Errorf("read short request")
+	}
+	request := string(buffer[:nRead])
+	if request[nRead-1] != '\n' {
+		return fmt.Errorf("request not newline-terminated: %q", request)
+	}
+	request = request[:nRead-1]
+	switch request {
+	case "stop":
+		if _, err := fmt.Fprintln(conn, "ok"); err != nil {
+			return err
+		}
+		if shutdownVMsOnNextStop {
+			m.ShutdownVMsAndExit()
+		} else {
+			logger.Println("stopping without shutting down VMs")
+			if flusher, ok := logger.(flusher); ok {
+				flusher.Flush()
+			}
+			os.Exit(0)
+		}
+	case "stop-vms-on-next-stop":
+		if _, err := fmt.Fprintln(conn, "ok"); err != nil {
+			return err
+		}
+		shutdownVMsOnNextStop = true
+	default:
+		if _, err := fmt.Fprintln(conn, "bad request"); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// requestStop asks the running hypervisor to stop. It does not return:
+// sendRequest exits.
+func requestStop() {
+	sendRequest(connectToControl(), "stop")
+}
+
+// sendRequest writes request to conn, reads the newline-terminated response
+// and exits the process: with status 0 if the response is "ok" (after one more
+// read, which waits for the server to close the connection), otherwise with
+// status 1 after reporting the problem on standard error.
+func sendRequest(conn net.Conn, request string) {
+	if _, err := fmt.Fprintln(conn, request); err != nil {
+		fmt.Fprintf(os.Stderr, "Error writing request: %s\n", err)
+		os.Exit(1)
+	}
+	buffer := make([]byte, 256)
+	nRead, err := conn.Read(buffer)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading response: %s\n", err)
+		os.Exit(1)
+	}
+	if nRead < 1 {
+		fmt.Fprintln(os.Stderr, "Read short response")
+		os.Exit(1)
+	}
+	response := string(buffer[:nRead])
+	if response[nRead-1] != '\n' {
+		fmt.Fprintf(os.Stderr, "Response not newline-terminated: %s\n",
+			response)
+		os.Exit(1)
+	}
+	response = response[:nRead-1]
+	if response != "ok" {
+		fmt.Fprintf(os.Stderr, "Bad response: %s\n", response)
+		os.Exit(1)
+	}
+	conn.Read(buffer) // Wait for EOF.
+	conn.Close()
+	os.Exit(0)
+}
diff --git a/cmd/hypervisor/main.go b/cmd/hypervisor/main.go
index 56873423..8407048d 100644
--- a/cmd/hypervisor/main.go
+++ b/cmd/hypervisor/main.go
@@ -58,12 +58,46 @@ func init() {
 		"Comma separated list of volume directories. If empty, scan for space")
 }
 
+// printUsage prints the usage message followed by the flag defaults.
+func printUsage() {
+	fmt.Fprintln(os.Stderr,
+		"Usage: hypervisor [flags...] [run|stop|stop-vms-on-next-stop]")
+	fmt.Fprintln(os.Stderr, "Common flags:")
+	flag.PrintDefaults()
+}
+
+// processCommand handles the optional subcommand. It returns if there is no
+// subcommand or it is "run"; the "stop" and "stop-vms-on-next-stop"
+// subcommands send the request to the running hypervisor and exit. Anything
+// else prints the usage message and exits with status 2.
+func processCommand(args []string) {
+	if len(args) < 1 {
+		return
+	} else if len(args) > 1 {
+		printUsage()
+		os.Exit(2)
+	}
+	switch args[0] {
+	case "run":
+		return
+	case "stop":
+		requestStop()
+	case "stop-vms-on-next-stop":
+		configureVMsToStopOnNextStop()
+	default:
+		printUsage()
+		os.Exit(2)
+	}
+}
+
 func main() {
 	if err := loadflags.LoadForDaemon("hypervisor"); err != nil {
 		fmt.Fprintln(os.Stderr, err)
 		os.Exit(1)
 	}
+	flag.Usage = printUsage
 	flag.Parse()
+	processCommand(flag.Args())
 	if *testMemoryAvailable > 0 {
 		nBytes := *testMemoryAvailable << 20
 		mem := make([]byte, nBytes)
@@ -133,6 +167,9 @@ func main() {
 	if err != nil {
 		logger.Fatalf("Cannot start hypervisor: %s\n", err)
 	}
+	if err := listenForControl(managerObj, logger); err != nil {
+		logger.Fatalf("Cannot listen for control: %s\n", err)
+	}
 	httpd.AddHtmlWriter(managerObj)
 	if len(bridges) < 1 {
 		logger.Println("No bridges found: entering log-only mode")
From 399a5c395957d54e48189ccb98c0918754e85417 Mon Sep 17 00:00:00 2001
From: Richard Gooch
Date: Thu, 4 Apr 2019 10:48:57 -0700
Subject: [PATCH 3/3] Add/modify systemd services to cleanly shut down VMs on system shutdown.

--- Makefile | 3 ++- cmd/hypervisor/install | 1 + init.d/hypervisor.service | 1 + init.d/virtual-machines.service | 12 ++++++++++++ 4 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 init.d/virtual-machines.service diff --git a/Makefile b/Makefile index c8757aac..7807d289 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,8 @@ fleet-manager.tarball: @./scripts/make-tarball fleet-manager -C $(ETCDIR) ssl hypervisor.tarball: - @./scripts/make-tarball hypervisor -C $(ETCDIR) ssl + @./scripts/make-tarball hypervisor init.d/virtual-machines.* \ + -C $(ETCDIR) ssl image-unpacker.tarball: @./scripts/make-tarball image-unpacker \ diff --git a/cmd/hypervisor/install b/cmd/hypervisor/install index 7db14c54..358f4c98 100755 --- a/cmd/hypervisor/install +++ b/cmd/hypervisor/install @@ -7,3 +7,4 @@ cd "${0%/*}" . ./scripts/install.lib install_all hypervisor +install_service virtual-machines diff --git a/init.d/hypervisor.service b/init.d/hypervisor.service index d5f20bc1..911d6565 100644 --- a/init.d/hypervisor.service +++ b/init.d/hypervisor.service @@ -5,6 +5,7 @@ After=network.target [Service] KillMode=process ExecStart=/usr/local/sbin/hypervisor +ExecStop=/usr/local/sbin/hypervisor stop ExecReload=/bin/kill -HUP $MAINPID Restart=always RestartSec=1 diff --git a/init.d/virtual-machines.service b/init.d/virtual-machines.service new file mode 100644 index 00000000..2c9f3621 --- /dev/null +++ b/init.d/virtual-machines.service @@ -0,0 +1,12 @@ +[Unit] +Description=Virtual Machines clean shutdown +After=hypervisor.service + +[Service] +Type=oneshot +RemainAfterExit=true +ExecStart=/bin/true +ExecStop=/usr/local/sbin/hypervisor stop-vms-on-next-stop + +[Install] +WantedBy=multi-user.target