diff --git a/cni/ipam/plugin/main.go b/cni/ipam/plugin/main.go index 7278cb956e..15602aee93 100644 --- a/cni/ipam/plugin/main.go +++ b/cni/ipam/plugin/main.go @@ -42,11 +42,19 @@ func main() { if err := ipamPlugin.Plugin.InitializeKeyValueStore(&config); err != nil { fmt.Printf("Failed to initialize key-value store of ipam plugin, err:%v.\n", err) + + if isSafe, err := ipamPlugin.Plugin.IsSafeToRemoveLock(ipamPlugin.Plugin.Name); isSafe { + log.Printf("[IPAM] Removing lock file as process holding lock exited") + if errUninit := ipamPlugin.Plugin.UninitializeKeyValueStore(true); errUninit != nil { + log.Errorf("Failed to uninitialize key-value store of network plugin, err:%v.\n", errUninit) + } + } + os.Exit(1) } defer func() { - if errUninit := ipamPlugin.Plugin.UninitializeKeyValueStore(); errUninit != nil { + if errUninit := ipamPlugin.Plugin.UninitializeKeyValueStore(false); errUninit != nil { fmt.Printf("Failed to uninitialize key-value store of ipam plugin, err:%v.\n", err) } diff --git a/cni/network/plugin/main.go b/cni/network/plugin/main.go index 211a763995..14c6fc84c2 100644 --- a/cni/network/plugin/main.go +++ b/cni/network/plugin/main.go @@ -187,20 +187,19 @@ func main() { reportPluginError(reportManager, tb, err) tb.Close() } - return - } - // Start telemetry process if not already started. This should be done inside lock, otherwise multiple process - // end up creating/killing telemetry process results in undesired state. - tb := telemetry.NewTelemetryBuffer("") - tb.ConnectToTelemetryService(telemetryNumRetries, telemetryWaitTimeInMilliseconds) - defer tb.Close() + if isSafe, err := netPlugin.Plugin.IsSafeToRemoveLock(name); isSafe { + log.Printf("[CNI] Removing lock file as process holding lock exited") + if errUninit := netPlugin.Plugin.UninitializeKeyValueStore(true); errUninit != nil { + log.Errorf("Failed to uninitialize key-value store of network plugin, err:%v.\n", errUninit) + } + } - t := time.Now() - cniReport.Timestamp = t.Format("2006-01-02 15:04:05") + return + } defer func() { - if errUninit := netPlugin.Plugin.UninitializeKeyValueStore(); errUninit != nil { + if errUninit := netPlugin.Plugin.UninitializeKeyValueStore(false); errUninit != nil { log.Errorf("Failed to uninitialize key-value store of network plugin, err:%v.\n", errUninit) } @@ -209,6 +208,15 @@ func main() { } }() + // Start telemetry process if not already started. This should be done inside lock, otherwise multiple process + // end up creating/killing telemetry process results in undesired state. + tb := telemetry.NewTelemetryBuffer("") + tb.ConnectToTelemetryService(telemetryNumRetries, telemetryWaitTimeInMilliseconds) + defer tb.Close() + + t := time.Now() + cniReport.Timestamp = t.Format("2006-01-02 15:04:05") + if err = netPlugin.Start(&config); err != nil { log.Errorf("Failed to start network plugin, err:%v.\n", err) reportPluginError(reportManager, tb, err) @@ -228,7 +236,7 @@ func main() { netPlugin.Stop() // release cni lock - if errUninit := netPlugin.Plugin.UninitializeKeyValueStore(); errUninit != nil { + if errUninit := netPlugin.Plugin.UninitializeKeyValueStore(false); errUninit != nil { log.Errorf("Failed to uninitialize key-value store of network plugin, err:%v.\n", errUninit) } diff --git a/cni/plugin.go b/cni/plugin.go index 0a6cb7f0cb..a1ed3fe338 100644 --- a/cni/plugin.go +++ b/cni/plugin.go @@ -6,6 +6,7 @@ package cni import ( "context" "fmt" + "io/ioutil" "os" "runtime" @@ -186,9 +187,9 @@ func (plugin *Plugin) InitializeKeyValueStore(config *common.PluginConfig) error } // Uninitialize key-value store -func (plugin *Plugin) UninitializeKeyValueStore() error { +func (plugin *Plugin) UninitializeKeyValueStore(force bool) error { if plugin.Store != nil { - err := plugin.Store.Unlock(false) + err := plugin.Store.Unlock(force) if err != nil { log.Printf("[cni] Failed to unlock store: %v.", err) return err @@ -198,3 +199,44 @@ func (plugin *Plugin) UninitializeKeyValueStore() error { return nil } + +// check if safe to remove lockfile +func (plugin *Plugin) IsSafeToRemoveLock(processName string) (bool, error) { + if plugin != nil && plugin.Store != nil { + // check if get process command supported + if cmdErr := platform.GetProcessSupport(); cmdErr != nil { + log.Errorf("Get process cmd not supported. Error %v", cmdErr) + return false, cmdErr + } + + // Read pid from lockfile + lockFileName := plugin.Store.GetLockFileName() + content, err := ioutil.ReadFile(lockFileName) + if err != nil { + log.Errorf("Failed to read lock file :%v, ", err) + return false, err + } + + if len(content) <= 0 { + log.Errorf("Num bytes read from lock file is 0") + return false, fmt.Errorf("Num bytes read from lock file is 0") + } + + log.Printf("Read from Lock file:%s", content) + // Get the process name if running and + // check if that matches with our expected process + pName, err := platform.GetProcessNameByID(string(content)) + if err != nil { + return true, nil + } + + log.Printf("[CNI] Process name is %s", pName) + + if pName != processName { + return true, nil + } + } + + log.Errorf("Plugin store is nil") + return false, fmt.Errorf("plugin store nil") +} diff --git a/platform/os_linux.go b/platform/os_linux.go index fc8cbaf898..1a00609ddb 100644 --- a/platform/os_linux.go +++ b/platform/os_linux.go @@ -7,6 +7,7 @@ import ( "bytes" "fmt" "io/ioutil" + "os" "os/exec" "strings" "time" @@ -44,6 +45,12 @@ func GetOSInfo() string { return string(info) } +func GetProcessSupport() error { + cmd := fmt.Sprintf("ps -p %v -o comm=", os.Getpid()) + _, err := ExecuteCommand(cmd) + return err +} + // GetLastRebootTime returns the last time the system rebooted. func GetLastRebootTime() (time.Time, error) { // Query last reboot time. @@ -127,3 +134,18 @@ func GetOSDetails() (map[string]string, error) { return osInfoArr, nil } + +func GetProcessNameByID(pidstr string) (string, error) { + pidstr = strings.Trim(pidstr, "\n") + cmd := fmt.Sprintf("ps -p %s -o comm=", pidstr) + out, err := ExecuteCommand(cmd) + if err != nil { + log.Printf("GetProcessNameByID returned error: %v", err) + return "", err + } + + out = strings.Trim(out, "\n") + out = strings.TrimSpace(out) + + return out, nil +} diff --git a/platform/os_test.go b/platform/os_test.go index 124f6153b7..38048eaecd 100644 --- a/platform/os_test.go +++ b/platform/os_test.go @@ -2,6 +2,8 @@ package platform import ( "os" + "strconv" + "strings" "testing" ) @@ -23,3 +25,14 @@ func TestGetOSDetails(t *testing.T) { t.Errorf("GetOSDetails failed :%v", err) } } + +func TestGetProcessNameByID(t *testing.T) { + pName, err := GetProcessNameByID(strconv.Itoa(os.Getpid())) + if err != nil { + t.Errorf("GetProcessNameByID failed: %v", err) + } + + if !strings.Contains(pName, "platform.test") { + t.Errorf("Incorrect process name:%v\n", pName) + } +} diff --git a/platform/os_windows.go b/platform/os_windows.go index e8bc2c6f4a..0900e74462 100644 --- a/platform/os_windows.go +++ b/platform/os_windows.go @@ -6,6 +6,7 @@ package platform import ( "bytes" "fmt" + "os" "os/exec" "strings" "time" @@ -60,6 +61,12 @@ func GetOSInfo() string { return "windows" } +func GetProcessSupport() error { + cmd := fmt.Sprintf("Get-Process -Id %v", os.Getpid()) + _, err := executePowershellCommand(cmd) + return err +} + // GetLastRebootTime returns the last time the system rebooted. func GetLastRebootTime() (time.Time, error) { out, err := exec.Command("cmd", "/c", "wmic os get lastbootuptime").Output() @@ -145,12 +152,18 @@ func executePowershellCommand(command string) (string, error) { return "", fmt.Errorf("Failed to find powershell executable") } + log.Printf("[Azure-Utils] %s", command) + cmd := exec.Command(ps, command) var stdout bytes.Buffer var stderr bytes.Buffer cmd.Stdout = &stdout cmd.Stderr = &stderr - cmd.Run() + + err = cmd.Run() + if err != nil { + return "", fmt.Errorf("%s:%s", err.Error(), stderr.String()) + } return strings.TrimSpace(stdout.String()), nil } @@ -186,3 +199,30 @@ func SetSdnRemoteArpMacAddress() error { func GetOSDetails() (map[string]string, error) { return nil, nil } + +func GetProcessNameByID(pidstr string) (string, error) { + pidstr = strings.Trim(pidstr, "\r\n") + cmd := fmt.Sprintf("Get-Process -Id %s|Format-List", pidstr) + out, err := executePowershellCommand(cmd) + if err != nil { + log.Printf("Process is not running. Output:%v, Error %v", out, err) + return "", err + } + + if len(out) <= 0 { + log.Printf("Output length is 0") + return "", fmt.Errorf("get-process output length is 0") + } + + lines := strings.Split(out, "\n") + for _, line := range lines { + if strings.Contains(line, "Name") { + pName := strings.Split(line, ":") + if len(pName) > 1 { + return strings.TrimSpace(pName[1]), nil + } + } + } + + return "", fmt.Errorf("Process not found") +} diff --git a/store/json.go b/store/json.go index 65f18b4d55..8945ad26ee 100644 --- a/store/json.go +++ b/store/json.go @@ -241,3 +241,7 @@ func (kvs *jsonFileStore) GetLockFileModificationTime() (time.Time, error) { return info.ModTime().UTC(), nil } + +func (kvs *jsonFileStore) GetLockFileName() string { + return kvs.fileName + lockExtension +} diff --git a/store/store.go b/store/store.go index 2e505ba2ed..ef648a161c 100644 --- a/store/store.go +++ b/store/store.go @@ -17,6 +17,7 @@ type KeyValueStore interface { Unlock(forceUnlock bool) error GetModificationTime() (time.Time, error) GetLockFileModificationTime() (time.Time, error) + GetLockFileName() string } var (