Skip to content

Commit

Permalink
runtime: support preemption on windows/{386,amd64}
Browse files Browse the repository at this point in the history
This implements preemptM on Windows using SuspendThead and
ResumeThread.

Unlike on POSIX platforms, preemptM on Windows happens synchronously.
This means we need a make a few other tweaks to suspendG:

1. We need to CAS the G back to _Grunning before doing the preemptM,
   or there's a good chance we'll just catch the G spinning on its
   status in the runtime, which won't be preemptible.

2. We need to rate-limit preemptM attempts. Otherwise, if the first
   attempt catches the G at a non-preemptible point, the busy loop in
   suspendG may hammer it so hard that it never makes it past that
   non-preemptible point.

Updates #10958, #24543.

Change-Id: Ie53b098811096f7e45d864afd292dc9e999ce226
Reviewed-on: https://go-review.googlesource.com/c/go/+/204340
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
  • Loading branch information
aclements committed Nov 20, 2019
1 parent 6fd467e commit b89b462
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 17 deletions.
118 changes: 107 additions & 11 deletions src/runtime/os_windows.go
Expand Up @@ -6,6 +6,7 @@ package runtime

import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)

Expand All @@ -32,6 +33,7 @@ const (
//go:cgo_import_dynamic runtime._GetSystemDirectoryA GetSystemDirectoryA%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetSystemInfo GetSystemInfo%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._GetThreadContext GetThreadContext%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._SetThreadContext SetThreadContext%2 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryW LoadLibraryW%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._LoadLibraryA LoadLibraryA%1 "kernel32.dll"
//go:cgo_import_dynamic runtime._PostQueuedCompletionStatus PostQueuedCompletionStatus%4 "kernel32.dll"
Expand Down Expand Up @@ -79,6 +81,7 @@ var (
_GetSystemInfo,
_GetSystemTimeAsFileTime,
_GetThreadContext,
_SetThreadContext,
_LoadLibraryW,
_LoadLibraryA,
_PostQueuedCompletionStatus,
Expand Down Expand Up @@ -539,6 +542,11 @@ var exiting uint32

//go:nosplit
func exit(code int32) {
// Disallow thread suspension for preemption. Otherwise,
// ExitProcess and SuspendThread can race: SuspendThread
// queues a suspension request for this thread, ExitProcess
// kills the suspending thread, and then this thread suspends.
lock(&suspendLock)
atomic.Store(&exiting, 1)
stdcall1(_ExitProcess, uintptr(code))
}
Expand Down Expand Up @@ -1003,19 +1011,22 @@ func profilem(mp *m, thread uintptr) {
r.contextflags = _CONTEXT_CONTROL
stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(r)))

var gp *g
gp := gFromTLS(mp)

sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
}

func gFromTLS(mp *m) *g {
switch GOARCH {
default:
panic("unsupported architecture")
case "arm":
tls := &mp.tls[0]
gp = **((***g)(unsafe.Pointer(tls)))
return **((***g)(unsafe.Pointer(tls)))
case "386", "amd64":
tls := &mp.tls[0]
gp = *((**g)(unsafe.Pointer(tls)))
return *((**g)(unsafe.Pointer(tls)))
}

sigprof(r.ip(), r.sp(), r.lr(), gp, mp)
throw("unsupported architecture")
return nil
}

func profileloop1(param uintptr) uint32 {
Expand Down Expand Up @@ -1081,10 +1092,95 @@ func setThreadCPUProfiler(hz int32) {
atomic.Store((*uint32)(unsafe.Pointer(&getg().m.profilehz)), uint32(hz))
}

const preemptMSupported = false
const preemptMSupported = GOARCH != "arm"

// suspendLock protects simultaneous SuspendThread operations from
// suspending each other.
var suspendLock mutex

func preemptM(mp *m) {
// Not currently supported.
//
// TODO: Use SuspendThread/GetThreadContext/ResumeThread
if GOARCH == "arm" {
// TODO: Implement call injection
return
}

if mp == getg().m {
throw("self-preempt")
}

// Acquire our own handle to mp's thread.
lock(&mp.threadLock)
if mp.thread == 0 {
// The M hasn't been minit'd yet (or was just unminit'd).
unlock(&mp.threadLock)
atomic.Xadd(&mp.preemptGen, 1)
return
}
var thread uintptr
stdcall7(_DuplicateHandle, currentProcess, mp.thread, currentProcess, uintptr(unsafe.Pointer(&thread)), 0, 0, _DUPLICATE_SAME_ACCESS)
unlock(&mp.threadLock)

// Prepare thread context buffer.
var c *context
cbuf := make([]byte, unsafe.Sizeof(*c)+15)
// Align Context to 16 bytes.
c = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&cbuf[15]))) &^ 15))
c.contextflags = _CONTEXT_CONTROL

// Serialize thread suspension. SuspendThread is asynchronous,
// so it's otherwise possible for two threads to suspend each
// other and deadlock. We must hold this lock until after
// GetThreadContext, since that blocks until the thread is
// actually suspended.
lock(&suspendLock)

// Suspend the thread.
if int32(stdcall1(_SuspendThread, thread)) == -1 {
unlock(&suspendLock)
stdcall1(_CloseHandle, thread)
// The thread no longer exists. This shouldn't be
// possible, but just acknowledge the request.
atomic.Xadd(&mp.preemptGen, 1)
return
}

// We have to be very careful between this point and once
// we've shown mp is at an async safe-point. This is like a
// signal handler in the sense that mp could have been doing
// anything when we stopped it, including holding arbitrary
// locks.

// We have to get the thread context before inspecting the M
// because SuspendThread only requests a suspend.
// GetThreadContext actually blocks until it's suspended.
stdcall2(_GetThreadContext, thread, uintptr(unsafe.Pointer(c)))

unlock(&suspendLock)

// Does it want a preemption and is it safe to preempt?
gp := gFromTLS(mp)
if wantAsyncPreempt(gp) && isAsyncSafePoint(gp, c.ip(), c.sp(), c.lr()) {
// Inject call to asyncPreempt
targetPC := funcPC(asyncPreempt)
switch GOARCH {
default:
throw("unsupported architecture")
case "386", "amd64":
// Make it look like the thread called targetPC.
pc := c.ip()
sp := c.sp()
sp -= sys.PtrSize
*(*uintptr)(unsafe.Pointer(sp)) = pc
c.set_sp(sp)
c.set_ip(targetPC)
}

stdcall2(_SetThreadContext, thread, uintptr(unsafe.Pointer(c)))
}

// Acknowledge the preemption.
atomic.Xadd(&mp.preemptGen, 1)

stdcall1(_ResumeThread, thread)
stdcall1(_CloseHandle, thread)
}
31 changes: 25 additions & 6 deletions src/runtime/preempt.go
Expand Up @@ -118,6 +118,7 @@ func suspendG(gp *g) suspendGState {
stopped := false
var asyncM *m
var asyncGen uint32
var nextPreemptM int64
for i := 0; ; i++ {
switch s := readgstatus(gp); s {
default:
Expand Down Expand Up @@ -205,14 +206,32 @@ func suspendG(gp *g) suspendGState {
gp.preempt = true
gp.stackguard0 = stackPreempt

// Send asynchronous preemption.
asyncM = gp.m
asyncGen = atomic.Load(&asyncM.preemptGen)
if preemptMSupported && debug.asyncpreemptoff == 0 {
preemptM(asyncM)
}
// Prepare for asynchronous preemption.
asyncM2 := gp.m
asyncGen2 := atomic.Load(&asyncM2.preemptGen)
needAsync := asyncM != asyncM2 || asyncGen != asyncGen2
asyncM = asyncM2
asyncGen = asyncGen2

casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)

// Send asynchronous preemption. We do this
// after CASing the G back to _Grunning
// because preemptM may be synchronous and we
// don't want to catch the G just spinning on
// its status.
if preemptMSupported && debug.asyncpreemptoff == 0 && needAsync {
// Rate limit preemptM calls. This is
// particularly important on Windows
// where preemptM is actually
// synchronous and the spin loop here
// can lead to live-lock.
now := nanotime()
if now >= nextPreemptM {
nextPreemptM = now + yieldDelay/2
preemptM(asyncM)
}
}
}

// TODO: Don't busy wait. This loop should really only
Expand Down

0 comments on commit b89b462

Please sign in to comment.