Skip to content
This repository has been archived by the owner on Jan 10, 2023. It is now read-only.

Commit

Permalink
Remove launchguard
Browse files Browse the repository at this point in the history
  • Loading branch information
sargun committed May 25, 2018
1 parent 916c8ea commit acde9fb
Show file tree
Hide file tree
Showing 10 changed files with 18 additions and 1,130 deletions.
30 changes: 0 additions & 30 deletions cmd/titus-launchguard-server/main.go

This file was deleted.

104 changes: 18 additions & 86 deletions executor/runner/runner.go
Original file line number Diff line number Diff line change
@@ -1,35 +1,27 @@
package runner

import (
"context"
"sync"
"time"

"github.com/Netflix/metrics-client-go/metrics"
"github.com/Netflix/titus-executor/uploader"

launchguardClient "github.com/Netflix/titus-executor/launchguard/client"
launchguardCore "github.com/Netflix/titus-executor/launchguard/core"

"github.com/Netflix/titus-executor/executor/runtime"
"github.com/Netflix/titus-executor/executor/runtime/docker"
runtimeTypes "github.com/Netflix/titus-executor/executor/runtime/types"

"errors"
"fmt"
"os"

"github.com/Netflix/titus-executor/api/netflix/titus"
"github.com/Netflix/titus-executor/config"
"github.com/Netflix/titus-executor/executor/drivers"
"github.com/Netflix/titus-executor/executor/metatron"
"github.com/Netflix/titus-executor/executor/runtime"
"github.com/Netflix/titus-executor/executor/runtime/docker"
runtimeTypes "github.com/Netflix/titus-executor/executor/runtime/types"
"github.com/Netflix/titus-executor/filesystems"
"github.com/Netflix/titus-executor/models"
"github.com/Netflix/titus-executor/uploader"
"github.com/sirupsen/logrus"

"context"
"errors"
"fmt"
"os"
"sync"
"time"
)

// WaitingOnLaunchguardMessage is the status message we send to the master while we wait for launchguard
const WaitingOnLaunchguardMessage = "waiting_on_launchguard"
const waitForTaskTimeout = 5 * time.Minute

var (
Expand All @@ -50,11 +42,10 @@ type task struct {
// Runner maintains in memory state for the task runner
type Runner struct { // nolint: maligned
// const:
metrics metrics.Reporter
runtime runtimeTypes.Runtime
launchGuard *launchguardClient.LaunchGuardClient
config config.Config
logger *logrus.Entry
metrics metrics.Reporter
runtime runtimeTypes.Runtime
config config.Config
logger *logrus.Entry

container *runtimeTypes.Container
watcher *filesystems.Watcher
Expand Down Expand Up @@ -86,16 +77,10 @@ func New(ctx context.Context, m metrics.Reporter, logUploaders *uploader.Uploade

// WithRuntime builds an Executor using the provided Runtime factory func
func WithRuntime(ctx context.Context, m metrics.Reporter, rp RuntimeProvider, logUploaders *uploader.Uploaders, cfg config.Config) (*Runner, error) {
lgc, err := launchguardClient.NewLaunchGuardClient(m, "http://localhost:8006")
if err != nil {
return nil, err // nolint: vet
}

runner := &Runner{
logger: logrus.NewEntry(logrus.StandardLogger()),
metrics: m,
logUploaders: logUploaders,
launchGuard: lgc,
config: cfg,
taskChan: make(chan task, 1),
killChan: make(chan struct{}),
Expand All @@ -109,8 +94,8 @@ func WithRuntime(ctx context.Context, m metrics.Reporter, rp RuntimeProvider, lo
// Kill the running container if there is one, shut it down
runner.Kill()
}()
err = <-setupCh
if err != nil {

if err := <-setupCh; err != nil {
return nil, err
}

Expand Down Expand Up @@ -153,6 +138,7 @@ func (r *Runner) startRunner(parentCtx context.Context, setupCh chan error, rp R
defer cancel()
defer close(r.StoppedChan)

// We must ensure that setupCh is closed, or returns an error.
if err := r.setupRunner(ctx, rp); err != nil {
setupCh <- err
return
Expand Down Expand Up @@ -189,17 +175,6 @@ func (r *Runner) startRunner(parentCtx context.Context, setupCh chan error, rp R
}
r.container = runtime.NewContainer(taskConfig.taskID, taskConfig.titusInfo, resources, labels, r.config)

// TODO: Wire up cleanup callback
var le launchguardCore.LaunchEvent = &launchguardCore.NoopLaunchEvent{}

if r.container.TitusInfo.GetIgnoreLaunchGuard() {
r.logger.Info("Ignoring Launchguard")
} else {
// Wait until the launchGuard is released.
// TODO(Andrew L): We only block concurrent launches to avoid a race condition introduced
// by the Titus master releasing resources prior to the agent releasing them.
le = r.launchGuard.NewLaunchEvent(ctx, r.container.TitusInfo.GetNetworkConfigInfo().GetEniLabel())
}
if r.config.MetatronEnabled {
err = r.setupMetatron()
if err != nil {
Expand All @@ -212,30 +187,6 @@ func (r *Runner) startRunner(parentCtx context.Context, setupCh chan error, rp R

// At this point we've begun starting, and we need to explicitly inform the master when the task finishes
defer r.handleShutdown(ctx)
select {
case <-le.Launch():
r.logger.Info("Launch not blocked on on launchGuard")
goto no_launchguard
default:
r.logger.Info("Launch waiting on launchGuard")
r.updateStatus(ctx, titusdriver.Starting, WaitingOnLaunchguardMessage)

}
select {
case <-le.Launch():
r.logger.Info("No longer waiting on launchGuard")
case <-r.killChan:
r.logger.Warning("Killed while waiting on launchguard")
return
case <-ctx.Done():
r.logger.Warning("local context done while waiting on launchguard")
return
case <-parentCtx.Done():
r.logger.Warning("Parent context done while waiting on launchguard")
return
}

no_launchguard:

select {
case <-r.killChan:
Expand Down Expand Up @@ -378,15 +329,7 @@ func (r *Runner) handleTaskRunningMessage(ctx context.Context, msg string, lastM

func (r *Runner) handleShutdown(ctx context.Context) { // nolint: gocyclo
r.logger.Debug("Handling shutdown")
launchGuardCtx, cancel := context.WithCancel(context.Background())
defer cancel()
var cleanupErrs []error
var ce launchguardCore.CleanUpEvent = &launchguardCore.NoopCleanUpEvent{}

if r.wasKilled() {
r.logger.Info("Setting launchGuard while stopping task")
ce = r.launchGuard.NewRealCleanUpEvent(launchGuardCtx, r.container.TitusInfo.GetNetworkConfigInfo().GetEniLabel())
}

killStartTime := time.Now()
// Are we in a situation where the container exited gracefully, or less than gracefully?
Expand All @@ -402,17 +345,6 @@ func (r *Runner) handleShutdown(ctx context.Context) { // nolint: gocyclo
cleanupErrs = append(cleanupErrs, err)
}
}
/* If this flag is not set to true, we've been launched by the v2 engine,
* therefore a task can be started on this ENI instantaneously after a launch.
*
* Otherwise, we hold the launchguard until all cleanup is completed.
*/
if !r.container.TitusInfo.GetIgnoreLaunchGuard() {
r.logger.Info("Unsetting launchguard")
ce.Done()
} else {
defer ce.Done()
}

if r.watcher != nil {
if err := r.watcher.Stop(); err != nil {
Expand Down
111 changes: 0 additions & 111 deletions executor/runner/runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@ import (
"testing"
"time"

"net/http/httptest"

"github.com/Netflix/metrics-client-go/metrics"
"github.com/Netflix/titus-executor/api/netflix/titus"
"github.com/Netflix/titus-executor/config"
"github.com/Netflix/titus-executor/executor/drivers"
runtimeTypes "github.com/Netflix/titus-executor/executor/runtime/types"
"github.com/Netflix/titus-executor/launchguard/client"
"github.com/Netflix/titus-executor/launchguard/server"
"github.com/Netflix/titus-executor/uploader"
"github.com/gogo/protobuf/proto"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -294,110 +290,3 @@ done:
t.Fatal("Kill timeout received")
}
}

// TestHoldsLaunchesUntilStopFinishes tests the launchGuard, which has caused
// too many deadlocks. It builds two runners via mocks, starts a task on the
// first, begins killing it, and then checks that a task started on the second
// runner is only reported as launched after the pending kill is released.
func TestHoldsLaunchesUntilStopFinishes(t *testing.T) { // nolint: gocyclo
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

var (
taskID = "Titus-123-worker-0-2"
image = "titusops/alpine"
taskInfo = &titus.ContainerInfo{
ImageName: &image,
}
// kills1/launched1 belong to the first runner (e1): kill requests arrive on
// kills1, and launched1 is closed by drain when the task reports TASK_RUNNING.
kills1 = make(chan chan<- struct{}, 1)
launched1 = make(chan struct{})
_, e1 = mocks(ctx, t, kills1, launched1)

// Same pair of channels for the second runner (e2).
kills2 = make(chan chan<- struct{}, 1)
launched2 = make(chan struct{})
_, e2 = mocks(ctx, t, kills2, launched2)
)

// Before the test returns, block until both runners have signalled on their
// StoppedChan. This deferred func runs before the deferred cancel() above.
defer func() {
<-e1.StoppedChan
<-e2.StoppedChan

}()
// Start the first task on e1.
if err := e1.StartTask(taskID, taskInfo, 512, 1, 1024); err != nil {
t.Fatal(err)
}

// Wait (up to 5s) for the first task to be reported as running.
select {
case <-time.After(5 * time.Second):
t.Fatalf("Task %s not RUNNING after 5s", taskID)
case <-launched1: // OK
}

// a pending Kill hangs until we tell it to proceed
// NOTE(review): presumably runtimeMock's Kill blocks on the channel it sends
// to kills1 — confirm against the runtimeMock definition.
go e1.Kill()
// wait for the Kill to begin
var killReq chan<- struct{}

select {
case killReq = <-kills1:
case <-time.After(5 * time.Second):
t.Fatal("The Kill operation has not started yet after 5s")
}

// Start a second task on e2 while e1's kill is still pending. StartTask runs
// in its own goroutine, so errors are reported with t.Error rather than t.Fatal.
go func() {
if err := e2.StartTask("A-New-Task", taskInfo, 512, 1, 1024); err != nil {
t.Error(err)
}
}()

// Non-blocking check that the second task has not launched yet.
// NOTE(review): this select runs immediately after the goroutine above is
// spawned, so it can also pass simply because StartTask has not run yet.
select {
case <-launched2:
t.Fatal("Executor must wait until the pending kill finish before launching tasks")
default: // OK, expected
}

close(killReq) // let the kill finish
// Once the kill is released, the second task must launch within 5s.
select {
case <-launched2: // OK, expected
case <-time.After(5 * time.Second):
t.Fatal("Executor did not launch pending task within 5s after all kills finished")
}
// Cancel the shared context explicitly so the deferred wait on the
// StoppedChans is not left waiting for the deferred cancel, which runs later.
cancel()
}

// mocks builds the fixtures for a runner test: a runtimeMock that forwards
// kill requests to killRequests, and a Runner created through WithRuntime on
// top of that mock. The Runner's launchGuard client is replaced with one that
// talks to an in-process httptest launchguard server, and a drain goroutine is
// started to consume the Runner's status updates and signal taskLaunched.
// Any setup failure fails the test immediately.
func mocks(ctx context.Context, t *testing.T, killRequests chan<- chan<- struct{}, taskLaunched chan struct{}) (*runtimeMock, *Runner) {
	guardServer := httptest.NewServer(server.NewLaunchGuardServer(metrics.Discard))

	mockRuntime := &runtimeMock{
		t:           t,
		startCalled: make(chan<- struct{}),
		kills:       killRequests,
		ctx:         ctx,
		statusChan:  make(chan runtimeTypes.StatusMessage, 10),
	}
	// Runtime factory handed to WithRuntime; it always yields the mock above.
	provider := func(ctx context.Context, _cfg config.Config) (runtimeTypes.Runtime, error) {
		return mockRuntime, nil
	}
	noopUploaders := uploader.NewUploadersFromUploaderArray([]uploader.Uploader{&uploader.NoopUploader{}})

	testRunner, err := WithRuntime(ctx, metrics.Discard, provider, noopUploaders, config.Config{})
	if err != nil {
		t.Fatal(err)
	}

	// Swap in a launchguard client that points at the test server's URL.
	testRunner.launchGuard, err = client.NewLaunchGuardClient(metrics.Discard, guardServer.URL)
	require.NoError(t, err)

	go drain(t, testRunner, taskLaunched)
	return mockRuntime, testRunner
}

// drain consumes every status update from e.UpdatesChan, logging each one, and
// closes taskLaunched the first time an update whose state stringifies to
// "TASK_RUNNING" is seen, so that others can be notified the task is running.
// It returns once e.UpdatesChan has been closed.
func drain(t *testing.T, e *Runner, taskLaunched chan struct{}) {
	// Closing an already-closed channel panics, so remember whether the signal
	// has been sent. The flag is only touched by this loop, so a plain bool is
	// enough.
	launched := false
	for status := range e.UpdatesChan {
		t.Logf("Reported status: %+v", status)
		if status.State.String() == "TASK_RUNNING" && !launched {
			launched = true
			close(taskLaunched)
		}
	}
	t.Log("Drain complete")
}
1 change: 0 additions & 1 deletion hack/builder/titus-executor-builder.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ fi
cat <<-EOF >/tmp/post-install.sh
#!/bin/bash
systemctl enable titus-darion.service
systemctl enable titus-launchguard.service
systemctl enable titus-reaper.service
systemctl enable titus-setup-networking.timer
systemctl enable titus-vpc-gc.timer
Expand Down
Loading

0 comments on commit acde9fb

Please sign in to comment.