Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,19 @@ On merge, CI will:

## [Unreleased]

_Add unreleased changes here._
### Added

- Sentry events now carry `app`, `process`, `region`, and `server_name` tags
identifying the Fly app, binary, region, and machine that emitted them.
Review-app errors previously had none of these, so bursts couldn't be
attributed to a specific deploy. New helpers `logging.InitSentry` and
`observability.StartMetricsServer` centralise the duplicated bootstrap across
`cmd/app`, `cmd/worker`, and `cmd/analysis`.

### Fixed

- `cmd/analysis` now gracefully shuts down its metrics HTTP server on SIGTERM;
previously it spawned the listener but never called `Shutdown`.

## Full changelog history

Expand Down
54 changes: 11 additions & 43 deletions cmd/analysis/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ import (
"context"
"errors"
"fmt"
"net"
"net/http"
"net/http/pprof"
"os"
"os/signal"
"strconv"
Expand All @@ -23,7 +20,6 @@ import (
"github.com/Harvey-AU/hover/internal/lighthouse"
"github.com/Harvey-AU/hover/internal/logging"
"github.com/Harvey-AU/hover/internal/observability"
"github.com/getsentry/sentry-go"
"github.com/redis/go-redis/v9"
)

Expand All @@ -42,18 +38,15 @@ func main() {
appEnv := os.Getenv("APP_ENV")

// Sentry first so logging.Setup can wire its handler.
if dsn := os.Getenv("SENTRY_DSN"); dsn != "" {
if err := sentry.Init(sentry.ClientOptions{
Dsn: dsn,
Environment: appEnv,
AttachStacktrace: true,
BeforeSend: logging.BeforeSend,
}); err != nil {
fmt.Fprintf(os.Stderr, "failed to initialise Sentry: %v\n", err)
} else {
defer sentry.Flush(2 * time.Second)
}
sentryFlush, err := logging.InitSentry(logging.SentryOptions{
DSN: os.Getenv("SENTRY_DSN"),
Environment: appEnv,
Process: "analysis",
})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to initialise Sentry: %v\n", err)
}
defer sentryFlush()

logging.Setup(logging.ParseLevel(os.Getenv("LOG_LEVEL")), appEnv)
analysisLog.Info("hover analysis starting")
Expand All @@ -67,47 +60,22 @@ func main() {
if metricsAddr == "" {
metricsAddr = ":9464"
}
providers, err := observability.Init(context.Background(), observability.Config{
Enabled: true,
metricsSrv, err := observability.StartMetricsServer(context.Background(), observability.MetricsServerOptions{
ServiceName: serviceName,
Environment: appEnv,
OTLPEndpoint: strings.TrimSpace(os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")),
OTLPHeaders: observability.ParseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")),
MetricsAddress: metricsAddr,
EnablePprof: true,
})
if err != nil {
analysisLog.Warn("failed to initialise observability", "error", err)
} else {
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
_ = providers.Shutdown(ctx)
metricsSrv.Shutdown(ctx)
}()
if providers.MetricsHandler != nil && metricsAddr != "" {
mux := http.NewServeMux()
mux.Handle("/metrics", providers.MetricsHandler)
mux.HandleFunc("/debug/pprof/", pprof.Index)
mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
metricsSrv := &http.Server{
Addr: metricsAddr,
Handler: mux,
ReadHeaderTimeout: 5 * time.Second,
}
listener, err := net.Listen("tcp", metricsAddr)
if err != nil {
analysisLog.Error("metrics server failed to bind", "error", err, "addr", metricsAddr)
} else {
go func() {
analysisLog.Info("metrics server listening", "addr", metricsAddr)
if err := metricsSrv.Serve(listener); err != nil && !errors.Is(err, http.ErrServerClosed) {
analysisLog.Error("metrics server failed", "error", err)
}
}()
}
}
}
}

Expand Down
74 changes: 21 additions & 53 deletions cmd/app/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"github.com/Harvey-AU/hover/internal/loops"
"github.com/Harvey-AU/hover/internal/notifications"
"github.com/Harvey-AU/hover/internal/observability"
"github.com/getsentry/sentry-go"
"github.com/joho/godotenv"
"golang.org/x/time/rate"
)
Expand Down Expand Up @@ -403,28 +402,20 @@ func main() {
}()
}

var err error

// Init before setupLogging so the fanout handler wires the existing client.
if config.SentryDSN != "" {
err := sentry.Init(sentry.ClientOptions{
Dsn: config.SentryDSN,
Environment: config.Env,
TracesSampleRate: func() float64 {
if config.Env == "production" {
return 0.1
}
return 1.0
}(),
AttachStacktrace: true,
sentryFlush, err := logging.InitSentry(logging.SentryOptions{
DSN: config.SentryDSN,
Environment: config.Env,
Process: "app",
TracesSampleRate: appTracesSampleRate(config.Env),
Debug: config.Env == "development",
BeforeSend: logging.BeforeSend,
})
if err != nil {
startupLog.Warn("Failed to initialise Sentry", "error", err)
} else {
startupLog.Info("Sentry initialised successfully", "environment", config.Env)
defer sentry.Flush(2 * time.Second)
defer sentryFlush()
}
} else {
startupLog.Warn("Sentry DSN not configured, error tracking disabled")
Expand All @@ -437,19 +428,17 @@ func main() {
}
}()

var (
obsProviders *observability.Providers
metricsSrv *http.Server
)
var obsProviders *observability.Providers

if config.ObservabilityEnabled {
// FLY_APP_NAME distinguishes review apps (hover-pr-342) from prod.
serviceName := strings.TrimSpace(os.Getenv("FLY_APP_NAME"))
if serviceName == "" {
serviceName = "hover"
}
obsProviders, err = observability.Init(context.Background(), observability.Config{
Enabled: true,
// EnablePprof intentionally false — pprof on cmd/app would expose a
// debug surface alongside the public HTTP listener.
metricsSrv, err := observability.StartMetricsServer(context.Background(), observability.MetricsServerOptions{
ServiceName: serviceName,
Environment: config.Env,
OTLPEndpoint: strings.TrimSpace(config.OTLPEndpoint),
Expand All @@ -460,42 +449,12 @@ func main() {
if err != nil {
startupLog.Warn("Failed to initialise observability providers", "error", err)
} else {
obsProviders = metricsSrv.Providers
defer func() {
shutdownCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := obsProviders.Shutdown(shutdownCtx); err != nil {
startupLog.Warn("Failed to flush telemetry providers cleanly", "error", err)
}
metricsSrv.Shutdown(shutdownCtx)
}()

if obsProviders.MetricsHandler != nil && config.MetricsAddr != "" {
metricsSrv = &http.Server{
Addr: config.MetricsAddr,
Handler: obsProviders.MetricsHandler,
ReadHeaderTimeout: 5 * time.Second,
}

// Bind before logging readiness so a bind failure surfaces correctly.
metricsListener, err := net.Listen("tcp", config.MetricsAddr)
if err != nil {
startupLog.Error("Metrics server failed to bind", "error", err, "addr", config.MetricsAddr)
} else {
go func() {
startupLog.Info("Metrics server listening", "addr", config.MetricsAddr)
if err := metricsSrv.Serve(metricsListener); err != nil && !errors.Is(err, http.ErrServerClosed) {
startupLog.Error("Metrics server failed", "error", err)
}
}()

defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := metricsSrv.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
startupLog.Warn("Graceful shutdown of metrics server failed", "error", err)
}
}()
}
}
}
}

Expand Down Expand Up @@ -793,6 +752,15 @@ func getEnvWithDefault(key, defaultValue string) string {
return value
}

// appTracesSampleRate maps the deployment environment name to the trace
// sample rate handed to Sentry: 0.1 when env is exactly "production", and
// 1.0 for any other value (including the empty string). Production is
// throttled to keep trace volume within budget, while non-prod environments
// sample everything so review apps still emit a usable trace stream.
func appTracesSampleRate(env string) float64 {
	switch env {
	case "production":
		return 0.1
	default:
		return 1.0
	}
}

// setupLogging initialises the logging package from config: the textual
// config.LogLevel is converted with logging.ParseLevel, and the result is
// passed to logging.Setup together with config.Env.
func setupLogging(config *Config) {
	level := logging.ParseLevel(config.LogLevel)
	logging.Setup(level, config.Env)
}
Expand Down
69 changes: 14 additions & 55 deletions cmd/worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,8 @@ package main
import (
"context"
"database/sql"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"net/http/pprof"
"os"
"os/signal"
"strconv"
Expand All @@ -25,7 +21,6 @@ import (
"github.com/Harvey-AU/hover/internal/logging"
"github.com/Harvey-AU/hover/internal/observability"
"github.com/Harvey-AU/hover/internal/watchdog"
"github.com/getsentry/sentry-go"
"github.com/lib/pq"
)

Expand All @@ -35,18 +30,15 @@ func main() {
appEnv := os.Getenv("APP_ENV")

// Init before logging.Setup so the sentry slog handler can attach.
if dsn := os.Getenv("SENTRY_DSN"); dsn != "" {
if err := sentry.Init(sentry.ClientOptions{
Dsn: dsn,
Environment: appEnv,
AttachStacktrace: true,
BeforeSend: logging.BeforeSend,
}); err != nil {
fmt.Fprintf(os.Stderr, "failed to initialise Sentry: %v\n", err)
} else {
defer sentry.Flush(2 * time.Second)
}
sentryFlush, err := logging.InitSentry(logging.SentryOptions{
DSN: os.Getenv("SENTRY_DSN"),
Environment: appEnv,
Process: "worker",
})
if err != nil {
fmt.Fprintf(os.Stderr, "failed to initialise Sentry: %v\n", err)
}
defer sentryFlush()

logging.Setup(logging.ParseLevel(os.Getenv("LOG_LEVEL")), appEnv)
defer flushAsyncLogs()
Expand All @@ -63,58 +55,25 @@ func main() {
if metricsAddr == "" {
metricsAddr = ":9464"
}
providers, err := observability.Init(context.Background(), observability.Config{
Enabled: true,
// Alloy sidecar scrapes /metrics here to add app/environment labels;
// pure OTLP push bypasses the dashboard's app filter. pprof shares the
// port (Fly internal network only, no auth guard needed).
metricsSrv, err := observability.StartMetricsServer(context.Background(), observability.MetricsServerOptions{
ServiceName: serviceName,
Environment: appEnv,
OTLPEndpoint: strings.TrimSpace(os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")),
OTLPHeaders: observability.ParseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")),
MetricsAddress: metricsAddr,
EnablePprof: true,
})
if err != nil {
workerLog.Warn("failed to initialise observability", "error", err)
} else {
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
_ = providers.Shutdown(ctx)
metricsSrv.Shutdown(ctx)
}()

// Alloy sidecar scrapes /metrics here to add app/environment labels;
// pure OTLP push bypasses the dashboard's app filter. pprof shares the
// port (Fly internal network only, no auth guard needed).
if providers.MetricsHandler != nil && metricsAddr != "" {
mux := http.NewServeMux()
mux.Handle("/metrics", providers.MetricsHandler)
mux.HandleFunc("/debug/pprof/", pprof.Index)
mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
metricsSrv := &http.Server{
Addr: metricsAddr,
Handler: mux,
ReadHeaderTimeout: 5 * time.Second,
}
metricsListener, err := net.Listen("tcp", metricsAddr)
if err != nil {
workerLog.Error("metrics server failed to bind", "error", err, "addr", metricsAddr)
} else {
go func() {
workerLog.Info("metrics server listening", "addr", metricsAddr)
if err := metricsSrv.Serve(metricsListener); err != nil && !errors.Is(err, http.ErrServerClosed) {
workerLog.Error("metrics server failed", "error", err)
}
}()
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := metricsSrv.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) {
workerLog.Warn("graceful shutdown of metrics server failed", "error", err)
}
}()
}
}
}
}

Expand Down
Loading
Loading