diff --git a/contrib/compose/nginx_medium_rewrite_urls.conf b/contrib/compose/nginx_medium_rewrite_urls.conf index ac2a804..fd4b897 100644 --- a/contrib/compose/nginx_medium_rewrite_urls.conf +++ b/contrib/compose/nginx_medium_rewrite_urls.conf @@ -33,6 +33,34 @@ map $uri $needs_conversion { default "no"; } +# Origin micro-cache. Sits in front of Ghost to absorb traffic spikes so the DB connection pool (60) is never exhausted. Mirrors the production config +# in nginx.conf.sigil — keep them in sync. +proxy_cache_path /var/cache/nginx/ghost levels=1:2 keys_zone=ghost:50m + max_size=2g inactive=1h use_temp_path=off; + +# Bypass cache entirely for logged-in members and Ghost admin/staff. Both proxy_cache_bypass and proxy_no_cache must reference these — setting only +# one leaks personalized responses to anonymous visitors. +map $http_cookie $ghost_bypass_cookie { + default 0; + "~*ghost-members" 1; + "~*ghost-admin-api-session" 1; +} + +map $request_method $ghost_bypass_method { + default 0; + POST 1; + PUT 1; + PATCH 1; + DELETE 1; +} + +map $request_uri $ghost_bypass_path { + default 0; + "~^/ghost/" 1; + "~^/members/api/" 1; + "~^/members/auth/" 1; +} + # Set up a JSON log format for better structured logging. log_format json escape=json '{' @@ -62,6 +90,61 @@ server { # https://docs.ghost.org/migration/medium#using-custom-domains rewrite "^/(.*)(-[0-9a-f]{10,12})$" /$1 permanent; + # Sitemap routes: 1h TTL because search crawlers tolerate hours of staleness. Mirrors the production block in nginx.conf.sigil. Matches # /sitemap.xml plus the sub-sitemaps Ghost emits (/sitemap-pages.xml, -posts, -tags, -authors). 
+ location ~* ^/sitemap(-[a-z]+)?\.xml$ { + proxy_pass http://ghost:2368; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache ghost; + proxy_cache_key "$scheme$host$request_uri"; + proxy_cache_valid 200 301 302 1h; + proxy_cache_valid 404 5m; + + proxy_cache_lock on; + proxy_cache_lock_timeout 5s; + proxy_cache_lock_age 10s; + proxy_cache_use_stale updating error timeout + http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + + proxy_cache_bypass $ghost_bypass_method; + proxy_no_cache $ghost_bypass_method; + + add_header X-Cache-Status $upstream_cache_status always; + } + + # Feed routes: longer TTL (10m, vs. 30s in `location /`) because RSS readers poll on a schedule and tolerate minutes of staleness. Mirrors the production block in + # nginx.conf.sigil — keep them in sync. Regex location wins over `location /`, so the Lua URL rewriter is skipped (safe: feed paths # are ASCII and carry no Medium-ID suffixes). 
+ location ~* ^/(rss|feed)(/|$|\.xml$) { + proxy_pass http://ghost:2368; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache ghost; + proxy_cache_key "$scheme$host$request_uri"; + proxy_cache_valid 200 301 302 10m; + proxy_cache_valid 404 1m; + + proxy_cache_lock on; + proxy_cache_lock_timeout 5s; + proxy_cache_lock_age 10s; + proxy_cache_use_stale updating error timeout + http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + + proxy_cache_bypass $ghost_bypass_cookie $ghost_bypass_method; + proxy_no_cache $ghost_bypass_cookie $ghost_bypass_method; + + add_header X-Cache-Status $upstream_cache_status always; + } + location / { # Check if conversion is needed using the map access_by_lua_block { @@ -137,5 +220,27 @@ server { proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; + + # ---- Origin micro-cache -------------------------------------------- + # Trio that prevents the thundering herd: cache_lock collapses concurrent cold misses to one upstream request; use_stale updating + # + background_update means TTL expiry on a hot key triggers ONE async refresh while everyone else gets stale instantly. NOTE(review): upstream Cache-Control/Expires headers take priority over proxy_cache_valid — confirm Ghost's Cache-Control permits caching, otherwise these TTLs never apply. + proxy_cache ghost; + proxy_cache_key "$scheme$host$request_uri"; + proxy_cache_valid 200 301 302 30s; + proxy_cache_valid 404 10s; + + proxy_cache_lock on; + proxy_cache_lock_timeout 5s; + proxy_cache_lock_age 10s; + + proxy_cache_use_stale updating error timeout + http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + + # Logged-in members/staff, mutating methods, and admin/auth paths: never serve from cache, never store. 
+ proxy_cache_bypass $ghost_bypass_cookie $ghost_bypass_method $ghost_bypass_path; + proxy_no_cache $ghost_bypass_cookie $ghost_bypass_method $ghost_bypass_path; + + add_header X-Cache-Status $upstream_cache_status always; } } diff --git a/docker-compose.yml b/docker-compose.yml index 74dbb60..dbad0ac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,7 @@ services: - 8069:8069 volumes: - ./contrib/compose/nginx_medium_rewrite_urls.conf:/etc/nginx/conf.d/nginx_medium_rewrite_urls.conf + - nginx_cache:/var/cache/nginx/ghost depends_on: - ghost @@ -68,3 +69,4 @@ volumes: ghost: db: minio_data: + nginx_cache: diff --git a/nginx.conf.sigil b/nginx.conf.sigil index bd1f9d8..b709500 100644 --- a/nginx.conf.sigil +++ b/nginx.conf.sigil @@ -38,6 +38,76 @@ map $uri $needs_conversion { default "no"; } +# Origin micro-cache. Sits in front of Ghost to absorb Cloudflare cache misses during traffic spikes so the DB connection pool is never exhausted. +# levels=1:2 spreads files across subdirectories; keys_zone holds metadata in shared memory (50m ~= 400k keys). max_size caps disk usage; inactive evicts # entries not requested for 1h regardless of TTL. NOTE(review): the zone name `ghost` is fixed while the cache path is per-app ({{ .APP }}); presumably only one app renders this template — confirm, since a second proxy_cache_path with the same zone name would be rejected as a duplicate zone. +proxy_cache_path /var/cache/nginx/{{ .APP }} levels=1:2 keys_zone=ghost:50m + max_size=2g inactive=1h use_temp_path=off; + +# Bypass the cache entirely for logged-in members and Ghost admin/staff. +# Both proxy_cache_bypass (don't serve from cache) and proxy_no_cache (don't store in cache) read this — setting only one is the classic foot-gun that # leaks personalized responses to anonymous visitors. +map $http_cookie $ghost_bypass_cookie { + default 0; + "~*ghost-members" 1; # member SSR sessions (front-end) + "~*ghost-admin-api-session" 1; # staff/admin sessions +} + +# Bypass for mutating methods (POST/PUT/PATCH/DELETE). +map $request_method $ghost_bypass_method { + default 0; + POST 1; + PUT 1; + PATCH 1; + DELETE 1; +} + +# Bypass for the admin UI and admin API (/ghost/), the members API, and members auth endpoints. 
+map $request_uri $ghost_bypass_path { + default 0; + "~^/ghost/" 1; + "~^/members/api/" 1; + "~^/members/auth/" 1; +} + +# Cloudflare real-IP unwrapping. Without this, $remote_addr is whichever Cloudflare edge IP forwarded the request, which (a) makes access logs +# useless for tracing abuse, (b) defeats per-IP rate limits, and (c) sends Cloudflare's IP to Ghost in X-Forwarded-For instead of the real visitor. +# +# After this block, $remote_addr is rewritten to the value of the CF-Connecting-IP header — but only when the request actually came from a +# Cloudflare edge IP listed below. Direct hits to the origin (someone who discovered the origin IP) keep their real $remote_addr and can't spoof +# CF-Connecting-IP. +# +# Source of truth, refresh periodically: +# https://www.cloudflare.com/ips-v4 +# https://www.cloudflare.com/ips-v6 +# +# Requires the ngx_http_realip_module — included in the standard +# nginx-full / nginx-extras packages on Ubuntu, so presumably already present on Dokku (confirm with `nginx -V`). +set_real_ip_from 173.245.48.0/20; +set_real_ip_from 103.21.244.0/22; +set_real_ip_from 103.22.200.0/22; +set_real_ip_from 103.31.4.0/22; +set_real_ip_from 141.101.64.0/18; +set_real_ip_from 108.162.192.0/18; +set_real_ip_from 190.93.240.0/20; +set_real_ip_from 188.114.96.0/20; +set_real_ip_from 197.234.240.0/22; +set_real_ip_from 198.41.128.0/17; +set_real_ip_from 162.158.0.0/15; +set_real_ip_from 104.16.0.0/13; +set_real_ip_from 104.24.0.0/14; +set_real_ip_from 172.64.0.0/13; +set_real_ip_from 131.0.72.0/22; +set_real_ip_from 2400:cb00::/32; +set_real_ip_from 2606:4700::/32; +set_real_ip_from 2803:f800::/32; +set_real_ip_from 2405:b500::/32; +set_real_ip_from 2405:8100::/32; +set_real_ip_from 2a06:98c0::/29; +set_real_ip_from 2c0f:f248::/32; +real_ip_header CF-Connecting-IP; +real_ip_recursive on; + # Set up a JSON log format for better structured logging. 
log_format json escape=json '{' @@ -101,6 +171,69 @@ server { lingering_timeout 5s; send_timeout 60s; + # Sitemap routes: even longer TTL than feeds because search engine crawlers tolerate hours of staleness and Ghost regenerates these on + # post publish/update anyway. Matches the index (/sitemap.xml) and the four sub-sitemaps Ghost emits (/sitemap-pages.xml, -posts, -tags, # -authors). Regex location, so Lua rewriter is skipped — paths are ASCII and won't carry Medium-ID suffixes. + location ~* ^/sitemap(-[a-z]+)?\.xml$ { + proxy_pass http://{{ .APP }}; + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $remote_addr; + proxy_set_header X-Forwarded-Port $server_port; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache ghost; + proxy_cache_key "$scheme$host$request_uri"; + proxy_cache_valid 200 301 302 1h; + proxy_cache_valid 404 5m; + + proxy_cache_lock on; + proxy_cache_lock_timeout 5s; + proxy_cache_lock_age 10s; + proxy_cache_use_stale updating error timeout + http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + + proxy_cache_bypass $ghost_bypass_method; + proxy_no_cache $ghost_bypass_method; + + add_header X-Cache-Status $upstream_cache_status always; + } + + # Feed routes: longer TTL because RSS readers (Feedly, Inoreader, etc.) poll on a schedule and tolerate minutes of staleness. Same cache and + # bypass machinery as the main location, just with a 10-minute valid window so polls collapse onto ~6 origin requests/hour total instead of # one per poller per 30s. + # + # Note: this is a regex location and wins over `location /`, so the Lua URL rewriter is skipped here. That's fine — the /rss and /feed paths matched here # are ASCII and don't carry Medium-ID suffixes. 
+ location ~* ^/(rss|feed)(/|$|\.xml$) { + proxy_pass http://{{ .APP }}; + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $remote_addr; + proxy_set_header X-Forwarded-Port $server_port; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache ghost; + proxy_cache_key "$scheme$host$request_uri"; + proxy_cache_valid 200 301 302 10m; + proxy_cache_valid 404 1m; + + proxy_cache_lock on; + proxy_cache_lock_timeout 5s; + proxy_cache_lock_age 10s; + proxy_cache_use_stale updating error timeout + http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + + # Defense in depth — feed readers don't carry member cookies in + # practice, but skip cache for them anyway if they ever appear. + proxy_cache_bypass $ghost_bypass_cookie $ghost_bypass_method; + proxy_no_cache $ghost_bypass_cookie $ghost_bypass_method; + + add_header X-Cache-Status $upstream_cache_status always; + } + location / { gzip on; gzip_min_length 1100; @@ -194,6 +327,32 @@ server { proxy_set_header X-Forwarded-Port $server_port; proxy_set_header X-Forwarded-Proto $scheme; proxy_set_header X-Request-Start $msec; + + # ---- Origin micro-cache ------------------------------------------------ + # Cache anonymous GET/HEAD responses (200/301/302 for 30s, 404 for 10s). The trio (cache_lock + use_stale updating + background_update) prevents the + # thundering herd: at TTL expiry, ONE request refreshes the cache in the background while every other concurrent request is served stale instantly. NOTE(review): upstream Cache-Control/Expires headers take priority over proxy_cache_valid — confirm Ghost's Cache-Control permits caching, otherwise these TTLs never apply. + proxy_cache ghost; + proxy_cache_key "$scheme$host$request_uri"; + proxy_cache_valid 200 301 302 30s; + proxy_cache_valid 404 10s; + + # Collapse concurrent cold misses for the same key onto a single upstream request. Waiters block until the holder responds or the lock timeout fires, after which they are passed to the upstream and their response is not cached. 
+ proxy_cache_lock on; + proxy_cache_lock_timeout 5s; + proxy_cache_lock_age 10s; + + # Serve stale while a refresh is in flight, and as a fallback if the origin is failing — keeps the site up even if Ghost or the DB are sick. + proxy_cache_use_stale updating error timeout + http_500 http_502 http_503 http_504; + proxy_cache_background_update on; + + # Skip the cache for logged-in members, admin, mutating methods, and admin/auth paths. Both directives are required: bypass means "don't + # serve from cache", no_cache means "don't store this response". + proxy_cache_bypass $ghost_bypass_cookie $ghost_bypass_method $ghost_bypass_path; + proxy_no_cache $ghost_bypass_cookie $ghost_bypass_method $ghost_bypass_path; + + # Surface cache state for debugging (HIT / MISS / BYPASS / EXPIRED / UPDATING / STALE / REVALIDATED). + add_header X-Cache-Status $upstream_cache_status always; } include {{ .DOKKU_ROOT }}/{{ .APP }}/nginx.conf.d/*.conf; }