diff --git a/crates/common/src/creative.rs b/crates/common/src/creative.rs index 3953b2e..02041dd 100644 --- a/crates/common/src/creative.rs +++ b/crates/common/src/creative.rs @@ -43,11 +43,18 @@ use crate::tsjs; use lol_html::{element, html_content::ContentType, text, HtmlRewriter, Settings as HtmlSettings}; // Helper: normalize to absolute URL if http/https or protocol-relative. Otherwise None. -pub(super) fn to_abs(u: &str) -> Option { +// Checks against the rewrite blacklist to exclude configured domains/patterns from proxying. +pub(super) fn to_abs(u: &str, settings: &Settings) -> Option { let t = u.trim(); if t.is_empty() { return None; } + + // Skip if excluded from rewrites in settings + if settings.rewrite.is_excluded(t) { + return None; + } + // Skip non-network schemes commonly found in creatives let lower = t.to_ascii_lowercase(); if lower.starts_with("data:") @@ -59,6 +66,7 @@ pub(super) fn to_abs(u: &str) -> Option { { return None; } + if t.starts_with("//") { Some(format!("https:{}", t)) } else if lower.starts_with("http://") || lower.starts_with("https://") { @@ -106,7 +114,7 @@ pub(super) fn rewrite_style_urls(style: &str, settings: &Settings) -> String { (s, e) }; let url_val = &style[qs..qe]; - let new_val = if let Some(abs) = to_abs(url_val) { + let new_val = if let Some(abs) = to_abs(url_val, settings) { build_proxy_url(settings, &abs) } else { url_val.to_string() @@ -196,7 +204,7 @@ pub(super) fn build_click_url(settings: &Settings, clear_url: &str) -> String { #[inline] pub(super) fn proxy_if_abs(settings: &Settings, val: &str) -> Option { - to_abs(val).map(|abs| build_proxy_url(settings, &abs)) + to_abs(val, settings).map(|abs| build_proxy_url(settings, &abs)) } /// Split a srcset/imagesrcset attribute into candidate strings. @@ -259,7 +267,7 @@ pub(super) fn rewrite_srcset(srcset: &str, settings: &Settings) -> String { let mut parts = it.split_whitespace(); let url = parts.next().unwrap_or(""); let descriptor = parts.collect::>().join(" "); - let rewritten = if let Some(abs) = to_abs(url) { + let rewritten = if let Some(abs) = to_abs(url, settings) { build_proxy_url(settings, &abs) } else { url.to_string() @@ -404,7 +412,7 @@ pub fn rewrite_creative_html(markup: &str, settings: &Settings) -> String { // Click-through links element!("a[href], area[href]", |el| { if let Some(href) = el.get_attribute("href") { - if let Some(abs) = to_abs(&href) { + if let Some(abs) = to_abs(&href, settings) { let click = build_click_url(settings, &abs); let _ = el.set_attribute("href", &click); let _ = el.set_attribute("data-tsclick", &click); @@ -508,26 +516,27 @@ mod tests { #[test] fn to_abs_conversions() { + let settings = crate::test_support::tests::create_test_settings(); assert_eq!( - to_abs("//cdn.example/x"), + to_abs("//cdn.example/x", &settings), Some("https://cdn.example/x".to_string()) ); assert_eq!( - to_abs("HTTPS://cdn.example/x"), + to_abs("HTTPS://cdn.example/x", &settings), Some("HTTPS://cdn.example/x".to_string()) ); assert_eq!( - to_abs("http://cdn.example/x"), + to_abs("http://cdn.example/x", &settings), Some("http://cdn.example/x".to_string()) ); - assert_eq!(to_abs("/local/x"), None); + assert_eq!(to_abs("/local/x", &settings), None); assert_eq!( - to_abs(" //cdn.example/y "), + to_abs(" //cdn.example/y ", &settings), Some("https://cdn.example/y".to_string()) ); - assert_eq!(to_abs("data:image/png;base64,abcd"), None); - assert_eq!(to_abs("javascript:alert(1)"), None); - assert_eq!(to_abs("mailto:test@example.com"), None); + assert_eq!(to_abs("data:image/png;base64,abcd", &settings), None); + assert_eq!(to_abs("javascript:alert(1)", &settings), None); + assert_eq!(to_abs("mailto:test@example.com", &settings), None); } #[test] @@ -981,13 +990,14 @@ mod tests { #[test] fn to_abs_additional_cases() { + let settings = crate::test_support::tests::create_test_settings(); assert_eq!( - to_abs(" https://cdn.example/a "), + to_abs(" https://cdn.example/a ", &settings), Some("https://cdn.example/a".to_string()) ); - assert_eq!(to_abs("blob:xyz"), None); - assert_eq!(to_abs("tel:+123"), None); - assert_eq!(to_abs("about:blank"), None); + assert_eq!(to_abs("blob:xyz", &settings), None); + assert_eq!(to_abs("tel:+123", &settings), None); + assert_eq!(to_abs("about:blank", &settings), None); } #[test] @@ -1003,4 +1013,134 @@ mod tests { // relative candidate remains assert!(out.contains("/local/img.png 1x")); } + + #[test] + fn to_abs_respects_exclude_domains() { + let mut settings = crate::test_support::tests::create_test_settings(); + settings.rewrite.exclude_domains = vec!["trusted-cdn.example.com".to_string()]; + + // Excluded domain should return None (not proxied) + assert_eq!( + to_abs("https://trusted-cdn.example.com/lib.js", &settings), + None + ); + + // Non-excluded domain should return Some + assert_eq!( + to_abs("https://other-cdn.example.com/lib.js", &settings), + Some("https://other-cdn.example.com/lib.js".to_string()) + ); + } + + #[test] + fn to_abs_respects_wildcard_domains() { + let mut settings = crate::test_support::tests::create_test_settings(); + settings.rewrite.exclude_domains = vec!["*.cloudflare.com".to_string()]; + + // Should exclude base domain + assert_eq!(to_abs("https://cloudflare.com/cdn.js", &settings), None); + + // Should exclude subdomain + assert_eq!( + to_abs("https://cdnjs.cloudflare.com/lib.js", &settings), + None + ); + + // Should not exclude different domain + assert_eq!( + to_abs("https://notcloudflare.com/lib.js", &settings), + Some("https://notcloudflare.com/lib.js".to_string()) + ); + } + + #[test] + fn rewrite_html_excludes_blacklisted_domains() { + let mut settings = crate::test_support::tests::create_test_settings(); + settings.rewrite.exclude_domains = vec!["trusted-cdn.example.com".to_string()]; + + let html = r#" + + + "#; + + let out = rewrite_creative_html(html, &settings); + + // Excluded domain should NOT be rewritten + assert!(out.contains(r#"src="https://trusted-cdn.example.com/logo.png"#)); + + // Non-excluded domain SHOULD be rewritten + assert!(out.contains("/first-party/proxy?tsurl=")); + assert!(out.contains("other-cdn.example.com")); + } + + #[test] + fn rewrite_srcset_excludes_blacklisted_domains() { + let mut settings = crate::test_support::tests::create_test_settings(); + settings.rewrite.exclude_domains = vec!["trusted.example.com".to_string()]; + + let html = r#" + + "#; + + let out = rewrite_creative_html(html, &settings); + + // Excluded domain should remain as-is + assert!(out.contains("https://trusted.example.com/img-1x.png 1x")); + + // Non-excluded should be proxied + assert!(out.contains("/first-party/proxy?tsurl=")); + assert!(out.contains("cdn.example.com")); + } + + #[test] + fn rewrite_style_urls_excludes_blacklisted_domains() { + let mut settings = crate::test_support::tests::create_test_settings(); + settings.rewrite.exclude_domains = vec!["fonts.googleapis.com".to_string()]; + + let html = r#" + + "#; + + let out = rewrite_creative_html(html, &settings); + + // Excluded domain should remain unchanged + assert!(out.contains("url(https://fonts.googleapis.com/font.woff2)")); + + // Non-excluded should be proxied + assert!(out.contains("/first-party/proxy?tsurl=")); + assert!(out.contains("cdn.example.com")); + } + + #[test] + fn rewrite_click_urls_excludes_blacklisted_domains() { + let mut settings = crate::test_support::tests::create_test_settings(); + settings.rewrite.exclude_domains = vec!["trusted-landing.example.com".to_string()]; + + let html = r#" + Trusted Link + Ad Link + "#; + + let out = rewrite_creative_html(html, &settings); + + // Excluded domain should NOT be rewritten to first-party click + assert!(out.contains(r#"href="https://trusted-landing.example.com/page"#)); + // The excluded link should NOT have data-tsclick since it wasn't rewritten + assert!( + !out.contains(r#", +} + +impl Rewrite { + /// Checks if a URL should be excluded from rewriting based on domain matching + #[allow(dead_code)] + pub fn is_excluded(&self, url: &str) -> bool { + // Parse URL to extract host + let Ok(parsed) = url::Url::parse(url) else { + return false; + }; + + let host = parsed.host_str().unwrap_or(""); + + // Check exact domain matches (with wildcard support) + for domain in &self.exclude_domains { + if let Some(suffix) = domain.strip_prefix("*.") { + // Wildcard: *.example.com matches both example.com and sub.example.com + if host == suffix || host.ends_with(&format!(".{}", suffix)) { + return true; + } + } else if host == domain { + return true; + } + } + + false + } +} + #[derive(Debug, Default, Deserialize, Serialize, Validate)] pub struct Handler { #[validate(length(min = 1), custom(function = validate_path))] @@ -298,6 +333,9 @@ pub struct Settings { #[serde(default)] pub response_headers: HashMap, pub request_signing: Option, + #[serde(default)] + #[validate(nested)] + pub rewrite: Rewrite, } #[allow(unused)] @@ -893,4 +931,30 @@ mod tests { assert!(config.is_none(), "Disabled integrations should be skipped"); } + + #[test] + fn test_rewrite_is_excluded() { + let rewrite = Rewrite { + exclude_domains: vec!["cdn.example.com".to_string(), "*.example2.com".to_string()], + }; + + // Exact domain match + assert!(rewrite.is_excluded("http://cdn.example.com/image.png")); + + // Wildcard match - base domain + assert!(rewrite.is_excluded("https://example2.com/cdn.js")); + // Wildcard match - subdomains + assert!(rewrite.is_excluded("https://cdnjs.example2.com/lib.js")); + assert!(rewrite.is_excluded("https://sub.domain.example2.com/asset.js")); + + // Should NOT match + assert!(!rewrite.is_excluded("https://other.example.com/asset.js")); + assert!(!rewrite.is_excluded("https://sub.cdn.example.com/asset.js")); + assert!(!rewrite.is_excluded("https://example2.com.fake.com/asset.js")); + assert!(!rewrite.is_excluded("https://notexample.com/asset.js")); + + // Invalid URLs should not crash and should return false + assert!(!rewrite.is_excluded("not a url")); + assert!(!rewrite.is_excluded("")); + } } diff --git a/trusted-server.toml b/trusted-server.toml index 9ec4ab6..6149565 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -48,3 +48,11 @@ secret_store_id = "" endpoint = "https://testlight.example/openrtb2/auction" timeout_ms = 1200 rewrite_scripts = true + +# Rewrite configuration for creative HTML/CSS processing +# [rewrite] +# Domains to exclude from first-party rewriting (supports wildcards like "*.example.com") +# URLs from these domains will be left as-is and not proxied +# exclude_domains = [ +# "*.edgecompute.app", +# ]