Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions crates/common/src/creative.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,12 +300,12 @@ pub fn rewrite_creative_html(markup: &str, settings: &Settings) -> String {
let mut rewriter = HtmlRewriter::new(
HtmlSettings {
element_content_handlers: vec![
// Inject tsjs-creative at the top of body once
// Inject unified tsjs bundle at the top of body once
element!("body", {
let injected = injected_ts_creative.clone();
move |el| {
if !injected.get() {
let script_tag = tsjs::creative_script_tag();
let script_tag = tsjs::unified_script_tag();
el.prepend(&script_tag, ContentType::Html);
injected.set(true);
}
Expand Down Expand Up @@ -490,20 +490,20 @@ mod tests {
let html = r#"<html><body><p>hello</p></body></html>"#;
let out = rewrite_creative_html(html, &settings);
assert!(
out.contains("/static/tsjs=tsjs-creative.min.js"),
"expected tsjs-creative injection: {}",
out.contains("/static/tsjs=tsjs-unified.min.js"),
"expected unified tsjs injection: {}",
out
);
// Inject only once
assert_eq!(out.matches("/static/tsjs=tsjs-creative.min.js").count(), 1);
assert_eq!(out.matches("/static/tsjs=tsjs-unified.min.js").count(), 1);
}

#[test]
fn injects_tsjs_creative_once_with_multiple_bodies() {
fn injects_tsjs_unified_once_with_multiple_bodies() {
let settings = crate::test_support::tests::create_test_settings();
let html = r#"<html><body>one</body><body>two</body></html>"#;
let out = rewrite_creative_html(html, &settings);
assert_eq!(out.matches("/static/tsjs=tsjs-creative.min.js").count(), 1);
assert_eq!(out.matches("/static/tsjs=tsjs-unified.min.js").count(), 1);
}

#[test]
Expand Down
158 changes: 28 additions & 130 deletions crates/common/src/html_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
//!
//! This module provides a StreamProcessor implementation for HTML content.
use std::cell::Cell;
use std::collections::BTreeSet;
use std::rc::Rc;

use lol_html::{element, html_content::ContentType, text, Settings as RewriterSettings};
Expand All @@ -25,7 +24,6 @@ pub struct HtmlProcessorConfig {
pub integrations: IntegrationRegistry,
pub nextjs_enabled: bool,
pub nextjs_attributes: Vec<String>,
pub integration_assets: Vec<String>,
}

impl HtmlProcessorConfig {
Expand All @@ -37,12 +35,6 @@ impl HtmlProcessorConfig {
request_host: &str,
request_scheme: &str,
) -> Self {
let asset_set: BTreeSet<String> = integrations
.registered_integrations()
.into_iter()
.flat_map(|meta| meta.assets)
.collect();

Self {
origin_host: origin_host.to_string(),
request_host: request_host.to_string(),
Expand All @@ -51,7 +43,6 @@ impl HtmlProcessorConfig {
integrations: integrations.clone(),
nextjs_enabled: settings.publisher.nextjs.enabled,
nextjs_attributes: settings.publisher.nextjs.rewrite_attributes.clone(),
integration_assets: asset_set.into_iter().collect(),
}
}
}
Expand Down Expand Up @@ -129,8 +120,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
let nextjs_attributes = Rc::new(config.nextjs_attributes.clone());

let injected_tsjs = Rc::new(Cell::new(false));
let integration_assets = Rc::new(config.integration_assets.clone());
let injected_assets = Rc::new(Cell::new(false));
let integration_registry = config.integrations.clone();
let script_rewriters = integration_registry.script_rewriters();

Expand All @@ -145,27 +134,19 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
}

let mut element_content_handlers = vec![
// Inject unified tsjs bundle once at the start of <head>
element!("head", {
let injected_tsjs = injected_tsjs.clone();
let integration_assets = integration_assets.clone();
let injected_assets = injected_assets.clone();
move |el| {
if !injected_tsjs.get() {
let loader = tsjs::core_script_tag();
let loader = tsjs::unified_script_tag();
el.prepend(&loader, ContentType::Html);
injected_tsjs.set(true);
}
if !integration_assets.is_empty() && !injected_assets.get() {
for asset in integration_assets.iter() {
let attrs = format!("async data-tsjs-integration=\"{}\"", asset);
let tag = tsjs::integration_script_tag(asset, &attrs);
el.append(&tag, ContentType::Html);
}
injected_assets.set(true);
}
Ok(())
}
}),
// Replace URLs in href attributes
element!("[href]", {
let patterns = patterns.clone();
let rewrite_prebid = config.enable_prebid;
Expand All @@ -174,7 +155,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
if let Some(mut href) = el.get_attribute("href") {
let original_href = href.clone();
if rewrite_prebid && is_prebid_script_url(&href) {
href = tsjs::ext_script_src();
el.remove();
} else {
let new_href = href
.replace(&patterns.https_origin(), &patterns.replacement_url())
Expand Down Expand Up @@ -204,6 +185,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
Ok(())
}
}),
// Replace URLs in src attributes
element!("[src]", {
let patterns = patterns.clone();
let rewrite_prebid = config.enable_prebid;
Expand All @@ -212,7 +194,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
if let Some(mut src) = el.get_attribute("src") {
let original_src = src.clone();
if rewrite_prebid && is_prebid_script_url(&src) {
src = tsjs::ext_script_src();
el.remove();
} else {
let new_src = src
.replace(&patterns.https_origin(), &patterns.replacement_url())
Expand Down Expand Up @@ -242,6 +224,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
Ok(())
}
}),
// Replace URLs in action attributes
element!("[action]", {
let patterns = patterns.clone();
let integrations = integration_registry.clone();
Expand Down Expand Up @@ -275,6 +258,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
Ok(())
}
}),
// Replace URLs in srcset attributes (for responsive images)
element!("[srcset]", {
let patterns = patterns.clone();
let integrations = integration_registry.clone();
Expand Down Expand Up @@ -313,6 +297,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
Ok(())
}
}),
// Replace URLs in imagesrcset attributes (for link preload)
element!("[imagesrcset]", {
let patterns = patterns.clone();
let integrations = integration_registry.clone();
Expand Down Expand Up @@ -405,28 +390,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso

let rewriter_settings = RewriterSettings {
element_content_handlers,

// TODO: Consider adding text content replacement if needed with settings
// // Replace URLs in text content
// document_content_handlers: vec![lol_html::doc_text!({
// move |text| {
// let content = text.as_str();

// // Apply URL replacements
// let mut new_content = content.to_string();
// for replacement in replacer.replacements.iter() {
// if new_content.contains(&replacement.find) {
// new_content = new_content.replace(&replacement.find, &replacement.replace_with);
// }
// }

// if new_content != content {
// text.replace(&new_content, lol_html::html_content::ContentType::Text);
// }

// Ok(())
// }
// })],
..RewriterSettings::default()
};

Expand All @@ -437,24 +400,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
mod tests {
use super::*;
use crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline};
use crate::tsjs;
use std::io::Cursor;

const MOCK_TESTLIGHT_SRC: &str = "https://mock.testassets/testlight.js";

struct MockBundleGuard;

fn mock_testlight_bundle() -> MockBundleGuard {
tsjs::mock_integration_bundle("testlight", MOCK_TESTLIGHT_SRC);
MockBundleGuard
}

impl Drop for MockBundleGuard {
fn drop(&mut self) {
tsjs::clear_mock_integration_bundles();
}
}

fn create_test_config() -> HtmlProcessorConfig {
HtmlProcessorConfig {
origin_host: "origin.example.com".to_string(),
Expand All @@ -464,19 +411,18 @@ mod tests {
integrations: IntegrationRegistry::default(),
nextjs_enabled: false,
nextjs_attributes: vec!["href".to_string(), "link".to_string(), "url".to_string()],
integration_assets: Vec::new(),
}
}

#[test]
fn test_injects_tsjs_script_and_rewrites_prebid_refs() {
fn test_injects_unified_bundle_and_removes_prebid_refs() {
let html = r#"<html><head>
<script src="/js/prebid.min.js"></script>
<link rel="preload" as="script" href="https://cdn.prebid.org/prebid.js" />
</head><body></body></html>"#;

let mut config = create_test_config();
config.enable_prebid = true; // enable rewriting of Prebid URLs
config.enable_prebid = true; // enable removal of Prebid URLs
let processor = create_html_processor(config);
let pipeline_config = PipelineConfig {
input_compression: Compression::None,
Expand All @@ -489,19 +435,20 @@ mod tests {
let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output);
assert!(result.is_ok());
let processed = String::from_utf8_lossy(&output);
assert!(processed.contains("/static/tsjs=tsjs-core.min.js"));
// Prebid references are rewritten to our extension when auto-configure is on
assert!(processed.contains("/static/tsjs=tsjs-ext.min.js"));
assert!(processed.contains("/static/tsjs=tsjs-unified.min.js"));
// Prebid script references should be removed when auto-configure is on
assert!(!processed.contains("prebid.min.js"));
assert!(!processed.contains("cdn.prebid.org/prebid.js"));
}

#[test]
fn test_injects_tsjs_script_and_rewrites_prebid_with_query_string() {
fn test_injects_unified_bundle_and_removes_prebid_with_query_string() {
let html = r#"<html><head>
<script src="/wp-content/plugins/prebidjs/js/prebidjs.min.js?v=1.2.3"></script>
</head><body></body></html>"#;

let mut config = create_test_config();
config.enable_prebid = true; // enable rewriting of Prebid URLs
config.enable_prebid = true; // enable removal of Prebid URLs
let processor = create_html_processor(config);
let pipeline_config = PipelineConfig {
input_compression: Compression::None,
Expand All @@ -514,19 +461,21 @@ mod tests {
let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output);
assert!(result.is_ok());
let processed = String::from_utf8_lossy(&output);
assert!(processed.contains("/static/tsjs=tsjs-core.min.js"));
assert!(processed.contains("/static/tsjs=tsjs-ext.min.js"));
// Should inject unified bundle
assert!(processed.contains("/static/tsjs=tsjs-unified.min.js"));
// Prebid script should be removed
assert!(!processed.contains("prebidjs.min.js"));
}

#[test]
fn test_always_injects_tsjs_script() {
fn test_always_injects_unified_bundle() {
let html = r#"<html><head>
<script src="/js/prebid.min.js"></script>
<link rel="preload" as="script" href="https://cdn.prebid.org/prebid.js" />
</head><body></body></html>"#;

let mut config = create_test_config();
config.enable_prebid = false; // No longer affects tsjs injection
config.enable_prebid = false; // When disabled, don't remove Prebid scripts
let processor = create_html_processor(config);
let pipeline_config = PipelineConfig {
input_compression: Compression::None,
Expand All @@ -539,10 +488,11 @@ mod tests {
let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output);
assert!(result.is_ok());
let processed = String::from_utf8_lossy(&output);
// When auto-configure is disabled, do not rewrite Prebid references
// When auto-configure is disabled, do not remove Prebid references
assert!(processed.contains("/js/prebid.min.js"));
assert!(processed.contains("cdn.prebid.org/prebid.js"));
assert!(processed.contains("/static/tsjs=tsjs-core.min.js"));
// But still inject unified bundle
assert!(processed.contains("/static/tsjs=tsjs-unified.min.js"));
}

#[test]
Expand Down Expand Up @@ -712,10 +662,10 @@ mod tests {
use crate::test_support::tests::create_test_settings;

let settings = create_test_settings();
let registry = IntegrationRegistry::new(&settings);
let integrations = IntegrationRegistry::default();
let config = HtmlProcessorConfig::from_settings(
&settings,
&registry,
&integrations,
"origin.test-publisher.com",
"proxy.example.com",
"https",
Expand Down Expand Up @@ -808,58 +758,6 @@ mod tests {
);
}

#[test]
fn test_integration_registry_rewrites_integration_scripts() {
use serde_json::json;

let html = r#"<html><head>
<script src="https://cdn.testlight.com/v1/testlight.js"></script>
</head><body></body></html>"#;

let _bundle_guard = mock_testlight_bundle();
let mut settings = Settings::default();
let shim_src = tsjs::integration_script_src("testlight");
settings
.integrations
.insert_config(
"testlight",
&json!({
"enabled": true,
"endpoint": "https://example.com/openrtb2/auction",
"rewrite_scripts": true,
"shim_src": shim_src,
}),
)
.expect("should insert testlight config");

let registry = IntegrationRegistry::new(&settings);
let mut config = create_test_config();
config.integrations = registry;

let processor = create_html_processor(config);
let pipeline_config = PipelineConfig {
input_compression: Compression::None,
output_compression: Compression::None,
chunk_size: 8192,
};
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);

let mut output = Vec::new();
let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output);
assert!(result.is_ok());

let processed = String::from_utf8_lossy(&output);
let expected_src = tsjs::integration_script_src("testlight");
assert!(
processed.contains(&expected_src),
"Integration shim should replace integration script reference"
);
assert!(
!processed.contains("cdn.testlight.com"),
"Original integration URL should be removed"
);
}

#[test]
fn test_real_publisher_html_with_gzip() {
use flate2::read::GzDecoder;
Expand Down
Loading