From 06377b1df1f47bdefef6bc3d261466ad1ac61515 Mon Sep 17 00:00:00 2001 From: Taylor Lovett Date: Tue, 1 May 2018 14:42:43 +0900 Subject: [PATCH] Strip HTML before analyzing; add post_content_filtered field. #1070 --- classes/class-ep-api.php | 50 +++++++++++++++++++-------------------- includes/mappings/5-2.php | 4 ++++ 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/classes/class-ep-api.php b/classes/class-ep-api.php index 633a7c025..b40e04a5d 100644 --- a/classes/class-ep-api.php +++ b/classes/class-ep-api.php @@ -614,31 +614,31 @@ public function prepare_post( $post_id ) { remove_action( 'updated_postmeta', array( EP_Sync_Manager::factory(), 'action_queue_meta_sync' ), 10 ); $post_args = array( - 'post_id' => $post_id, - 'ID' => $post_id, - 'post_author' => $user_data, - 'post_date' => $post_date, - 'post_date_gmt' => $post_date_gmt, - 'post_title' => $this->prepare_text_content( get_the_title( $post_id ) ), - 'post_excerpt' => $this->prepare_text_content( $post->post_excerpt ), - 'post_content' => $this->prepare_text_content( apply_filters( 'the_content', $post->post_content ) ), - 'post_status' => $post->post_status, - 'post_name' => $post->post_name, - 'post_modified' => $post_modified, - 'post_modified_gmt' => $post_modified_gmt, - 'post_parent' => $post->post_parent, - 'post_type' => $post->post_type, - 'post_mime_type' => $post->post_mime_type, - 'permalink' => get_permalink( $post_id ), - 'terms' => $this->prepare_terms( $post ), - 'meta' => $this->prepare_meta_types( $this->prepare_meta( $post ) ), // post_meta removed in 2.4 - 'date_terms' => $this->prepare_date_terms( $post_date ), - 'comment_count' => $comment_count, - 'comment_status' => $comment_status, - 'ping_status' => $ping_status, - 'menu_order' => $menu_order, - 'guid' => $post->guid - //'site_id' => get_current_blog_id(), + 'post_id' => $post_id, + 'ID' => $post_id, + 'post_author' => $user_data, + 'post_date' => $post_date, + 'post_date_gmt' => $post_date_gmt, + 'post_title' => $post->post_title, + 'post_excerpt' => $post->post_excerpt, + 'post_content_filtered' => apply_filters( 'the_content', $post->post_content ), + 'post_content' => $post->post_content, + 'post_status' => $post->post_status, + 'post_name' => $post->post_name, + 'post_modified' => $post_modified, + 'post_modified_gmt' => $post_modified_gmt, + 'post_parent' => $post->post_parent, + 'post_type' => $post->post_type, + 'post_mime_type' => $post->post_mime_type, + 'permalink' => get_permalink( $post_id ), + 'terms' => $this->prepare_terms( $post ), + 'meta' => $this->prepare_meta_types( $this->prepare_meta( $post ) ), // post_meta removed in 2.4 + 'date_terms' => $this->prepare_date_terms( $post_date ), + 'comment_count' => $comment_count, + 'comment_status' => $comment_status, + 'ping_status' => $ping_status, + 'menu_order' => $menu_order, + 'guid' => $post->guid, ); /** diff --git a/includes/mappings/5-2.php b/includes/mappings/5-2.php index c4dff2ce8..365c0bd0b 100644 --- a/includes/mappings/5-2.php +++ b/includes/mappings/5-2.php @@ -19,6 +19,7 @@ 'default' => array( 'tokenizer' => 'standard', 'filter' => array( 'standard', 'ewp_word_delimiter', 'lowercase', 'stop', 'ewp_snowball' ), + 'char_filter' => array( 'html_strip' ), 'language' => apply_filters( 'ep_analyzer_language', 'english', 'analyzer_default' ), ), 'shingle_analyzer' => array( @@ -255,6 +256,9 @@ 'post_content' => array( 'type' => 'text', ), + 'post_content_filtered' => array( + 'type' => 'text', + ), 'post_status' => array( 'type' => 'keyword', ),