Permalink
Browse files

Add robots.txt entries for provider's crawlers

Google recommends setting an empty robots.txt Disallow for their user
agent, Mediapartners-Google, for sites serving their ads.

To prevent users from having to add these entries on their own, we can
add them automatically. Each provider now has a $crawler_user_agent
property, and the Disallows are filterable via acm_robotstxt_disallow.
  • Loading branch information...
1 parent 7a3c74f commit d6a9013a5e5db08a5d9527cbafa66a1e0975b2e7 @nickdaugherty nickdaugherty committed May 1, 2013
@@ -18,6 +18,7 @@ class ACM_Provider {
public $output_tokens = array();
public $ad_tag_ids;
public $ad_code_args = array();
+ public $crawler_user_agent;
function __construct() {
if ( empty( $this->ad_code_args ) ) {
// This is not actual data, but rather format:
@@ -40,5 +41,36 @@ function __construct() {
if ( empty( $this->output_html ) ) {
$this->output_html = '<script type="text/javascript" src="%url%"></script>';
}
+
+ if ( ! empty( $this->crawler_user_agent ) ) {
+ $should_do_robotstxt = apply_filters( 'acm_should_do_robotstxt', true, $this );
+
+ if ( true === $should_do_robotstxt )
+ add_action( 'do_robotstxt', array( $this, 'action_do_robotstxt' ), 10 );
+ }
+ }
+
+ /**
+  * Print this provider's crawler rules while robots.txt is being generated.
+  *
+  * Registered on the `do_robotstxt` action by the constructor when
+  * $crawler_user_agent is set. When the `blog_public` option is '0' the
+  * crawler is given "Disallow: /"; otherwise it is given an empty Disallow.
+  * The list of Disallow values is passed through the `acm_robotstxt_disallow`
+  * filter together with this provider instance.
+  *
+  * Nothing is printed (not even the User-agent line) when the filtered value
+  * is not a non-empty array.
+  *
+  * @return void
+  */
+ public function action_do_robotstxt() {
+     // Loose comparison on purpose: an integer 0 is treated like the string '0'.
+     $default_rule = ( '0' == get_option( 'blog_public' ) ) ? '/' : '';
+
+     $disallowed = apply_filters( 'acm_robotstxt_disallow', array( $default_rule ), $this );
+
+     // If we have no disallows to add, don't add anything (including User-agent)
+     if ( ! is_array( $disallowed ) || empty( $disallowed ) ) {
+         return;
+     }
+
+     echo 'User-agent: ' . $this->crawler_user_agent . PHP_EOL;
+
+     foreach ( $disallowed as $disallow ) {
+         echo 'Disallow: ' . $disallow . PHP_EOL;
+     }
+
+     // Close the group with a blank line so that whatever is printed next
+     // (another provider's group, or the "User-agent: *" group that follows)
+     // starts a separate record instead of running into this one.
+     echo PHP_EOL;
+ }
}
@@ -1,6 +1,7 @@
<?php
class Doubleclick_For_Publishers_Async_ACM_Provider extends ACM_Provider {
+ public $crawler_user_agent = 'Mediapartners-Google';
public function __construct() {
@@ -9,6 +9,8 @@ class Doubleclick_For_Publishers_Columns {
}
class Doubleclick_For_Publishers_ACM_Provider extends ACM_Provider {
+ public $crawler_user_agent = 'Mediapartners-Google';
+
function __construct() {
// Default output HTML
$this->output_html = '<script type="text/javascript" src="%url%"></script>';
@@ -3,6 +3,7 @@
* Google AdSense Ad Provider for Ad Code Manager
*/
class Google_AdSense_ACM_Provider extends ACM_Provider {
+ public $crawler_user_agent = 'Mediapartners-Google';
/**
* Register default options for Google AdSense

0 comments on commit d6a9013

Please sign in to comment.