# Validation measurements

In this notebook we present the measurements taken from the samples that we use for validation in Section *4.2 Dataset Overview and Validation*. 

In [1]:
import pandas as pd

In [2]:

# Load the five validation samples in one pass. The targets on the left are
# matched positionally with the CSV paths on the right, so both lists must
# stay in the same order.
(
    detected_gtag_no_fdc,
    detected_mpixel_no_fdc,
    meta_config_no_fdc,
    no_detected_gtag_installation,
    no_detected_mpixel_installation,
) = map(
    pd.read_csv,
    (
        'samples/detected_gtag_no_fdc.csv',
        'samples/detected_mpixel_no_fdc.csv',
        'samples/meta_config_no_fdc.csv',
        'samples/no_detected_gtag_installation.csv',
        'samples/no_detected_mpixel_installation.csv',
    ),
)

### Section 4.2.1: How many tracker installations and form data collections are we missing?

In [3]:
# True/false negative rates for the sample in which no Google tag installation
# was detected. Rows with status 'error loading' are left out of the
# denominator; status 'TRUE' is counted as a true negative and 'FALSE' as a
# false negative.
gtag_install_status = no_detected_gtag_installation['status']

gtag_install_reachable = no_detected_gtag_installation.loc[gtag_install_status.ne('error loading')]
gtag_install_reachable_len = gtag_install_reachable.shape[0]

gtag_install_tn = no_detected_gtag_installation.loc[gtag_install_status.eq('TRUE')]
gtag_install_tn_length = gtag_install_tn.shape[0]
gtag_install_tn_percent = round((gtag_install_tn_length / gtag_install_reachable_len) * 100, 1)

gtag_install_fn = no_detected_gtag_installation.loc[gtag_install_status.eq('FALSE')]
gtag_install_fn_length = gtag_install_fn.shape[0]
gtag_install_fn_percent = round((gtag_install_fn_length / gtag_install_reachable_len) * 100, 1)

print(f'Google Installation True Negatives: {gtag_install_tn_percent}% ({gtag_install_tn_length})')
print(f'Google Installation False Negatives: {gtag_install_fn_percent}% ({gtag_install_fn_length})')

Google Installation True Negatives: 91.6% (316)
Google Installation False Negatives: 8.4% (29)


In [4]:
# True/false negative rates for the sample in which no Meta Pixel installation
# was detected. Rows with status 'error loading' are left out of the
# denominator; status 'TRUE' is counted as a true negative and 'FALSE' as a
# false negative.
meta_install_status = no_detected_mpixel_installation['status']

meta_install_reachable = no_detected_mpixel_installation.loc[meta_install_status.ne('error loading')]
meta_install_reachable_len = meta_install_reachable.shape[0]

meta_install_tn = no_detected_mpixel_installation.loc[meta_install_status.eq('TRUE')]
meta_install_tn_length = meta_install_tn.shape[0]
meta_install_tn_percent = round((meta_install_tn_length / meta_install_reachable_len) * 100, 1)

meta_install_fn = no_detected_mpixel_installation.loc[meta_install_status.eq('FALSE')]
meta_install_fn_length = meta_install_fn.shape[0]
meta_install_fn_percent = round((meta_install_fn_length / meta_install_reachable_len) * 100, 1)

print(f'Meta Installation True Negatives: {meta_install_tn_percent}% ({meta_install_tn_length})')
print(f'Meta Installation False Negatives: {meta_install_fn_percent}% ({meta_install_fn_length})')

Meta Installation True Negatives: 95.7% (352)
Meta Installation False Negatives: 4.3% (16)


In [5]:
# True/false negative rates for the sample where a Google tag was detected but
# no form data collection (FDC) was. Only rows with status 'error loading' are
# removed from the denominator.
# NOTE(review): rows whose status is neither 'TRUE', 'FALSE' nor 'error loading'
# stay in the denominator, so the two percentages need not sum to 100 (the
# recorded output sums to 96.5%). The Meta FDC cell additionally drops
# 'no meta pixel' rows -- confirm whether an equivalent exclusion is intended here.
gtag_fdc_reachable = detected_gtag_no_fdc[detected_gtag_no_fdc['status'] != 'error loading']
gtag_fdc_reachable_len = len(gtag_fdc_reachable)

# Status 'TRUE' is counted as a true negative.
gtag_fdc_tn = detected_gtag_no_fdc[detected_gtag_no_fdc['status'] == 'TRUE']
gtag_fdc_tn_length = len(gtag_fdc_tn)
gtag_fdc_tn_percent = round( (gtag_fdc_tn_length / gtag_fdc_reachable_len ) * 100, 1)

# Status 'FALSE' is counted as a false negative.
gtag_fdc_fn = detected_gtag_no_fdc[detected_gtag_no_fdc['status'] == 'FALSE']
gtag_fdc_fn_length = len(gtag_fdc_fn)
gtag_fdc_fn_percent = round( (gtag_fdc_fn_length / gtag_fdc_reachable_len ) * 100, 1)

print(f'Google FDC True Negatives: {gtag_fdc_tn_percent}% ({gtag_fdc_tn_length})')
# Label spelling fixed ("Negatvies" -> "Negatives") to match the installation cells.
print(f'Google FDC False Negatives: {gtag_fdc_fn_percent}% ({gtag_fdc_fn_length})')

Google FDC True Negatives: 95.1% (346)
Google FDC False Negatives: 1.4% (5)


In [6]:
# True/false negative rates for the sample where a Meta Pixel was detected but
# no form data collection (FDC) was. Rows with status 'no meta pixel' or
# 'error loading' are removed from the denominator.
meta_fdc_reachable = detected_mpixel_no_fdc[
    (detected_mpixel_no_fdc['status'] != 'no meta pixel') &
    (detected_mpixel_no_fdc['status'] != 'error loading')
]
meta_fdc_reachable_len = len(meta_fdc_reachable)

# Status 'TRUE' is counted as a true negative.
meta_fdc_tn = detected_mpixel_no_fdc[detected_mpixel_no_fdc['status'] == 'TRUE']
meta_fdc_tn_length = len(meta_fdc_tn)
meta_fdc_tn_percent = round( (meta_fdc_tn_length / meta_fdc_reachable_len ) * 100, 1)

# Status 'FALSE' is counted as a false negative.
meta_fdc_fn = detected_mpixel_no_fdc[detected_mpixel_no_fdc['status'] == 'FALSE']
meta_fdc_fn_length = len(meta_fdc_fn)
meta_fdc_fn_percent = round( (meta_fdc_fn_length / meta_fdc_reachable_len ) * 100, 1)

print(f'Meta FDC True Negatives: {meta_fdc_tn_percent}% ({meta_fdc_tn_length})')
# Label spelling fixed ("Negatvies" -> "Negatives") to match the installation cells.
print(f'Meta FDC False Negatives: {meta_fdc_fn_percent}% ({meta_fdc_fn_length})')


Meta FDC True Negatives: 90.4% (293)
Meta FDC False Negatives: 9.6% (31)


### Section 4.2.2: Form Data Collection as a proxy for FDC Configuration

In [7]:
# Validation of the Meta configuration sample. Only rows whose status is
# exactly 'TRUE' or 'FALSE' are usable; they form the denominator of both
# percentages below.
meta_config_no_fdc_usable = meta_config_no_fdc.loc[
    meta_config_no_fdc['status'].isin(['TRUE', 'FALSE'])
]
meta_config_no_fdc_usable_len = meta_config_no_fdc_usable.shape[0]

# Usable rows with status 'FALSE' are reported as websites with Meta config and Meta FDC.
meta_config_fdc_false_negative = meta_config_no_fdc_usable.loc[
    meta_config_no_fdc_usable['status'].eq('FALSE')
]
meta_config_fdc_false_negative_len = meta_config_fdc_false_negative.shape[0]
meta_config_fdc_false_negative_percent = round(
    (meta_config_fdc_false_negative_len / meta_config_no_fdc_usable_len) * 100, 1
)

# Usable rows flagged in the 'multiple_pixels_with_different_configs' column.
meta_config_multiple_pixels = meta_config_no_fdc_usable.loc[
    meta_config_no_fdc_usable['multiple_pixels_with_different_configs'].eq(True)
]
meta_config_multiple_pixels_len = meta_config_multiple_pixels.shape[0]
meta_config_multiple_pixels_percent = round(
    (meta_config_multiple_pixels_len / meta_config_no_fdc_usable_len) * 100, 1
)

print(f'Websites with Meta config and Meta FDC: {meta_config_fdc_false_negative_percent}% ({meta_config_fdc_false_negative_len} websites)')
print(f'Websites with multiple pixels: {meta_config_multiple_pixels_percent}% ({meta_config_multiple_pixels_len} websites)')

Websites with Meta config and Meta FDC: 36.1% (79 websites)
Websites with multiple pixels: 17.8% (39 websites)


### Section 4.2.3: Effect of Accepting or Rejecting Cookies on Tracker Installation and Form Data Collection

In [8]:
# Rates for the accepted-cookies sample. Every percentage uses the total
# number of rows in the sample as its denominator.
accepted_cookies = pd.read_csv('samples/accepted_cookies.csv')
accepted_cookies_len = accepted_cookies.shape[0]


def _accepted_share(count):
    """Return `count` as a percentage of the accepted-cookies sample, rounded to one decimal."""
    return round((count / accepted_cookies_len) * 100, 1)


# Meta Pixel: installation, form-data configuration, form-data collection.
meta_installed_ac = accepted_cookies.loc[accepted_cookies['has_meta_pixel'].eq(True)]
meta_installed_ac_len = meta_installed_ac.shape[0]
meta_installed_percent_ac = _accepted_share(meta_installed_ac_len)

meta_configuration_ac = accepted_cookies.loc[accepted_cookies['meta_form_data_configuration'].eq(True)]
meta_configuration_ac_len = meta_configuration_ac.shape[0]
meta_configuration_percent_ac = _accepted_share(meta_configuration_ac_len)

meta_pixels_fdc_ac = accepted_cookies.loc[accepted_cookies['meta_form_data_collection'].eq(True)]
meta_pixels_fdc_ac_len = meta_pixels_fdc_ac.shape[0]
meta_fdc_percent_ac = _accepted_share(meta_pixels_fdc_ac_len)

# Google tag: installation and form-data collection.
google_installed_ac = accepted_cookies.loc[accepted_cookies['has_gtag'].eq(True)]
google_installed_ac_len = google_installed_ac.shape[0]
google_installed_percent_ac = _accepted_share(google_installed_ac_len)

google_pixels_fdc_ac = accepted_cookies.loc[accepted_cookies['google_form_data_collection'].eq(True)]
google_pixels_fdc_ac_len = google_pixels_fdc_ac.shape[0]
google_fdc_percent_ac = _accepted_share(google_pixels_fdc_ac_len)

print("ACCEPTED COOKIES")
print(f'Meta Pixel tracker installations : {meta_installed_percent_ac}% ({meta_installed_ac_len} websites)')
print(f'Meta Pixel tracker configurations: {meta_configuration_percent_ac}% ({meta_configuration_ac_len} websites)')
print(f'Meta Pixel FDC : {meta_fdc_percent_ac}% ({meta_pixels_fdc_ac_len} websites)')
print(f'Google Pixel tracker installations : {google_installed_percent_ac}% ({google_installed_ac_len} websites)')
print(f'Google Pixel FDC : {google_fdc_percent_ac}% ({google_pixels_fdc_ac_len} websites)')

ACCEPTED COOKIES
Meta Pixel tracker installations : 43.8% (64 websites)
Meta Pixel tracker configurations: 24.0% (35 websites)
Meta Pixel FDC : 20.5% (30 websites)
Google Pixel tracker installations : 71.9% (105 websites)
Google Pixel FDC : 8.2% (12 websites)


In [9]:
# Rates for the rejected-cookies sample. Every percentage uses the total
# number of rows in the sample as its denominator.
rejected_cookies = pd.read_csv('samples/rejected_cookies.csv')
rejected_cookies_len = rejected_cookies.shape[0]


def _rejected_share(count):
    """Return `count` as a percentage of the rejected-cookies sample, rounded to one decimal."""
    return round((count / rejected_cookies_len) * 100, 1)


# Meta Pixel: installation and form-data collection.
meta_installed_rc = rejected_cookies.loc[rejected_cookies['has_meta_pixel'].eq(True)]
meta_installed_rc_len = meta_installed_rc.shape[0]
meta_installed_percent_rc = _rejected_share(meta_installed_rc_len)

meta_pixels_fdc_rc = rejected_cookies.loc[rejected_cookies['meta_form_data_collection'].eq(True)]
meta_pixels_fdc_rc_len = meta_pixels_fdc_rc.shape[0]
meta_fdc_percent_rc = _rejected_share(meta_pixels_fdc_rc_len)

# Google tag: installation and form-data collection.
google_installed_rc = rejected_cookies.loc[rejected_cookies['has_gtag'].eq(True)]
google_installed_rc_len = google_installed_rc.shape[0]
google_installed_percent_rc = _rejected_share(google_installed_rc_len)

google_pixels_fdc_rc = rejected_cookies.loc[rejected_cookies['google_form_data_collection'].eq(True)]
google_pixels_fdc_rc_len = google_pixels_fdc_rc.shape[0]
google_fdc_percent_rc = _rejected_share(google_pixels_fdc_rc_len)

print("REJECTED COOKIES")
print(f'Meta Pixel tracker installations : {meta_installed_percent_rc}% ({meta_installed_rc_len} websites)')
print(f'Meta Pixel FDC : {meta_fdc_percent_rc}% ({meta_pixels_fdc_rc_len} websites)')
print(f'Google Pixel tracker installations : {google_installed_percent_rc}% ({google_installed_rc_len} websites)')
print(f'Google Pixel FDC : {google_fdc_percent_rc}% ({google_pixels_fdc_rc_len} websites)')

REJECTED COOKIES
Meta Pixel tracker installations : 0.0% (0 websites)
Meta Pixel FDC : 0.0% (0 websites)
Google Pixel tracker installations : 9.3% (12 websites)
Google Pixel FDC : 0.0% (0 websites)
