In [1]:
import pandas as pd
from latex_formatter import LatexFormatter as lf

In [2]:
# Load the website table; later cells count one row as one website.
websites = pd.read_csv(filepath_or_buffer='data/websites.csv')

In [3]:
def _vertical_counts(frame):
    """Count the rows of ``frame`` that fall in each vertical.

    Args:
        frame: DataFrame with ``is_health`` and ``is_finance`` flag columns.
            ``~`` is applied to both, so they are assumed to be boolean.

    Returns:
        Tuple ``(health, finance, other)`` of plain ``int`` counts, where
        ``other`` counts rows flagged as neither health nor finance. A row
        flagged as both is counted in ``health`` and in ``finance``.
    """
    is_health = frame['is_health']
    is_finance = frame['is_finance']
    # Series.sum() is vectorised (no Python-level iteration); int() keeps the
    # counts as plain Python ints for the arithmetic in later cells.
    return (
        int(is_health.sum()),
        int(is_finance.sum()),
        int((~is_health & ~is_finance).sum()),
    )


# BREAKDOWN BY VERTICALS
total = len(websites)
health, finance, other = _vertical_counts(websites)

# BREAKDOWN BY TRACKER
websites_google = websites[websites['has_gtag']]
google = len(websites_google)
websites_meta = websites[websites['has_meta_pixel']]
meta = len(websites_meta)


# GOOGLE SUBCATEGORIES
# "fdc" = rows whose google_form_data_collection flag is set, among gtag rows.
websites_google_fdc = websites_google[websites_google['google_form_data_collection']]
google_fdc = len(websites_google_fdc)
google_health, google_finance, google_other = _vertical_counts(websites_google)
google_fdc_health, google_fdc_finance, google_fdc_other = _vertical_counts(websites_google_fdc)


# META SUBCATEGORIES
# "fdc" = rows whose meta_form_data_collection flag is set, among Meta-pixel rows.
websites_meta_fdc = websites_meta[websites_meta['meta_form_data_collection']]
meta_fdc = len(websites_meta_fdc)
meta_health, meta_finance, meta_other = _vertical_counts(websites_meta)
meta_fdc_health, meta_fdc_finance, meta_fdc_other = _vertical_counts(websites_meta_fdc)


In [4]:
# PERCENTAGES
def _pct(part, whole):
    """Return ``lf.frac`` applied to ``part`` expressed as a percentage of ``whole``.

    The result is what gets interpolated into the LaTeX table cell.
    """
    return lf.frac(100 * part / whole)


# Google: share of websites (overall and per vertical) that have the tracker,
# then share of those tracker websites with form data collection.
google_percentage = _pct(google, total)
google_health_percentage = _pct(google_health, health)
google_finance_percentage = _pct(google_finance, finance)
google_other_percentage = _pct(google_other, other)
google_fdc_percentage = _pct(google_fdc, google)
google_fdc_health_percentage = _pct(google_fdc_health, google_health)
google_fdc_finance_percentage = _pct(google_fdc_finance, google_finance)
google_fdc_other_percentage = _pct(google_fdc_other, google_other)


# Meta: same two families of ratios as for Google.
meta_percentage = _pct(meta, total)
meta_health_percentage = _pct(meta_health, health)
meta_finance_percentage = _pct(meta_finance, finance)
meta_other_percentage = _pct(meta_other, other)
meta_fdc_percentage = _pct(meta_fdc, meta)
meta_fdc_health_percentage = _pct(meta_fdc_health, meta_health)
meta_fdc_finance_percentage = _pct(meta_fdc_finance, meta_finance)
meta_fdc_other_percentage = _pct(meta_fdc_other, meta_other)


In [5]:
# TESTS
# Inclusion-exclusion: a website flagged as both health and finance is counted
# in `health` and in `finance`, so the overlap is subtracted once.
_both_verticals = int((websites['is_health'] & websites['is_finance']).sum())
assert total == health + finance + other - _both_verticals, (
    "vertical counts do not add up to the number of websites"
)

# `~` is applied to the vertical flags when counting "other" websites. On a
# non-boolean column `~` is a bitwise (not logical) negation and would silently
# skew those counts, and a set comparison against {True, False} cannot detect
# that (0/1 compare equal to False/True), so check the dtype explicitly.
for _flag in ('is_health', 'is_finance'):
    assert pd.api.types.is_bool_dtype(websites[_flag]), (
        f"{_flag} must have a boolean dtype, got {websites[_flag].dtype}"
    )

# Every tracker / form-data-collection flag must take exactly the values
# True and False (no missing values, and both outcomes present).
for _flag in (
    'has_gtag',
    'has_meta_pixel',
    'google_form_data_collection',
    'meta_form_data_collection',
):
    assert set(websites[_flag]) == {True, False}, (
        f"{_flag} must contain exactly the values True and False"
    )


In [6]:
# Build the LaTeX source of the vertical-breakdown table (label
# tab:vertical_breakdown) from the counts and percentages computed in the
# earlier cells.
#
# f-string escaping: `{{` / `}}` emit literal braces and `\\` emits a single
# backslash, so `\\\\` becomes the LaTeX row terminator `\\`.
#
# Layout: 8 columns (`l r rrr rrr`). Each data row holds, in order:
#   vertical name | websites in the vertical |
#   Google: tracker websites, their share of the vertical's websites,
#           share of those tracker websites with form data collection |
#   Meta:   the same three values.
#
# NOTE(review): the emitted LaTeX uses \tracker and \dynamica, which are not
# defined here and are presumably custom macros of the target document;
# \multirow, \rowcolor and \toprule/\cmidrule/\bottomrule presumably need the
# multirow, colortbl and booktabs packages -- confirm against the preamble.
table_4 = f"""
    \\begin{{table*}}[t]
      \\centering
      \\small
      \\begin{{tabular}}{{l r rrr rrr}}
        \\toprule
        \\multirow{{3}}{{*}}{{\\normalsize\\textbf{{Vertical}}}} &  \\multirow{{3}}{{*}}{{\\normalsize\\textbf{{Websites}}}} & \\multicolumn{{3}}{{c}}{{\\normalsize{{\\textbf{{Google}}}}}} & \\multicolumn{{3}}{{c}}{{\\normalsize{{\\textbf{{Meta}}}}}}  \\\\  
          & & \\multicolumn{{2}}{{c}}{{\\textbf{{\\tracker[u]}}}}  &  \\multicolumn{{1}}{{c}}{{\\textbf{{\\dynamica[u]}}}}  & \\multicolumn{{2}}{{c}}{{\\textbf{{\\tracker[u]}}}}   &  \\multicolumn{{1}}{{c}}{{\\textbf{{\\dynamica[u]}}}} \\\\  
          & & Websites & Vertical Websites  & \\tracker[u]  & Websites &  Vertical Websites  &  \\tracker[u] \\\\
        \\cmidrule(lr){{1-2}}  \\cmidrule(lr){{3-5}}  \\cmidrule(lr){{6-8}} 
        \\rowcolor{{lightgray}} \\textbf{{Non-Sensitive}} & {lf.num(other)} & {lf.num(google_other)} & {google_other_percentage} & {google_fdc_other_percentage} &{lf.num(meta_other)}  &{meta_other_percentage}  & {meta_fdc_other_percentage} \\\\
        \\textbf{{Health}}& {lf.num(health)}  & {lf.num(google_health)} & {google_health_percentage} & {google_fdc_health_percentage}  & {lf.num(meta_health)}  & {meta_health_percentage}& {meta_fdc_health_percentage} \\\\
        \\rowcolor{{lightgray}} \\textbf{{Finance}} & {lf.num(finance)}  & {lf.num(google_finance)}  &{google_finance_percentage}  & {google_fdc_finance_percentage}  & {lf.num(meta_finance)}  & {meta_finance_percentage} & {meta_fdc_finance_percentage} \\\\
        \\textbf{{Total}} & {lf.num(total)} &  {lf.num(google)} & {google_percentage} & {google_fdc_percentage} & {lf.num(meta)} & {meta_percentage} & {meta_fdc_percentage} \\\\
        \\bottomrule
      \\end{{tabular}}
      \\caption{{Breakdown of \\tracker[s] and \\dynamica for Google and Meta on different verticals.}}
      \\label{{tab:vertical_breakdown}}
    \\end{{table*}}
"""

# Write the raw LaTeX text to the cell output.
print(table_4)


    \begin{table*}[t]
      \centering
      \small
      \begin{tabular}{l r rrr rrr}
        \toprule
        \multirow{3}{*}{\normalsize\textbf{Vertical}} &  \multirow{3}{*}{\normalsize\textbf{Websites}} & \multicolumn{3}{c}{\normalsize{\textbf{Google}}} & \multicolumn{3}{c}{\normalsize{\textbf{Meta}}}  \\  
          & & \multicolumn{2}{c}{\textbf{\tracker[u]}}  &  \multicolumn{1}{c}{\textbf{\dynamica[u]}}  & \multicolumn{2}{c}{\textbf{\tracker[u]}}   &  \multicolumn{1}{c}{\textbf{\dynamica[u]}} \\  
          & & Websites & Vertical Websites  & \tracker[u]  & Websites &  Vertical Websites  &  \tracker[u] \\
        \cmidrule(lr){1-2}  \cmidrule(lr){3-5}  \cmidrule(lr){6-8} 
        \rowcolor{lightgray} \textbf{Non-Sensitive} & 35,113 & 25,471 & 72.5\% & 11.5\% &9,731  &27.7\%  & 68.0\% \\
        \textbf{Health}& 3,406  & 2,565 & 75.3\% & 11.6\%  & 1,075  & 31.6\%& 30.8\% \\
        \rowcolor{lightgray} \textbf{Finance} & 1,633  & 1,103  &67.5\%  & 13.4\%  & 503  & 30.8\% & 20.3\