In [1]:
file_name =  "trackers.csv"

import pandas as pd
df = pd.read_csv(file_name)

In [2]:
from urllib.parse import urlparse

def count_apps(apps):
    count = {}
    for app in apps:
        if app in count:
            count[app]  += 1
        else:
            count[app] = 1
    
    return(dict(sorted(count.items(), key=lambda item: item[1], reverse=True)))

def count_urls(urls):
    count = {}
    for url in urls:
        parsed_url = urlparse('//'+str(url))
        domain = '{uri.netloc}'.format(uri=parsed_url)
        if domain in count:
            count[domain]  += 1
        else:
            count[domain] = 1
    return(dict(sorted(count.items(), key=lambda item: item[1], reverse=True)))

def count_counts(counts):
    counts_of_counts = {}
    for count in counts.values():
        if count in counts_of_counts:
            counts_of_counts[count] += 1
        else:
            counts_of_counts[count] = 1
    return(dict(sorted(counts_of_counts.items(), reverse=False)))

apps = count_apps(df['app'])
domains = count_urls(df['tracker'])


In [3]:
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.ticker import MaxNLocator
import seaborn as sns
import numpy as np
graphical = False  

if(not graphical):
    matplotlib.use("pgf")

plt.rcParams["figure.figsize"] = [6, 4]
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'Linux Libertine O',
    'font.size': 9
})
plt.rcParams['figure.constrained_layout.use'] = True

def bar_horizontal(c, title):
    sns.set_style('whitegrid')
    sns.set_palette('colorblind')
    ax = sns.barplot(x =list(c.values()), y=list(c.keys()), data=df)
    #ax.set_title(title)
    plt.xlabel('Percent of participants')
    if graphical:
            plt.show()
    else:
        plt.savefig(title + '.pgf', bbox_inches='tight')
    matplotlib.pyplot.close()

def bar_vertical(c, title):
    sns.set_style('whitegrid')
    sns.set_palette('colorblind')
    ax = sns.barplot(y =list(c.values()), x=list(c.keys()), data=df)
    #ax.set_title(title)
    plt.setp(ax.get_xticklabels())
    plt.xlabel('Number of trackers')
    plt.ylabel('Number of extensions')
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    ymin, ymax = ax.get_ylim()
    for yval in range(int(ymin), int(ymax)+1):
        ax.axhline(y=yval, color='gray', linestyle='-', alpha=1)

    if graphical:
            plt.show()
    else:
        plt.savefig(title + '.pgf', bbox_inches='tight')
    matplotlib.pyplot.close()

def pie(c, title):
    data = list(c.values())
    labels = list(c.keys())
    colours = sns.color_palette("colorblind")
    fig, ax = plt.subplots()
    #ax.pie(data, labels=labels, colors=colours, autopct='%1.1f%%')

    total = 0
    for i in data:
        total += i
    plt.xlabel('Number of trackers per app')
    #ax.pie(data, labels=[f'{l} ({s} apps)' for l, s in zip(labels, data)], colors=colours)
    ax.stairs(data, labels)
    #ax.set_title(title)
    if graphical:
            plt.show()
    else:
        plt.savefig(title + '.pgf', bbox_inches='tight')
    matplotlib.pyplot.close()

def print_latex_table(c, title):
    if graphical:
        print("\subsubsection{%s}" % title)
        print("""
            \\begin{table}[htbp]
            \centering
            \small
            \caption{%s}
            \\begin{tabular}{l|rr} \\toprule
            \\textbf{Item} & \\textbf{no.} \\\\\\midrule""" % title)
        for key, value in c.items():
            print("""           %s & %s \\\\""" % (key, value))
            
        print("""
            \\bottomrule
            \end{tabular}
        \end{table}
        """)
    else:
        f.write("\subsubsection{%s}" % title)
        f.write("""
            \\begin{table}[htbp]
            \centering
            \small
            \caption{%s}
            \\begin{tabular}{l|rr} \\toprule
            \\textbf{Item} & \\textbf{no.} \\\\\\midrule""" % title)
        for key, value in c.items():
            f.write("""           %s & %s \\\\""" % (key, value))
        f.write("""
            \\bottomrule
            \end{tabular}
        \end{table}
        """)

In [4]:
#print(count_counts(apps))

bar_horizontal(domains,"Request per URL")
#print_latex_table(domains,"Request per URL")
counts = count_counts(apps)
print(counts)
bar_vertical(counts, "Trackers per app")
#pie(counts, "")
#print_latex_table(apps, "Trackers per app")

{1: 19, 2: 12, 3: 12, 4: 9, 5: 9, 6: 6, 7: 3, 8: 2, 9: 3, 10: 1, 11: 3, 13: 4, 14: 1, 15: 1, 16: 1, 17: 2, 18: 3, 19: 2, 20: 1, 21: 2, 22: 1, 25: 3, 26: 6, 27: 1, 29: 1, 32: 1, 33: 1, 35: 2, 37: 1, 39: 1}
  agg_filter: a filter function, which takes a (m, n, 3) float array and a dpi value, and returns a (m, n, 3) array and two offsets from the bottom left corner of the image
  alpha: scalar or None
  animated: bool
  backgroundcolor: color
  bbox: dict with properties for `.patches.FancyBboxPatch`
  clip_box: `.Bbox`
  clip_on: bool
  clip_path: Patch or (Path, Transform) or None
  color or c: color
  figure: `.Figure`
  fontfamily or family: {FONTNAME, 'serif', 'sans-serif', 'cursive', 'fantasy', 'monospace'}
  fontproperties or font or font_properties: `.font_manager.FontProperties` or `str` or `pathlib.Path`
  fontsize or size: float or {'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large'}
  fontstretch or stretch: {a numeric value in range 0-1000, 'ultra-conden