In [1]:
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00368/Facebook_metrics.zip"

In [2]:
import urllib.request
urllib.request.urlretrieve(DATA_URL, "metrics.zip")
import zipfile
with zipfile.ZipFile("metrics.zip", 'r') as zip_ref:
    zip_ref.extractall('data')

In [3]:
import csv
with open('data/dataset_Facebook.csv') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=";")
    data = list(reader)

In [4]:
from collections import defaultdict
types = defaultdict(list)
for row in data:
    ctype = row['Type']
    for key in row.keys():
        try:
            row[key] = int(row[key] or 0)
        except:
            pass
    types[(1, ctype)].append(row)
types[(0, 'Total')] = data

In [5]:
from statistics import mean, median, mode, StatisticsError
from collections import OrderedDict

def safe_mode(lst):
    try:
        return mode(lst)
    except StatisticsError:
        return ""

funcs_to_calc = OrderedDict([
    ('Max', max),
    ('Min', min),
    ('Avg', mean),
    ('Median', median),
    ('Mode', safe_mode),
])
columns_to_count = [
    'like',
    'share',
    'Page total likes',
]

In [6]:
def calculate_on_column(data, colname, func, fmt="{:.3f}"):
    lst = (x[colname] for x in data)
    ret = func(lst)
    if isinstance(ret, str):
        return ret
    return fmt.format(ret)

In [7]:
from ipy_table import make_table, apply_theme, set_global_style
from IPython.core.display import display, HTML
result = []
for (_, ctype), data in sorted(types.items()):
    tbl = [
        [''] + list(funcs_to_calc.keys()),
    ]
    for col in columns_to_count:
        tbl.append([col])
        for funcname, func in funcs_to_calc.items():
            tbl[-1].append(calculate_on_column(data, col, func))
    result.append((
        '<h1>{}</h1>'.format(ctype), make_table(tbl)
    ))
    apply_theme('basic')
render = ''.join(
    x[0] + x[1]._repr_html_() for x in result
)
display(HTML(render))

0,1,2,3,4,5
,Max,Min,Avg,Median,Mode
like,5172.000,0.000,177.590,101.000,98.000
share,790.000,0.000,27.048,19.000,
Page total likes,139441.000,81370.000,123194.176,129600.000,136393.000

0,1,2,3,4,5
,Max,Min,Avg,Median,Mode
like,379.000,5.000,73.318,37.000,
share,44.000,0.000,12.909,10.500,2.000
Page total likes,138895.000,85979.000,116363.182,115396.000,138353.000

0,1,2,3,4,5
,Max,Min,Avg,Median,Mode
like,5172.000,0.000,182.183,99.500,
share,790.000,0.000,26.904,19.000,14.000
Page total likes,139441.000,81370.000,122354.171,128032.000,124940.000

0,1,2,3,4,5
,Max,Min,Avg,Median,Mode
like,859.000,13.000,176.711,150.000,
share,123.000,1.000,31.422,28.000,
Page total likes,139441.000,104070.000,132647.044,135713.000,139441.000

0,1,2,3,4,5
,Max,Min,Avg,Median,Mode
like,449.000,65.000,231.429,204.000,
share,121.000,13.000,52.143,44.000,13.000
Page total likes,138895.000,126424.000,135014.857,137893.000,137893.000


In [8]:
TARGET_COL = 'Total Interactions'
from functools import reduce
def get_most_popular(left, right):
    return left if left[TARGET_COL] > right[TARGET_COL] else right
most_popular = reduce(get_most_popular, data)
make_table(list(most_popular.items()))

0,1
Post Month,11
Lifetime Post Total Reach,100768
Post Weekday,5
comment,17
Lifetime Post Impressions by people who have liked your Page,59658
Lifetime Post Total Impressions,220447
share,84
Paid,1
Total Interactions,550
Lifetime Engaged Users,2101
