In [1]:
import csv
import math
from collections import defaultdict, Counter

FILE_PATH = "2024_fb_posts_president_scored_anon.csv"

def is_number(s):
    """Return True if *s* parses as a float once commas are removed.

    Args:
        s: Text to test, e.g. ``"1,234.5"``.

    Returns:
        True when ``float(s.replace(",", ""))`` succeeds, otherwise False.
        Inputs without a usable ``replace`` method (such as ``None``) are
        reported as non-numeric rather than raising.
    """
    try:
        float(s.replace(",", ""))  # remove commas from numbers
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) must propagate to the caller.
        return False
    return True

def to_number(s):
    """Convert *s* to a float, ignoring commas.

    Args:
        s: Text to convert, e.g. ``"1,234.5"``.

    Returns:
        The parsed float, or None when *s* cannot be converted (non-numeric
        text, or an input such as ``None`` with no usable ``replace`` method).
    """
    try:
        return float(s.replace(",", ""))
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Only conversion failures map to None; anything else
        # (e.g. KeyboardInterrupt) must propagate to the caller.
        return None

def mean(lst):
    """Return the arithmetic average of *lst*, or 0 when *lst* is empty."""
    if not lst:
        return 0
    total = sum(lst)
    return total / len(lst)

def std_dev(lst):
    """Return the population standard deviation of *lst*.

    The squared deviations are divided by ``len(lst)`` (not ``len(lst) - 1``).
    Inputs with fewer than two elements yield 0.
    """
    count = len(lst)
    if count < 2:
        return 0
    center = mean(lst)
    squared_deviations = [(value - center) ** 2 for value in lst]
    return math.sqrt(sum(squared_deviations) / count)

def read_csv(file_path):
    """Load a UTF-8 CSV file into a list of row dictionaries.

    Args:
        file_path: Path of the CSV file; its first row supplies the keys.

    Returns:
        A list with one dict per data row, as produced by ``csv.DictReader``.
    """
    # newline="" hands line endings to the csv module untouched, so quoted
    # fields that contain line breaks are parsed correctly.
    with open(file_path, mode="r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)
        return list(reader)

def summarize(data, label="Overall Summary"):
    """Print per-column summary statistics for a list of row dicts.

    The columns are taken from the first row. For each column the non-blank
    cells are collected; if any of them parse as numbers the column is
    reported as numeric (count, mean, min, max, standard deviation of the
    parseable cells), otherwise as categorical (number of distinct values
    and the most frequent one). Columns with no non-blank cells print nothing.

    Args:
        data: List of dict rows, e.g. the output of ``read_csv``.
        label: Heading printed above the summary.

    Returns:
        None. All output goes to stdout.
    """
    print(f"\n{'=' * 60}")
    print(f"{label}")
    print(f"{'=' * 60}")

    if not data:
        print("No data found in this group.\n")
        return

    for col in data[0]:
        # Ragged rows make csv.DictReader emit None for missing cells and a
        # list for surplus ones; only real text cells are summarized so such
        # rows no longer crash on ``.strip()``.
        values = []
        for row in data:
            cell = row.get(col)
            if not isinstance(cell, str):
                continue
            cell = cell.strip()
            if cell:
                values.append(cell)

        # Parse each cell once; to_number yields None for non-numeric text.
        numeric_vals = [num for num in map(to_number, values) if num is not None]

        if numeric_vals:
            print(f"\n📊 {col} (Numeric)")
            print(f"    Count   : {len(numeric_vals)}")
            print(f"    Mean    : {mean(numeric_vals):.2f}")
            print(f"    Min     : {min(numeric_vals)}")
            print(f"    Max     : {max(numeric_vals)}")
            print(f"    Std Dev : {std_dev(numeric_vals):.2f}")
        elif values:
            counter = Counter(values)
            most_common = counter.most_common(1)[0]
            print(f"\n📝 {col} (Categorical)")
            print(f"    Unique Values : {len(counter)}")
            print(f"    Most Frequent : '{most_common[0]}' ({most_common[1]} times)")
    print()

def group_by(data, keys):
    """Bucket rows by the values they hold for the given column names.

    Args:
        data: Iterable of dict rows.
        keys: Column names whose values, in order, form each bucket's key.

    Returns:
        A ``defaultdict(list)`` mapping each tuple of key values to the rows
        that share it, in the order the rows were encountered.
    """
    buckets = defaultdict(list)
    for record in data:
        bucket_key = tuple([record[name] for name in keys])
        buckets[bucket_key].append(record)
    return buckets

if __name__ == "__main__":
    # Load every row of the scored Facebook posts file.
    data = read_csv(FILE_PATH)

    # Statistics over the whole file.
    summarize(data, label="🌐 Overall Facebook Posts Summary")

    # Statistics for the first 5 Facebook_Id groups, in the order the ids
    # first appear in the file (not ranked by size).
    fb_groups = group_by(data, ["Facebook_Id"])
    first_page_keys = list(fb_groups)[:5]
    for key in first_page_keys:
        page_label = f"📁 Group: Facebook_Id = {key[0]}"
        summarize(fb_groups[key], label=page_label)

    # Statistics for the first 5 (Facebook_Id, post_id) groups, again in
    # first-appearance order.
    fb_post_groups = group_by(data, ["Facebook_Id", "post_id"])
    first_post_keys = list(fb_post_groups)[:5]
    for key in first_post_keys:
        post_label = f"🔗 Group: Facebook_Id = {key[0]}, post_id = {key[1]}"
        summarize(fb_post_groups[key], label=post_label)



🌐 Overall Facebook Posts Summary

📝 Facebook_Id (Categorical)
    Unique Values : 21
    Most Frequent : '32fc18da91029ff09bf74fe9887eace6b5d2145809d583f696e344530508b064' (9013 times)

📝 post_id (Categorical)
    Unique Values : 19009
    Most Frequent : '8570b69695e00d8f06b12398ed525497e1712b5369c6fc2138fe98f69811c138' (1 times)

📝 Page Category (Categorical)
    Unique Values : 6
    Most Frequent : 'PERSON' (9453 times)

📝 Page Admin Top Country (Categorical)
    Unique Values : 1
    Most Frequent : 'US' (16280 times)

📝 Post Created (Categorical)
    Unique Values : 18951
    Most Frequent : '2023-11-14 11:11:44 EST' (2 times)

📝 Post Created Date (Categorical)
    Unique Values : 425
    Most Frequent : '2024-10-31' (103 times)

📝 Post Created Time (Categorical)
    Unique Values : 16102
    Most Frequent : '19:42:00' (7 times)

📝 Type (Categorical)
    Unique Values : 9
    Most Frequent : 'Link' (7404 times)

📊 Total Interactions (Numeric)
    Count   : 19009
    Mean    : 41