In [5]:
import csv
import math
from collections import defaultdict, Counter

def is_float(value):
    try:
        float(value)
        return True
    except:
        return False

# Load the data
data = []
with open('2024_fb_posts_president_scored_anon.csv', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for row in reader:
        data.append(row)

# Get all column names
columns = data[0].keys()

# Initialize containers
numeric_stats = defaultdict(list)
non_numeric_stats = defaultdict(list)

# Classify and collect values
for row in data:
    for col, val in row.items():
        val = val.strip()
        if is_float(val):
            numeric_stats[col].append(float(val))
        else:
            non_numeric_stats[col].append(val)

# Compute numeric stats
for col, values in numeric_stats.items():
    count = len(values)
    mean = sum(values) / count
    min_val = min(values)
    max_val = max(values)
    std_dev = math.sqrt(sum((x - mean) ** 2 for x in values) / count)
    print(f"\nNumeric Column: {col}")
    print(f"  Count: {count}, Mean: {mean:.2f}, Min: {min_val}, Max: {max_val}, Std Dev: {std_dev:.2f}")

# Compute non-numeric stats
for col, values in non_numeric_stats.items():
    count = len(values)
    unique_vals = set(values)
    most_common = Counter(values).most_common(1)
    print(f"\nNon-numeric Column: {col}")
    print(f"  Count: {count}, Unique: {len(unique_vals)}, Most Frequent: {most_common}")



Numeric Column: Total Interactions
  Count: 14398, Mean: 2210.15, Min: 0.0, Max: 470087.0, Std Dev: 13066.17

Numeric Column: Likes
  Count: 19009, Mean: 2377.70, Min: 0.0, Max: 351979.0, Std Dev: 11253.17

Numeric Column: Comments
  Count: 19009, Mean: 901.58, Min: 0.0, Max: 93872.0, Std Dev: 3681.88

Numeric Column: Shares
  Count: 19009, Mean: 320.54, Min: 0.0, Max: 76150.0, Std Dev: 1722.11

Numeric Column: Love
  Count: 19009, Mean: 413.88, Min: 0.0, Max: 244482.0, Std Dev: 3730.84

Numeric Column: Wow
  Count: 19009, Mean: 5.87, Min: 0.0, Max: 4345.0, Std Dev: 52.95

Numeric Column: Haha
  Count: 19009, Mean: 105.72, Min: 0.0, Max: 99276.0, Std Dev: 942.01

Numeric Column: Sad
  Count: 19009, Mean: 10.17, Min: 0.0, Max: 56111.0, Std Dev: 418.31

Numeric Column: Angry
  Count: 19009, Mean: 20.06, Min: 0.0, Max: 11814.0, Std Dev: 156.02

Numeric Column: Care
  Count: 19009, Mean: 34.93, Min: 0.0, Max: 85236.0, Std Dev: 790.08

Numeric Column: Post Views
  Count: 16544, Mean: 6485.

In [6]:
# Grouped by page_id
grouped_data = defaultdict(lambda: defaultdict(list))

for row in data:
    page = row.get("page_id", "").strip()
    for col, val in row.items():
        val = val.strip()
        if is_float(val):
            grouped_data[page][col].append(float(val))

# Compute stats per page_id
for page, columns in grouped_data.items():
    print(f"\n=== Stats for page_id: {page} ===")
    for col, values in columns.items():
        count = len(values)
        mean = sum(values) / count
        min_val = min(values)
        max_val = max(values)
        std_dev = math.sqrt(sum((x - mean) ** 2 for x in values) / count)
        print(f"  Column: {col}")
        print(f"    Count: {count}, Mean: {mean:.2f}, Min: {min_val}, Max: {max_val}, Std Dev: {std_dev:.2f}")



=== Stats for page_id:  ===
  Column: Total Interactions
    Count: 14398, Mean: 2210.15, Min: 0.0, Max: 470087.0, Std Dev: 13066.17
  Column: Likes
    Count: 19009, Mean: 2377.70, Min: 0.0, Max: 351979.0, Std Dev: 11253.17
  Column: Comments
    Count: 19009, Mean: 901.58, Min: 0.0, Max: 93872.0, Std Dev: 3681.88
  Column: Shares
    Count: 19009, Mean: 320.54, Min: 0.0, Max: 76150.0, Std Dev: 1722.11
  Column: Love
    Count: 19009, Mean: 413.88, Min: 0.0, Max: 244482.0, Std Dev: 3730.84
  Column: Wow
    Count: 19009, Mean: 5.87, Min: 0.0, Max: 4345.0, Std Dev: 52.95
  Column: Haha
    Count: 19009, Mean: 105.72, Min: 0.0, Max: 99276.0, Std Dev: 942.01
  Column: Sad
    Count: 19009, Mean: 10.17, Min: 0.0, Max: 56111.0, Std Dev: 418.31
  Column: Angry
    Count: 19009, Mean: 20.06, Min: 0.0, Max: 11814.0, Std Dev: 156.02
  Column: Care
    Count: 19009, Mean: 34.93, Min: 0.0, Max: 85236.0, Std Dev: 790.08
  Column: Post Views
    Count: 16544, Mean: 6485.06, Min: 0.0, Max: 4276477

In [7]:
# Grouped by ad_id
grouped_data_pair = defaultdict(lambda: defaultdict(list))

for row in data:
    page = row.get("page_id", "").strip()
    ad = row.get("ad_id", "").strip()
    group_key = (page, ad)
    for col, val in row.items():
        val = val.strip()
        if is_float(val):
            grouped_data_pair[group_key][col].append(float(val))

# Compute stats per (page_id, ad_id)
for (page, ad), columns in grouped_data_pair.items():
    print(f"\n=== Stats for (page_id: {page}, ad_id: {ad}) ===")
    for col, values in columns.items():
        count = len(values)
        mean = sum(values) / count
        min_val = min(values)
        max_val = max(values)
        std_dev = math.sqrt(sum((x - mean) ** 2 for x in values) / count)
        print(f"  Column: {col}")
        print(f"    Count: {count}, Mean: {mean:.2f}, Min: {min_val}, Max: {max_val}, Std Dev: {std_dev:.2f}")



=== Stats for (page_id: , ad_id: ) ===
  Column: Total Interactions
    Count: 14398, Mean: 2210.15, Min: 0.0, Max: 470087.0, Std Dev: 13066.17
  Column: Likes
    Count: 19009, Mean: 2377.70, Min: 0.0, Max: 351979.0, Std Dev: 11253.17
  Column: Comments
    Count: 19009, Mean: 901.58, Min: 0.0, Max: 93872.0, Std Dev: 3681.88
  Column: Shares
    Count: 19009, Mean: 320.54, Min: 0.0, Max: 76150.0, Std Dev: 1722.11
  Column: Love
    Count: 19009, Mean: 413.88, Min: 0.0, Max: 244482.0, Std Dev: 3730.84
  Column: Wow
    Count: 19009, Mean: 5.87, Min: 0.0, Max: 4345.0, Std Dev: 52.95
  Column: Haha
    Count: 19009, Mean: 105.72, Min: 0.0, Max: 99276.0, Std Dev: 942.01
  Column: Sad
    Count: 19009, Mean: 10.17, Min: 0.0, Max: 56111.0, Std Dev: 418.31
  Column: Angry
    Count: 19009, Mean: 20.06, Min: 0.0, Max: 11814.0, Std Dev: 156.02
  Column: Care
    Count: 19009, Mean: 34.93, Min: 0.0, Max: 85236.0, Std Dev: 790.08
  Column: Post Views
    Count: 16544, Mean: 6485.06, Min: 0.0, M