**Loading data**


In [44]:
import polars as pl

filepath="/content/2024_fb_posts_president_scored_anon.csv"

df = pl.read_csv(
    filepath,
    null_values=["-", ""],
    ignore_errors=True,
    truncate_ragged_lines=True
)

df = df.with_columns([pl.col(col).cast(pl.Utf8, strict=False) for col in df.columns])

**DataFrame.describe() — for general stats**

In [45]:
print("=== Overall dataset describe ===")
print(df.describe())



=== Overall dataset describe ===
shape: (9, 57)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ statistic ┆ Facebook_ ┆ post_id   ┆ Page      ┆ … ┆ incivilit ┆ scam_illu ┆ freefair_ ┆ fraud_il │
│ ---       ┆ Id        ┆ ---       ┆ Category  ┆   ┆ y_illumin ┆ minating  ┆ illuminat ┆ luminati │
│ str       ┆ ---       ┆ str       ┆ ---       ┆   ┆ ating     ┆ ---       ┆ ing       ┆ ng       │
│           ┆ str       ┆           ┆ str       ┆   ┆ ---       ┆ str       ┆ ---       ┆ ---      │
│           ┆           ┆           ┆           ┆   ┆ str       ┆           ┆ str       ┆ str      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ count     ┆ 23892     ┆ 23891     ┆ 23365     ┆ … ┆ 23891     ┆ 21993     ┆ 23890     ┆ 23891    │
│ null_coun ┆ 0         ┆ 1         ┆ 527       ┆ … ┆ 1         ┆ 1899      ┆ 2         ┆ 1        │
│ t         ┆           ┆           ┆      

**nunique() — for unique counts**

In [46]:
print("\n=== Unique value counts for each column ===")
for col in df.columns:
    unique_count = df[col].n_unique()
    print(f"{col}: {unique_count} unique values")



=== Unique value counts for each column ===
Facebook_Id: 23 unique values
post_id: 17065 unique values
Page Category: 8 unique values
Page Admin Top Country: 3 unique values
Post Created: 17055 unique values
Post Created Date: 368 unique values
Post Created Time: 14553 unique values
Type: 11 unique values
Total Interactions: 4610 unique values
Likes: 3413 unique values
Comments: 2642 unique values
Shares: 1542 unique values
Love: 1601 unique values
Wow: 195 unique values
Haha: 1155 unique values
Sad: 240 unique values
Angry: 424 unique values
Care: 514 unique values
Video Share Status: 5 unique values
Is Video Owner?: 3 unique values
Post Views: 2843 unique values
Total Views: 2902 unique values
Total Views For All Crossposts: 74 unique values
Video Length: 776 unique values
Sponsor Id: 1 unique values
Sponsor Name: 1 unique values
Sponsor Category: 1 unique values
Overperforming Score: 2009 unique values
illuminating_scored_messageelection_integrity_Truth_illuminating: 2 unique value

**value_counts() — for most frequent values**

In [47]:
print("\n=== Most frequent value for each column ===")
for col in df.columns:
    vc = df[col].value_counts()
    count_col = vc.columns[1]
    vc = vc.sort(count_col, descending=True)
    if vc.height > 0:
        val, cnt = vc.row(0)
        print(f"{col}: most frequent = {val} (count = {cnt})")




=== Most frequent value for each column ===
Facebook_Id: most frequent = 32fc18da91029ff09bf74fe9887eace6b5d2145809d583f696e344530508b064 (count = 12052)
post_id: most frequent = 6e654d37a2ceb7ce46fb6adbffd287dae995d28c54911589f301cc5718b22ebb (count = 2)
Page Category: most frequent = PERSON (count = 15058)
Page Admin Top Country: most frequent = US (count = 22882)
Post Created: most frequent = 2023-12-22 11:22:14 EST (count = 4)
Post Created Date: most frequent = 2024-01-10 (count = 204)
Post Created Time: most frequent = 17:30:01 (count = 10)
Type: most frequent = Link (count = 10585)
Total Interactions: most frequent = 15 (count = 174)
Likes: most frequent = 9 (count = 401)
Comments: most frequent = 0 (count = 1230)
Shares: most frequent = 0 (count = 2809)
Love: most frequent = 0 (count = 7019)
Wow: most frequent = 0 (count = 10301)
Haha: most frequent = 0 (count = 6621)
Sad: most frequent = 0 (count = 13458)
Angry: most frequent = 0 (count = 9495)
Care: most frequent = 0 (count =

Group by "Facebook_Id"  and compute same stats

In [51]:
grouped = df.group_by("Facebook_Id").agg([
    *[pl.col(col).cast(pl.Float64, strict=False).mean().alias(f"{col}_mean") for col in numeric_cols],
    *[pl.col(col).cast(pl.Float64, strict=False).min().alias(f"{col}_min") for col in numeric_cols],
    *[pl.col(col).cast(pl.Float64, strict=False).max().alias(f"{col}_max") for col in numeric_cols],
    *[pl.col(col).cast(pl.Float64, strict=False).std().alias(f"{col}_std") for col in numeric_cols],
])

print(grouped.head(10))

shape: (10, 173)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ Facebook_ ┆ Total Int ┆ Likes_mea ┆ Comments_ ┆ … ┆ incivilit ┆ scam_illu ┆ freefair_ ┆ fraud_il │
│ Id        ┆ eractions ┆ n         ┆ mean      ┆   ┆ y_illumin ┆ minating_ ┆ illuminat ┆ luminati │
│ ---       ┆ _mean     ┆ ---       ┆ ---       ┆   ┆ ating_std ┆ std       ┆ ing_std   ┆ ng_std   │
│ str       ┆ ---       ┆ f64       ┆ f64       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
│           ┆ f64       ┆           ┆           ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ a3fa0d15d ┆ 642.44094 ┆ 1607.3430 ┆ 333.97865 ┆ … ┆ 0.379185  ┆ 0.096972  ┆ 0.056165  ┆ 0.118482 │
│ d83b91029 ┆ 5         ┆ 83        ┆ 6         ┆   ┆           ┆           ┆           ┆          │
│ 5d0b17f9d ┆           ┆           ┆           ┆   ┆           ┆         

Group by "Facebook_Id" and "post_id" and compute same stats

In [53]:
grouped = df.group_by(["Facebook_Id", "post_id"]).agg([
    *[pl.col(col).cast(pl.Float64, strict=False).mean().alias(f"{col}_mean") for col in numeric_cols],
    *[pl.col(col).cast(pl.Float64, strict=False).min().alias(f"{col}_min") for col in numeric_cols],
    *[pl.col(col).cast(pl.Float64, strict=False).max().alias(f"{col}_max") for col in numeric_cols],
    *[pl.col(col).cast(pl.Float64, strict=False).std().alias(f"{col}_std") for col in numeric_cols],
])

print(grouped.head(10))

shape: (10, 174)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ Facebook_ ┆ post_id   ┆ Total Int ┆ Likes_mea ┆ … ┆ incivilit ┆ scam_illu ┆ freefair_ ┆ fraud_il │
│ Id        ┆ ---       ┆ eractions ┆ n         ┆   ┆ y_illumin ┆ minating_ ┆ illuminat ┆ luminati │
│ ---       ┆ str       ┆ _mean     ┆ ---       ┆   ┆ ating_std ┆ std       ┆ ing_std   ┆ ng_std   │
│ str       ┆           ┆ ---       ┆ f64       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---      │
│           ┆           ┆ f64       ┆           ┆   ┆ f64       ┆ f64       ┆ f64       ┆ f64      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ fedbfaab1 ┆ 9796c87b5 ┆ null      ┆ 10027.0   ┆ … ┆ null      ┆ null      ┆ null      ┆ null     │
│ 9b5112a68 ┆ 9bc7c0e6f ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 89b862314 ┆ 05900bbbb ┆           ┆           ┆   ┆           ┆         