In [1]:
from itertools import islice

import pandas as pd

DATA_FILE = "2024_fb_posts_president_scored_anon.csv"

# Read the posts CSV into a DataFrame.
df = pd.read_csv(DATA_FILE)

# Column names split by dtype: numeric vs. everything else.
numeric_cols = list(df.select_dtypes(include="number").columns)
non_numeric_cols = list(df.select_dtypes(exclude="number").columns)

# === Summary Printer ===
def summarize(df_part, label="Summary"):
    """Print a banner, then numeric and non-numeric column summaries.

    Column types are derived from ``df_part`` itself, so the function works
    on any DataFrame and does not depend on module-level column lists.

    Args:
        df_part: DataFrame (or row subset of one) to summarize.
        label: Heading printed between the two ``=`` rules.

    Returns:
        None. All output is written to stdout.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(label)
    print(rule)

    if df_part.empty:
        print("No records found.\n")
        return

    # Row subsets keep their parent's dtypes, so classifying the columns
    # here gives the same split as classifying the full frame would.
    numeric = df_part.select_dtypes(include="number").columns.tolist()
    non_numeric = df_part.select_dtypes(exclude="number").columns.tolist()

    # Numeric columns: one row of describe() statistics per column.
    if numeric:
        print("\n📊 Numeric Columns:")
        print(df_part[numeric].describe().transpose().round(2))

    # Non-numeric columns: distinct-value count and the modal value.
    print("\n📝 Non-Numeric Columns:")
    for col in non_numeric:
        counts = df_part[col].value_counts(dropna=True)
        if counts.empty:
            # Column holds only missing values in this subset; nothing to report.
            continue
        # value_counts() sorts by frequency, so position 0 is the mode, and
        # its length is the number of distinct non-null values.
        print(f"- {col}:")
        print(f"    Unique values  : {len(counts)}")
        print(f"    Most frequent  : '{counts.index[0]}' ({counts.iloc[0]} times)")

# === Overall summary ===
summarize(df, "🌐 Overall Facebook Posts Summary")

# === Grouped by Facebook_Id (first 5)
# islice stops after five groups instead of building a DataFrame for every
# group in a list just to slice off the first five.
for fb_id, group in islice(df.groupby("Facebook_Id"), 5):
    summarize(group, f"📁 Grouped by Facebook_Id = {fb_id}")

# === Grouped by Facebook_Id and post_id (Top 10 pairs)
# The ten (Facebook_Id, post_id) pairs with the most rows, largest first.
top_pairs = (
    df.groupby(["Facebook_Id", "post_id"])
    .size()
    .sort_values(ascending=False)
    .head(10)
    .index
)
for fb_id, post_id in top_pairs:
    group = df[(df["Facebook_Id"] == fb_id) & (df["post_id"] == post_id)]
    summarize(group, f"🔗 Grouped by Facebook_Id = {fb_id}, post_id = {post_id}")


🌐 Overall Facebook Posts Summary

📊 Numeric Columns:
                                                      count     mean  \
Likes                                               19009.0  2377.70   
Comments                                            19009.0   901.58   
Shares                                              19009.0   320.54   
Love                                                19009.0   413.88   
Wow                                                 19009.0     5.87   
Haha                                                19009.0   105.72   
Sad                                                 19009.0    10.17   
Angry                                               19009.0    20.06   
Care                                                19009.0    34.93   
Post Views                                          16544.0  6485.06   
Total Views                                         16544.0  7461.85   
Total Views For All Crossposts                      16544.0  3555.94   
Sponsor Id