In [None]:
import polars as pl

# Load the posts CSV into a polars DataFrame.
df = pl.read_csv("2024_tw_posts_president_scored_anon.csv")

# Keep only the first occurrence of each column name, preserving the
# original left-to-right column order.
seen = set()
deduped_cols = []
for column_name in df.columns:
    if column_name in seen:
        continue
    seen.add(column_name)
    deduped_cols.append(column_name)
df = df.select(deduped_cols)

# Numeric summary: describe() statistics for every Float64 / Int64 column,
# exported as a CSV file.
numeric_dtypes = (pl.Float64, pl.Int64)
numeric_df = df.select(pl.col(*numeric_dtypes))
numeric_summary = numeric_df.describe()
numeric_summary.write_csv("tw_posts_numeric_summary.csv")

# Non-numeric summary: for every string (Utf8) column, record the number of
# distinct values, the most frequent value, and how often that value occurs.
# One output row is produced per string column.
non_numeric_df = df.select(pl.col(pl.Utf8))
non_numeric_rows = []
for col in non_numeric_df.columns:
    unique_count = df.select(pl.col(col).n_unique()).item()
    # value_counts() returns its rows in unspecified order unless sort=True
    # is passed; sorting by descending count is what makes row 0 the most
    # frequent value. Ties between equally frequent values are still broken
    # arbitrarily.
    vc = df.select(pl.col(col).value_counts(sort=True)).unnest(col)
    if vc.height > 0:
        most_common_val = vc[0, col]
        most_common_count = vc[0, "count"]
    else:
        # No counted values at all (e.g. the frame has zero rows).
        most_common_val = None
        most_common_count = 0

    non_numeric_rows.append({
        "column": col,
        "unique_values": unique_count,
        "most_frequent_value": most_common_val,
        "frequency": most_common_count,
    })
pl.DataFrame(non_numeric_rows).write_csv("tw_posts_non_numeric_summary.csv")

# Like statistics per "id" value; skipped entirely when either the "id" or
# the "likeCount" column is missing from the frame.
if {"id", "likeCount"}.issubset(df.columns):
    like_count = pl.col("likeCount")
    id_aggregations = [
        pl.len().alias("tweet_count"),
        like_count.mean().alias("avg_likes"),
        like_count.min().alias("min_likes"),
        like_count.max().alias("max_likes"),
    ]
    grouped_by_id = df.group_by("id").agg(id_aggregations)
    grouped_by_id.write_csv("tw_posts_grouped_by_id.csv")

# Like statistics per ("id", "source") pair; skipped unless all three
# required columns are present in the frame.
if {"id", "source", "likeCount"}.issubset(df.columns):
    like_count = pl.col("likeCount")
    id_source_aggregations = [
        pl.len().alias("entry_count"),
        like_count.mean().alias("avg_likes"),
        like_count.min().alias("min_likes"),
        like_count.max().alias("max_likes"),
    ]
    grouped_by_id_source = df.group_by(["id", "source"]).agg(id_source_aggregations)
    grouped_by_id_source.write_csv("tw_posts_grouped_by_id_source.csv")

# Completion message. It is printed unconditionally, so it also appears when
# the grouped CSVs above were skipped because their columns were missing.
print("✅ Twitter posts analysis CSVs saved.")

✅ Twitter posts analysis CSVs saved.
