# In [1]:  (Jupyter notebook cell marker, kept as a comment so the file parses as Python)
import contextlib
import sys

import polars as pl

# File paths
# Input CSV location. Written as a raw string so the backslashes in the
# Windows path are taken literally instead of being read as escape sequences.
input_file = r"C:\Study\SYRACUSE\RA\Assignment 1\period_03\2024_fb_posts_president_scored_anon.csv"
# Name of the text report. It is a relative path, so it resolves against the
# current working directory of the process.
output_file = "Output_Polars_fb_posts.txt"

def print_section(title):
    """Print a section banner: a blank line, then *title* framed by '=' rules."""
    rule = "=" * 10
    print(f"\n{rule} {title} {rule}")

def safe_cast_numeric(df, columns):
    """Return *df* with each listed column that exists cast to Float64.

    Names in *columns* that are not present in ``df.columns`` are ignored.
    The cast is non-strict (``strict=False``).
    """
    cast_exprs = [
        pl.col(name).cast(pl.Float64, strict=False)
        for name in columns
        if name in df.columns
    ]
    return df.with_columns(cast_exprs)

def describe_group(df, group_name):
    """Print a "Group: <name>" header followed by ``df.describe()``."""
    header = f"\nGroup: {group_name}"
    print(header)
    # Computed only after the header is printed, so the header still appears
    # even if describe() raises.
    summary = df.describe()
    print(summary)

def main():
    """Profile the Facebook posts CSV with Polars and write a text report.

    Reads ``input_file``, casts the engagement columns to Float64 via
    ``safe_cast_numeric``, and writes the following sections to
    ``output_file`` (UTF-8):

    * column names and the first rows,
    * ``describe()`` for the whole dataset,
    * the three most frequent ``Facebook_Id`` values,
    * ``describe()`` for the first three distinct ``Facebook_Id`` groups,
    * ``describe()`` for the first three distinct
      ``(Facebook_Id, post_id)`` groups.

    "First" means first in file order; uniqueness is computed with
    ``maintain_order=True`` so the report is reproducible between runs.

    Raises:
        Whatever ``pl.read_csv`` / ``open`` raise for unreadable paths, and
        Polars' column-not-found error if ``Facebook_Id`` or ``post_id`` is
        missing from the CSV.
    """
    # Load and cast BEFORE opening the report: opening with "w" truncates the
    # file, so a bad input path must not wipe out a previous report.
    df = pl.read_csv(input_file)
    df = safe_cast_numeric(df, ["Likes", "Comments", "Shares", "Overperforming Score"])

    # redirect_stdout restores the *previous* sys.stdout on exit, including
    # when an exception escapes. Assigning sys.__stdout__ by hand would leave
    # stdout pointing at a closed file on error and would break output in
    # environments (e.g. Jupyter) that install their own stdout object.
    with open(output_file, "w", encoding="utf-8") as f, contextlib.redirect_stdout(f):
        print_section("Header and Sample Rows")
        print("Columns:", df.columns)
        print(df.head())

        print_section("Descriptive Statistics for Entire Dataset")
        print(df.describe())

        print_section("Top Facebook_Id Value Counts")
        top_ids = df.group_by("Facebook_Id").len().sort("len", descending=True).head(3)
        print(top_ids)

        print_section("Grouped by Facebook_Id (First 3)")
        # maintain_order=True keeps first-appearance order; without it the
        # three ids chosen can differ from run to run.
        first_ids = (
            df.select("Facebook_Id")
            .unique(maintain_order=True)
            .head(3)
            .to_series()
            .to_list()
        )
        for fb_id in first_ids:
            # eq_missing treats null == null as a match, so a null id selects
            # its rows instead of producing an empty group.
            group = df.filter(pl.col("Facebook_Id").eq_missing(fb_id))
            describe_group(group, f"Facebook_Id = {fb_id}")

        print_section("Grouped by Facebook_Id and post_id (First 3)")
        combo_ids = (
            df.select(["Facebook_Id", "post_id"])
            .unique(maintain_order=True)
            .head(3)
        )
        # iter_rows yields tuples in the selected column order.
        for fb_id, post_id in combo_ids.iter_rows():
            group = df.filter(
                pl.col("Facebook_Id").eq_missing(fb_id)
                & pl.col("post_id").eq_missing(post_id)
            )
            describe_group(group, f"Facebook_Id = {fb_id}, post_id = {post_id}")

        print_section("Script Completed")

# Run the report only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
