# In [1]:
import contextlib
import sys

import polars as pl

# File paths
# input_file: CSV that main() loads with pl.read_csv (absolute, machine-specific path).
# output_file: text report that main() opens for writing; the path is relative,
# so the file is created in the current working directory.
input_file = r"C:\Study\SYRACUSE\RA\Assignment 1\period_03\2024_fb_ads_president_scored_anon.csv"
output_file = "Output_Polars_fb_ads_stats.txt"

def print_section(title):
    """Print *title* framed by two 25-character ``=`` rules.

    The output starts with an empty line, followed by the top rule, the
    title and the bottom rule, each on its own line.
    """
    rule = "=" * 25
    # The leading empty string yields the blank line before the top rule.
    print("", rule, title, rule, sep="\n")

def _print_first_groups(df, keys, limit=3):
    """Print ``describe()`` for the first *limit* distinct *keys* combinations.

    Args:
        df: The polars DataFrame to slice.
        keys: List of column names that together identify a group.
        limit: Maximum number of groups to print.
    """
    # maintain_order=True keeps first-appearance order. Without it unique()
    # gives no ordering guarantee, so the "first" groups could differ between
    # runs. head() is applied before leaving polars so only `limit` rows are
    # turned into Python objects.
    first_keys = df.select(keys).unique(maintain_order=True).head(limit)

    for values in first_keys.iter_rows():
        group_df = df
        for key, value in zip(keys, values):
            # eq_missing treats null == null as a match, so a group whose key
            # is null still selects its rows (a plain == would select none).
            group_df = group_df.filter(pl.col(key).eq_missing(value))

        label = ", ".join(f"{key} = {value}" for key, value in zip(keys, values))
        print(f"\nGroup: {label}")
        print(group_df.describe())


def main():
    """Write descriptive statistics for the ads CSV to ``output_file``.

    Reads ``input_file`` with polars, casts the known numeric columns to
    Float64 when they are present, and prints to the report file:

    * ``describe()`` for the whole dataset,
    * the 3 most frequent ``page_id`` values,
    * ``describe()`` for the first 3 distinct ``page_id`` groups,
    * ``describe()`` for the first 3 distinct ``(page_id, ad_id)`` groups.

    Everything printed inside this function is redirected into the report
    file. Exceptions (for example an unreadable CSV or a missing ``page_id`` /
    ``ad_id`` column) propagate to the caller; the previous ``sys.stdout`` is
    restored in every case.
    """
    # redirect_stdout restores whatever stdout was active before (not
    # sys.__stdout__), and does so even if an exception is raised, so stdout
    # is never left bound to the closed report file.
    with open(output_file, "w", encoding="utf-8") as f, contextlib.redirect_stdout(f):
        # Load the dataset
        df = pl.read_csv(input_file)

        # Non-strict cast: values that fail conversion are expected to become
        # null instead of raising. Columns absent from the file are skipped.
        numeric_cols = ["estimated_audience_size", "estimated_impressions", "scam_illuminating"]
        present = [col for col in numeric_cols if col in df.columns]
        if present:
            df = df.with_columns(
                [pl.col(col).cast(pl.Float64, strict=False) for col in present]
            )

        print_section("Descriptive Statistics for Entire Dataset")
        print(df.describe())

        print_section("Top 3 Most Common page_id Values")
        top_pages = df.group_by("page_id").len().sort("len", descending=True).head(3)
        print(top_pages)

        print_section("Grouped by page_id (first 3 groups)")
        _print_first_groups(df, ["page_id"])

        print_section("Grouped by page_id and ad_id (first 3 groups)")
        _print_first_groups(df, ["page_id", "ad_id"])

        print_section("Script Completed")
        print("All descriptive stats and groupings printed successfully.")

# Generate the report only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
