In [0]:
%run /Workspace/Users/rishak1997@gmail.com/google-fit-ETL-and-analytics/gold_layer_utils

In [0]:
from concurrent.futures import ProcessPoolExecutor, as_completed

In [0]:
%sql

-- Make sure the gold schema exists in the google_fit catalog; a no-op when it is already there.
create schema if not exists google_fit.gold

In [0]:
%sql

-- Make sure the volume for the weekly activity plots exists; a no-op when it is already there.
create volume if not exists google_fit.gold.weekly_plots_activity_metrics

In [0]:
# Load the silver-layer daily activity metrics table.
# CreateDF is not defined in this notebook; presumably it comes from the
# gold_layer_utils notebook pulled in by %run.
daily_activity_metric_df_base = CreateDF.from_table(
    schema_name="silver",
    table_name="daily_activity_metrics",
)

In [0]:
def prefix_daily(col):
    """Return ``col`` prefixed with ``daily_`` unless it is a key column.

    The columns ``date``, ``week``, ``month`` and ``entity`` are returned
    unchanged; every other name becomes ``daily_<name>``.

    Args:
        col: Column name to (possibly) prefix.

    Returns:
        The original name for the four key columns, otherwise the prefixed name.
    """
    if col in ("date", "week", "month", "entity"):
        return col
    return f"daily_{col}"

# Keep only the columns whose name does not mention a min/max/average
# statistic or the cycling/running activities.
_kept_columns = [
    column_name
    for column_name in daily_activity_metric_df_base.columns
    if re.search(r"max_|min_|average_|cycling|running", column_name) is None
]

# Label each row with the first Sunday after its date; this is the week key
# used by the weekly aggregations further down.
_week_label = F.next_day("date", 'sunday').cast(DateType())

daily_activity_metric_df_base = daily_activity_metric_df_base.select(*_kept_columns).withColumn("week", _week_label)

In [0]:
# Rename every column through prefix_daily, keeping the original column order.
_renamed_columns = [prefix_daily(column_name) for column_name in daily_activity_metric_df_base.columns]
daily_activity_metric_df_renamed = daily_activity_metric_df_base.toDF(*_renamed_columns)

## **Weekly metrics**

In [0]:
# Key columns plus the daily metrics that feed the weekly aggregations.
_weekly_input_columns = [
    "entity",
    "date",
    "week",
    "daily_move_minutes_count",
    "daily_calories_kcal",
    "daily_distance_m",
    "daily_heart_points",
    "daily_heart_minutes",
    "daily_step_count",
    "daily_walking_duration_ms",
]

# DeclarativeAggregations is not defined in this notebook; presumably it comes
# from the gold_layer_utils notebook pulled in by %run.
weekly_df_da = DeclarativeAggregations(daily_activity_metric_df_renamed.select(_weekly_input_columns))

# Helper set-up calls; judging by their names the two clear_* calls reset any
# earlier aggregation config / transformed frame so the cell can be re-run.
# TODO confirm against the helper's implementation.
weekly_df_da.define_entities()
weekly_df_da.clear_agg_config()
weekly_df_da.clear_df_trans()

# Render the input frame held by the helper.
weekly_df_da.df.display()

In [0]:
# Inspect the helper object right after set-up. What is shown depends on
# DeclarativeAggregations.current_attributes, which is defined outside this notebook.
weekly_df_da.current_attributes()

In [0]:
# Register a "rank" metric named "drank": grouped by entity, ordered by week.
_week_rank_config = {
    "agg_metric": "rank",
    "group_by_cols": ["entity"],
    "order_by": 'week',
    "name": "drank",
}
weekly_df_da.build_agg_config(**_week_rank_config)

# Apply the registered config to the helper's transformed frame.
weekly_df_da.build_trans_df()

In [0]:
# Lag distance used to reach the previous week's total.
# NOTE(review): an offset of 7 ordered by "week" only lands on the previous
# week if each entity has exactly seven rows per week in the transformed
# frame; missing days would shift the lag onto the wrong week - confirm
# against DeclarativeAggregations and the silver data.
DAYS_PER_WEEK = 7

# One entry per weekly metric:
# (daily source column, current-week total, previous-week total, week-over-week % change)
WEEKLY_METRIC_SPECS = [
    (
        "daily_move_minutes_count",
        "current_week_move_minutes",
        "prev_week_move_minutes",
        "weekly_move_minutes_change_percent",
    ),
    (
        "daily_calories_kcal",
        "current_week_calories_burned_kcal",
        "prev_week_calories_burned_kcal",
        "weekly_calories_burned_change_percent",
    ),
    (
        "daily_distance_m",
        "current_week_distance_covered_meters",
        "prev_week_distance_covered_meters",
        "weekly_distance_covered_change_percent",
    ),
    (
        "daily_heart_points",
        "current_week_heart_points_earned",
        "prev_week_heart_points_earned",
        "weekly_heart_points_change_percent",
    ),
    (
        "daily_step_count",
        "current_week_step_count",
        "prev_week_step_count",
        "weekly_step_count_change_percent",
    ),
]


def add_weekly_metric(da, daily_col, current_col, prev_col, change_col, lag_offset=DAYS_PER_WEEK):
    """Add the current-week total, previous-week total and % change for one metric.

    Issues the same three helper steps that were previously copy-pasted once
    per metric: a "sum" over (entity, week), a "lag" of that sum per entity
    ordered by week, and a percentage comparison between the two.

    Args:
        da: The DeclarativeAggregations helper to extend.
        daily_col: Daily source column to sum.
        current_col: Name for the current-week total column.
        prev_col: Name for the previous-week total column.
        change_col: Name for the week-over-week percentage change column.
        lag_offset: Row offset passed to the "lag" metric.
    """
    da.build_agg_config(agg_metric="sum", group_by_cols=["entity", "week"], agg_on_col=daily_col, name=current_col)
    da.build_trans_df()

    da.build_agg_config(agg_metric="lag", group_by_cols=["entity"], agg_on_col=current_col, order_by="week", offset=lag_offset, name=prev_col)
    da.build_trans_df()

    da.add_comparison_col_percent(curr_col=current_col, prev_col=prev_col, comp_col_name=change_col)


# Metrics are added in the same order as the original per-metric cells.
for weekly_metric_spec in WEEKLY_METRIC_SPECS:
    add_weekly_metric(weekly_df_da, *weekly_metric_spec)

In [0]:
# Render the helper's transformed frame to eyeball the weekly columns added above.
weekly_df_da.df_trans.display()

In [0]:
# One line plot (x = week) for every transformed column whose name contains
# "current" or "percent". The comprehension keeps the loop variable out of the
# notebook's global namespace.
weekly_plots = [
    LinePlot(weekly_df_da, "week", column_name)
    for column_name in weekly_df_da.df_trans.columns
    if re.search(r"percent|current", column_name)
]


def save_weekly_plots(LinePlotObj):
    """Draw one plot and save the current figure to the plot's ``save_path``.

    Args:
        LinePlotObj: Object exposing ``plot()`` and ``save_path``.

    The current matplotlib figure is closed even when drawing or saving fails,
    so a failed plot does not leave an open figure behind.
    """
    try:
        LinePlotObj.plot()
        plt.savefig(LinePlotObj.save_path)
    finally:
        plt.close()


futures = []
# ProcessPoolExecutor raises ValueError for max_workers <= 0, so skip the pool
# entirely when no column matched.
# NOTE(review): submitted arguments are pickled to reach the worker processes;
# confirm LinePlot (which holds weekly_df_da) survives that.
if weekly_plots:
    with ProcessPoolExecutor(len(weekly_plots)) as pe:
        futures = [pe.submit(save_weekly_plots, plot) for plot in weekly_plots]

# Re-raise any exception that occurred inside a worker.
for future in as_completed(futures):
    future.result()


In [0]:
# image_paths = [weekly_plot.save_path for weekly_plot in weekly_plots]

# for image_path in image_paths:
#     filename = image_path.split('/')[-1]
#     displayHTML(f'<a href="files/{image_path}" download>Download {filename}</a><br>')

In [0]:
# Import IPython's display under an alias so the notebook-level `display`
# name (used on DataFrames elsewhere in this notebook) is not rebound.
from IPython.display import Image, display as ipython_display

# `image_paths` was only ever assigned in a commented-out cell, so build it
# here from the plots saved above; otherwise a fresh run fails with NameError.
image_paths = [weekly_plot.save_path for weekly_plot in weekly_plots]

for image_path in image_paths:
    # Read the image bytes from the volume and embed them in the cell output.
    with open(image_path, "rb") as image_file:
        img_bytes = image_file.read()
    ipython_display(Image(data=img_bytes))

In [0]:
# Recursively delete the weekly-plots volume path (the volume created earlier
# as google_fit.gold.weekly_plots_activity_metrics).
# NOTE(review): presumably this is where LinePlot.save_path points, so the
# saved plot files only live for the duration of a run - confirm that is intended.
dbutils.fs.rm("/Volumes/google_fit/gold/weekly_plots_activity_metrics", recurse= True)

In [0]:
# End the notebook run here with the value "Success".
# NOTE(review): the cells below are therefore not reached on a top-to-bottom
# run. They reference `monthly_df_da`, which no cell in this notebook defines,
# so they look like leftover scratch work - confirm whether to finish or remove them.
dbutils.notebook.exit("Success")

In [0]:
# Monthly counterpart of the weekly move-minutes metric: sum per (entity, month),
# lag of that sum per entity ordered by month, then a percentage comparison.
# NOTE(review): `monthly_df_da` is not defined anywhere in this notebook, so
# this cell raises NameError as written.
# NOTE(review): offset=31 presumably assumes 31 rows per month in the
# transformed frame; months with fewer days would make the lag land on the
# wrong month - confirm.
monthly_df_da.build_agg_config(agg_metric="sum", group_by_cols=["entity", "month"], agg_on_col="daily_move_minutes_count", name= "current_month_move_minutes")
monthly_df_da.build_trans_df()



monthly_df_da.build_agg_config(agg_metric="lag", group_by_cols=["entity"], agg_on_col="current_month_move_minutes", order_by= "month", offset= 31, default= 0, name= "prev_month_move_minutes")
monthly_df_da.build_trans_df()


monthly_df_da.add_comparison_col_percent(curr_col="current_month_move_minutes", prev_col="prev_month_move_minutes", comp_col_name="monthly_move_minutes_change_percent")

In [0]:
# Add `rnk`: rank of each row within its entity, ordered by month ascending.
# Rows sharing a month receive the same rank. Overwrites df_trans in place.
monthly_df_da.df_trans = monthly_df_da.df_trans.withColumn('rnk', F.rank().over(W.partitionBy("entity").orderBy(F.col("month").asc())))

In [0]:

# Overwrite `prev_month_move_minutes` with the first `current_month_move_minutes`
# value found in a frame spanning the 31 preceding rows up to the current row,
# per entity.
# NOTE(review): the window has no orderBy, so which rows make up that frame is
# presumably not deterministic - confirm whether an ordering by month was intended.
monthly_df_da.df_trans = monthly_df_da.df_trans.withColumn("prev_month_move_minutes", F.first_value('current_month_move_minutes').over(W.partitionBy("entity").rowsBetween(-31, W.currentRow)))

In [0]:
# Render the monthly transformed frame for inspection.
monthly_df_da.df_trans.display()

In [0]:
# Draw the month-over-month move-minutes change as a line plot (x = month).
# Unlike the weekly plots, nothing is saved here; only plot() is called.
LinePlot(monthly_df_da, x= "month", y= "monthly_move_minutes_change_percent").plot()

In [0]:
# `select` already returns a new DataFrame, so no copy is needed. Deep-copying
# a Spark DataFrame is redundant and risks failing on the JVM handle it wraps.
test_df = monthly_df_da.df_trans.select(['entity', 'month', 'daily_move_minutes_count'])

In [0]:
# Collapse to one row per (entity, month) with the summed move minutes ...
_monthly_totals = test_df.groupBy(["entity", "month"]).agg(
    F.sum("daily_move_minutes_count").alias("monthly_move_minutes")
)

# ... then fetch the previous month's total per entity. With one row per month,
# a lag of 1 ordered by month is the preceding month present in the data.
_by_entity_over_months = W.partitionBy("entity").orderBy("month")
_previous_month_total = F.lag(F.col("monthly_move_minutes"), offset=1).over(_by_entity_over_months)

display(_monthly_totals.withColumn('prev_month_move_minutes', _previous_month_total))