In [0]:
%run /Workspace/Users/rishak1997@gmail.com/google-fit-ETL-and-analytics/gold_layer_utils

In [0]:
from concurrent.futures import ProcessPoolExecutor, as_completed

In [0]:
%sql

-- Create the `gold` schema inside `google_fit` only when it does not already exist, so re-running this cell is harmless.
create schema if not exists google_fit.gold

In [0]:
%sql

-- Create the `activity_metrics_plots` volume in `google_fit.gold` if it is missing.
-- The cleanup cell near the end of this notebook removes files under /Volumes/google_fit/gold/activity_metrics_plots/.
create volume if not exists google_fit.gold.activity_metrics_plots

In [0]:
# Load the `daily_activity_metrics` table from the `silver` schema.
# NOTE(review): `CreateDF` is not defined in this notebook; presumably it comes from the %run'd gold_layer_utils notebook — confirm.
daily_activity_metric_df_base = CreateDF.from_table(schema_name= "silver", table_name= "daily_activity_metrics")

In [0]:
def prefix_daily(col):
    """Return ``col`` with a ``daily_`` prefix unless it is a key column.

    The key columns ``date``, ``week``, ``month`` and ``entity`` are returned
    unchanged; every other column name becomes ``daily_<col>``.
    """
    if col in ("date", "week", "month", "entity"):
        return col
    return f"daily_{col}"

# Keep only the columns whose names do not mention max_/min_/average_ statistics
# or the cycling/running activities; column order is preserved.
daily_activity_metric_df_base = daily_activity_metric_df_base.select(
    *(
        column_name
        for column_name in daily_activity_metric_df_base.columns
        if re.search(r"max_|min_|average_|cycling|running", column_name) is None
    )
)

In [0]:
# Rename every column through prefix_daily, positionally, so non-key columns gain the daily_ prefix.
daily_activity_metric_df_renamed = daily_activity_metric_df_base.toDF(
    *[prefix_daily(column_name) for column_name in daily_activity_metric_df_base.columns]
)

## **Daily metrics**

In [0]:
# Wrap the grouping keys plus the five daily metric columns in the aggregation helper.
daily_df_da = DeclarativeAggregations(
    daily_activity_metric_df_renamed.select(
        [
            "entity",
            "date",
            "daily_move_minutes_count",
            "daily_calories_kcal",
            "daily_distance_m",
            "daily_heart_points",
            "daily_step_count",
        ]
    )
)

# Call order kept as-is: define entities, then clear the aggregation config and the transformed frame.
# NOTE(review): presumably the two clear_* calls reset state left over from an earlier run — confirm in gold_layer_utils.
daily_df_da.define_entities()
daily_df_da.clear_agg_config()
daily_df_da.clear_df_trans()

# Preview the frame held by the helper.
daily_df_da.df.display()

In [0]:
# Inspect the helper's current state; what is shown is defined by `current_attributes` outside this notebook.
daily_df_da.current_attributes()

In [0]:
daily_df_da.build_agg_config(agg_metric="sum", group_by_cols=["entity", "date"], agg_on_col="daily_move_minutes_count", name= "current_day_move_minutes")
daily_df_da.build_trans_df()

daily_df_da.build_agg_config(agg_metric="lag", group_by_cols=["entity"], agg_on_col="current_day_move_minutes", order_by= "date", offset= 1, name= "prev_day_move_minutes")
daily_df_da.build_trans_df()

daily_df_da.add_comparison_col_percent(curr_col="current_day_move_minutes", prev_col="prev_day_move_minutes", comp_col_name="daily_move_minutes_change_percent")

In [0]:
daily_df_da.build_agg_config(agg_metric="sum", group_by_cols=["entity", "date"], agg_on_col="daily_calories_kcal", name= "current_day_calories_burned_kcal")
daily_df_da.build_trans_df()

daily_df_da.build_agg_config(agg_metric="lag", group_by_cols=["entity"], agg_on_col="current_day_calories_burned_kcal", order_by= "date", offset= 1, name= "prev_day_calories_burned_kcal")
daily_df_da.build_trans_df()

daily_df_da.add_comparison_col_percent(curr_col="current_day_calories_burned_kcal", prev_col="prev_day_calories_burned_kcal", comp_col_name="daily_calories_burned_change_percent")

In [0]:
daily_df_da.build_agg_config(agg_metric="sum", group_by_cols=["entity", "date"], agg_on_col="daily_distance_m", name= "current_day_distance_covered_meters")
daily_df_da.build_trans_df()

daily_df_da.build_agg_config(agg_metric="lag", group_by_cols=["entity"], agg_on_col="current_day_distance_covered_meters", order_by= "date", offset= 1, name= "prev_day_distance_covered_meters")
daily_df_da.build_trans_df()

daily_df_da.add_comparison_col_percent(curr_col="current_day_distance_covered_meters", prev_col="prev_day_distance_covered_meters", comp_col_name="daily_distance_covered_change_percent")

In [0]:
daily_df_da.build_agg_config(agg_metric="sum", group_by_cols=["entity", "date"], agg_on_col="daily_heart_points", name= "current_day_heart_points_earned")
daily_df_da.build_trans_df()

daily_df_da.build_agg_config(agg_metric="lag", group_by_cols=["entity"], agg_on_col="current_day_heart_points_earned", order_by= "date", offset= 1, name= "prev_day_heart_points_earned")
daily_df_da.build_trans_df()

daily_df_da.add_comparison_col_percent(curr_col="current_day_heart_points_earned", prev_col="prev_day_heart_points_earned", comp_col_name="daily_heart_points_change_percent")

In [0]:
daily_df_da.build_agg_config(agg_metric="sum", group_by_cols=["entity", "date"], agg_on_col="daily_step_count", name= "current_day_step_count")
daily_df_da.build_trans_df()

daily_df_da.build_agg_config(agg_metric="lag", group_by_cols=["entity"], agg_on_col="current_day_step_count", order_by= "date", offset= 1, name= "prev_day_step_count")
daily_df_da.build_trans_df()

daily_df_da.add_comparison_col_percent(curr_col="current_day_step_count", prev_col="prev_day_step_count", comp_col_name="daily_step_count_change_percent")

In [0]:
# Render the transformed frame produced by the metric cells above.
daily_df_da.df_trans.display()

In [0]:
# One LinePlot (x axis = "date") per transformed column whose name contains "percent" or "current".
# A comprehension keeps the loop variable scoped: a top-level `for col in ...` would rebind the
# notebook-global `col`, which can shadow a `col` helper shared through `%run` — TODO confirm in gold_layer_utils.
daily_plots = [
    LinePlot(daily_df_da, "date", column_name)
    for column_name in daily_df_da.df_trans.columns
    if re.search(r"percent|current", column_name)
]


def save_daily_plots(LinePlotObj):
    """Draw one plot and write it to the object's ``save_path``.

    Args:
        LinePlotObj: Object exposing ``plot()`` and a ``save_path`` attribute.

    Raises:
        Whatever ``plot()`` or ``plt.savefig`` raises; the exception propagates
        after the current figure has been closed.
    """
    try:
        LinePlotObj.plot()
        plt.savefig(LinePlotObj.save_path)
    finally:
        # Close even on failure so a reused worker process does not carry a
        # half-drawn figure into the next plot.
        plt.close()

# Save every plot in its own worker process.
futures = []
# ProcessPoolExecutor rejects max_workers=0 with ValueError, so skip the pool
# entirely when no column matched and there is nothing to save.
if daily_plots:
    with ProcessPoolExecutor(max_workers=len(daily_plots)) as pe:
        futures = [pe.submit(save_daily_plots, plot_obj) for plot_obj in daily_plots]

# Surface any exception raised inside a worker instead of letting it pass silently.
for future in as_completed(futures):
    future.result()


In [0]:
from IPython.display import Image, display

# Collect where each plot was written, then render every saved image inline.
image_paths = [plot_obj.save_path for plot_obj in daily_plots]

for saved_path in image_paths:
    # Read the raw bytes from the volume and hand them straight to IPython.
    with open(saved_path, "rb") as image_file:
        display(Image(data=image_file.read()))

In [0]:
# Recursively delete everything under the plots volume path.
# This runs after the display cell above, so nothing under that path is kept once the notebook finishes.
dbutils.fs.rm("/Volumes/google_fit/gold/activity_metrics_plots/", recurse= True)

In [0]:
# End the notebook run here, handing "Success" back as the exit value.
dbutils.notebook.exit("Success")