In [0]:
%run /Workspace/Users/rishak1997@gmail.com/google-fit-ETL-and-analytics/gold_layer_utils

In [0]:
from concurrent.futures import ProcessPoolExecutor, as_completed

In [0]:
%sql

-- Make sure the gold schema exists before anything is written to it.
CREATE SCHEMA IF NOT EXISTS google_fit.gold

In [0]:
%sql

-- Make sure the volume used for the activity-metric plot files exists.
CREATE VOLUME IF NOT EXISTS google_fit.gold.activity_metrics_plots

In [0]:
# Load the daily activity metrics from the silver layer as the base DataFrame.
daily_activity_metric_df_base = CreateDF.from_table(
    schema_name="silver",
    table_name="daily_activity_metrics",
)

In [0]:
def prefix_daily(col):
    """Return ``col`` prefixed with ``daily_``, leaving key columns untouched.

    The key columns ``date``, ``week``, ``month`` and ``entity`` are returned
    unchanged; every other name becomes ``daily_<name>``.
    """
    if col in ("date", "week", "month", "entity"):
        return col
    return f"daily_{col}"

# Keep only the columns whose names do not match any of the excluded
# fragments, then add a "month" column holding the date truncated to the
# month and cast to a date.
kept_columns = [
    column_name
    for column_name in daily_activity_metric_df_base.columns
    if re.search(r"max_|min_|average_|cycling|running", column_name) is None
]
month_start = F.date_trunc("month", F.col("date")).cast(DateType())

daily_activity_metric_df_base = daily_activity_metric_df_base.select(*kept_columns).withColumn("month", month_start)

In [0]:
# Rename every non-key column with the "daily_" prefix (see prefix_daily).
renamed_columns = [prefix_daily(column_name) for column_name in daily_activity_metric_df_base.columns]
daily_activity_metric_df_renamed = daily_activity_metric_df_base.toDF(*renamed_columns)

## **Monthly metrics**

In [0]:
# Key columns plus the daily metrics handed to the monthly aggregation helper.
monthly_input_columns = [
    "entity",
    "date",
    "month",
    "daily_move_minutes_count",
    "daily_calories_kcal",
    "daily_distance_m",
    "daily_heart_points",
    "daily_heart_minutes",
    "daily_step_count",
    "daily_walking_duration_ms",
]

monthly_df_da = DeclarativeAggregations(daily_activity_metric_df_renamed.select(monthly_input_columns))

# Set up the helper and clear its aggregation config / transformed frame
# before building the monthly metrics (helper semantics live in
# gold_layer_utils - confirm there).
monthly_df_da.define_entities()
monthly_df_da.clear_agg_config()
monthly_df_da.clear_df_trans()

monthly_df_da.df.display()

In [0]:
# Add a per-entity rank ordered by month as column "drank"; later cells use
# drank == 1 to blank out the previous-month value.
monthly_df_da.build_agg_config(
    agg_metric="rank",
    group_by_cols=["entity"],
    order_by="month",
    name="drank",
)
monthly_df_da.build_trans_df()

In [0]:
# Inspect the aggregation helper's current state.
# NOTE(review): presumably shows the registered config/attributes - confirm in gold_layer_utils.
monthly_df_da.current_attributes()

In [0]:
# One entry per monthly metric:
# (daily source column, suffix shared by the current_/prev_ month columns,
#  name of the month-over-month percent-change column).
MONTHLY_METRIC_SPECS = [
    ("daily_move_minutes_count", "move_minutes", "monthly_move_minutes_change_percent"),
    ("daily_calories_kcal", "calories_burned_kcal", "monthly_calories_burned_change_percent"),
    ("daily_distance_m", "distance_covered_meters", "monthly_distance_covered_change_percent"),
    ("daily_heart_points", "heart_points_earned", "monthly_heart_points_change_percent"),
    ("daily_step_count", "step_count", "monthly_step_count_change_percent"),
]

# Number of rows to look back when picking the previous month's total.
# NOTE(review): a fixed 31-row look-back presumably assumes one row per day
# per entity, ordered by date. If so, rows early in a month that follows a
# shorter month (e.g. 1-3 March) would reach two months back - confirm the
# window ordering used by DeclarativeAggregations.
PREV_MONTH_ROW_LOOKBACK = 31


def add_monthly_metric(da, daily_col, metric_suffix, change_col):
    """Add the current-month, previous-month and percent-change columns for one metric.

    Parameters
    ----------
    da : DeclarativeAggregations
        Aggregation helper whose ``df_trans`` is extended in place. It must
        already contain the ``drank`` column.
    daily_col : str
        Daily metric column that is summed per (entity, month).
    metric_suffix : str
        Suffix used to name ``current_month_<suffix>`` and ``prev_month_<suffix>``.
    change_col : str
        Name of the percent-change column comparing current vs. previous month.
    """
    current_col = f"current_month_{metric_suffix}"
    prev_col = f"prev_month_{metric_suffix}"

    # Monthly total of the daily metric for each entity.
    da.build_agg_config(
        agg_metric="sum",
        group_by_cols=["entity", "month"],
        agg_on_col=daily_col,
        name=current_col,
    )
    da.build_trans_df()

    # Previous month's total: first value of the monthly total within the
    # look-back row window for each entity.
    da.build_agg_config(
        agg_metric="first_value",
        group_by_cols=["entity"],
        agg_on_col=current_col,
        name=prev_col,
        rows_between_args=(-PREV_MONTH_ROW_LOOKBACK, 0),
    )
    da.build_trans_df()

    # Rows with drank == 1 have no previous month, so the value is nulled.
    da.df_trans = da.df_trans.withColumn(
        prev_col,
        F.when(F.col("drank") == 1, None).otherwise(F.col(prev_col)),
    )

    da.add_comparison_col_percent(curr_col=current_col, prev_col=prev_col, comp_col_name=change_col)


# Same order as before, so the resulting column order of df_trans is unchanged.
for metric_spec in MONTHLY_METRIC_SPECS:
    add_monthly_metric(monthly_df_da, *metric_spec)

In [0]:
# Render the transformed DataFrame, including the monthly aggregate and
# percent-change columns added above.
monthly_df_da.df_trans.display()

In [0]:
# One line plot (x = "month") for every column whose name contains
# "percent" or "current". A comprehension is used so that no loop variable
# (previously `col`) is rebound in the notebook's global namespace.
monthly_plots = [
    LinePlot(monthly_df_da, "month", column_name)
    for column_name in monthly_df_da.df_trans.columns
    if re.search(r"percent|current", column_name)
]


def save_monthly_plots(LinePlotObj):
    """Draw one plot and save the current figure to ``LinePlotObj.save_path``.

    The figure is closed even when plotting or saving fails, so a worker
    process that is reused for another plot does not keep a stale figure.
    """
    try:
        LinePlotObj.plot()
        plt.savefig(LinePlotObj.save_path)
    finally:
        plt.close()


# NOTE(review): submitting to a process pool pickles each LinePlot; this
# assumes LinePlot (and whatever it holds) is picklable - confirm.
futures = []
if monthly_plots:
    # ProcessPoolExecutor raises ValueError for max_workers <= 0, so the pool
    # is only created when there is at least one plot to save.
    with ProcessPoolExecutor(len(monthly_plots)) as pe:
        futures = [pe.submit(save_monthly_plots, plot_obj) for plot_obj in monthly_plots]

        # Re-raise any exception that occurred in a worker.
        for future in as_completed(futures):
            future.result()


In [0]:
from IPython.display import Image, display

def _read_image_bytes(path):
    """Return the raw bytes of the image file stored at ``path``."""
    with open(path, "rb") as handle:
        return handle.read()


# Show every saved plot inline, in the order the plots were created.
image_paths = [plot_obj.save_path for plot_obj in monthly_plots]

for saved_path in image_paths:
    display(Image(data=_read_image_bytes(saved_path)))

In [0]:
# Recursively delete everything under the plots volume path.
# NOTE(review): presumably this is where LinePlot.save_path points, so the
# images displayed above are removed once shown - confirm in gold_layer_utils.
dbutils.fs.rm("/Volumes/google_fit/gold/activity_metrics_plots/", recurse= True)

In [0]:
# Stop the notebook here and hand the string "Success" back to whatever
# invoked it (e.g. a job or a parent notebook).
dbutils.notebook.exit("Success")