In [0]:
# Notebook text widget that selects the target environment. The value read
# here is used further down as the prefix of the input table's schema name.
# NOTE(review): the widget default is an empty string, which would make the
# table name start with "_silver" — confirm the caller always supplies a value.
dbutils.widgets.text(name="env",defaultValue="",label="Enter the environment in lower case")
env = dbutils.widgets.get("env")

from pyspark.sql.functions import lit, avg, pandas_udf, col, row_number
from datetime import datetime, timedelta
from pyspark.sql.types import StructType, StructField, StringType, DateType, DoubleType
import pandas as pd
from pyspark.sql.window import Window

# Fully qualified source table; the schema prefix comes from the `env` widget.
input_table = f"{env}_silver.daily_price_aggregates"
# Moving-average window sizes. They are compared against the per-symbol row
# rank `rn` (1 = newest row), so each size is a number of rows, not of days.
windows = [100,60,30,10]

def read_prices(lookback_days=150):
    """Load recent daily prices and rank each symbol's rows newest-first.

    Reads the module-level ``input_table`` through the notebook's ``spark``
    session, keeps rows whose ``date`` is on or after ``today - lookback_days``
    and adds a column ``rn`` holding the row number within each ``symbol``
    ordered by ``date`` descending (``rn == 1`` is the newest row).

    Args:
        lookback_days: Number of calendar days to look back from today.
            Defaults to 150, the value this function previously hard-coded.

    Returns:
        The filtered DataFrame with the extra ``rn`` column.

    Raises:
        ValueError: If ``lookback_days`` is negative.
    """
    if lookback_days < 0:
        raise ValueError("lookback_days must be non-negative")

    # Read the clock once so every bound below is derived from the same day,
    # even if the run crosses midnight.
    today = datetime.today().date()
    start_date = today - timedelta(days=lookback_days)

    year_col = col("year")
    month_col = col("month")

    # Coarse pre-filter on the year/month columns (presumably partition
    # columns derived from `date` — TODO confirm) before the exact date filter.
    if start_date.year == today.year:
        # Both bounds fall in the same year, so the month must be bounded on
        # both sides. OR-ing "month >= start" with "month <= current" for the
        # same year would match every month of that year.
        period_filter = (
            (year_col == today.year)
            & (month_col >= start_date.month)
            & (month_col <= today.month)
        )
    else:
        period_filter = (
            ((year_col == start_date.year) & (month_col >= start_date.month))
            | ((year_col > start_date.year) & (year_col < today.year))
            | ((year_col == today.year) & (month_col <= today.month))
        )

    df_recent = spark.table(input_table).filter(period_filter)

    # Exact lower bound; the date is passed as an ISO "YYYY-MM-DD" string.
    df_filtered = df_recent.filter(col("date") >= lit(start_date.isoformat()))

    # Rank rows per symbol from newest to oldest.
    window_spec = Window.partitionBy("symbol").orderBy(col("date").desc())
    df_latest = df_filtered.withColumn("rn", row_number().over(window_spec))

    # Preview for the notebook output; note that this triggers a Spark job.
    df_latest.show(10)

    return df_latest

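# NOTE(review): original note read "NULL when no last 60 days present".
# Presumably a moving average is expected to be NULL for a symbol that lacks
# a full window of history (e.g. 60 rows) — confirm the intended behaviour.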


def moving_average(df_input, df_output, window_sizes):
    """Join per-symbol moving averages of ``avg_price`` onto ``df_output``.

    For every size ``ws`` in ``window_sizes`` a column ``ma_<ws>`` is computed
    as the average of ``avg_price`` over the rows of ``df_input`` whose ``rn``
    is at most ``ws``, grouped by ``symbol``. Rows outside the window become
    NULL in the conditional expression and are therefore ignored by ``avg``;
    a symbol with fewer than ``ws`` rows is averaged over the rows it has.

    Args:
        df_input: DataFrame with ``symbol``, ``rn`` and ``avg_price`` columns.
        df_output: DataFrame with a ``symbol`` column to attach the averages to.
        window_sizes: Iterable of window sizes, in rows.

    Returns:
        ``df_output`` inner-joined on ``symbol`` with one ``ma_<ws>`` column
        per window size, or ``df_output`` unchanged when ``window_sizes`` is
        empty.
    """
    # Imported locally: the file-level pyspark.sql.functions import does not
    # bring `when` into scope, so the expression below raised NameError.
    from pyspark.sql.functions import when

    ma_exprs = [
        avg(when(col("rn") <= ws, col("avg_price"))).alias(f"ma_{ws}")
        for ws in window_sizes
    ]

    # agg() needs at least one expression; with no windows there is nothing
    # to attach.
    if not ma_exprs:
        return df_output

    ma_df = df_input.groupBy("symbol").agg(*ma_exprs)
    return df_output.join(ma_df, on="symbol", how="inner")




# Recent prices with a per-symbol newest-first row rank in column `rn`.
df_filtered = read_prices()
# Base of the result: each symbol's newest row (rn == 1), without the rank.
df_result = df_filtered.filter(col("rn") == 1).drop("rn")

# Attach one `ma_<size>` column per entry in `windows`.
df_result = moving_average(df_filtered, df_result, windows)


