In [31]:
import pandas as pd
from datetime import datetime
import calendar
import os
import json

# Load age analysis
age_df = pd.read_json("json_outputs/age_analysis_clean.json", lines=True)

# Convert FIN_PERIOD from timestamp to datetime
age_df["FIN_PERIOD_DT"] = pd.to_datetime(age_df["FIN_PERIOD"], unit="ms")

# Get min and max dates
min_date = age_df["FIN_PERIOD_DT"].min()
max_date = age_df["FIN_PERIOD_DT"].max()

In [32]:
def generate_financial_months_from_range(start_date, end_date):
    financial_months = []
    current = start_date.replace(day=1)

    while current <= end_date:
        year = current.year
        month = current.month

        # Last Saturday of previous month
        prev_month = month - 1 if month > 1 else 12
        prev_year = year if month > 1 else year - 1
        last_day_prev = calendar.monthrange(prev_year, prev_month)[1]
        last_saturday = max(
            datetime(prev_year, prev_month, d)
            for d in range(last_day_prev - 6, last_day_prev + 1)
            if datetime(prev_year, prev_month, d).weekday() == 5
        )

        # Last Friday of current month
        last_day_curr = calendar.monthrange(year, month)[1]
        last_friday = max(
            datetime(year, month, d)
            for d in range(last_day_curr - 6, last_day_curr + 1)
            if datetime(year, month, d).weekday() == 4
        )

        # Determine quarter
        quarter = (month - 1) // 3 + 1
        quarter_label = f"Q{quarter} {year}"

        financial_months.append({
            "financial_month": f"{year}-{month:02}",
            "start_date": last_saturday.strftime("%Y-%m-%d"),
            "end_date": last_friday.strftime("%Y-%m-%d"),
            "year": year,
            "month": month,
            "quarter": quarter,
            "quarter_label": quarter_label
        })

        # Move to next month
        if month == 12:
            current = datetime(year + 1, 1, 1)
        else:
            current = datetime(year, month + 1, 1)

    return financial_months

# Generate dim_time
dim_time = generate_financial_months_from_range(min_date, max_date)

In [33]:
dim_time_df = pd.DataFrame(dim_time)
dim_time_df.columns.tolist()

['financial_month',
 'start_date',
 'end_date',
 'year',
 'month',
 'quarter',
 'quarter_label']

In [34]:
dim_time_df

Unnamed: 0,financial_month,start_date,end_date,year,month,quarter,quarter_label
0,2018-03,2018-02-24,2018-03-30,2018,3,1,Q1 2018
1,2018-04,2018-03-31,2018-04-27,2018,4,2,Q2 2018
2,2018-05,2018-04-28,2018-05-25,2018,5,2,Q2 2018
3,2018-06,2018-05-26,2018-06-29,2018,6,2,Q2 2018
4,2018-07,2018-06-30,2018-07-27,2018,7,3,Q3 2018
5,2018-08,2018-07-28,2018-08-31,2018,8,3,Q3 2018
6,2018-09,2018-08-25,2018-09-28,2018,9,3,Q3 2018
7,2018-10,2018-09-29,2018-10-26,2018,10,4,Q4 2018
8,2018-11,2018-10-27,2018-11-30,2018,11,4,Q4 2018
9,2018-12,2018-11-24,2018-12-28,2018,12,4,Q4 2018


In [35]:
csv_folder = os.path.join(os.getcwd(), "csv_outputs")
json_folder = os.path.join(os.getcwd(), "json_outputs")

In [36]:
dim_time_df.to_csv(os.path.join(csv_folder, "dim_time_collection.csv"), index=False)
dim_time_df.to_json(os.path.join(json_folder, "dim_time_collection.json"), orient="records", lines=True)