In [0]:
%sql
-- Totals for GL account 2000, one row per (as_of_date, account_name,
-- account_number) combination, listed in as_of_date order.
SELECT
    as_of_date,
    account_name,
    account_number,
    SUM(total_credits),
    SUM(total_debits),
    SUM(net_balance),
    SUM(debit_balance),
    SUM(credit_balance)
FROM fin_demo.acct.fact_gl_trial_balances
WHERE account_number = 2000
GROUP BY ALL
ORDER BY as_of_date

In [0]:
# COMMAND ----------
# ============================================================================
# MAIN EXECUTION - Generate Trial Balance for Oct 2023 to Sept 2025
# ============================================================================

# Load the previously generated trial balance table (expected to cover Oct 2023 to Sept 2025)
import pyspark.sql.functions as F
# Source table holding the trial balance rows.
df_trial_balance = spark.table("fin_demo.acct.fact_gl_trial_balances")


# One summary row per as_of_date: summed debit and credit balances plus the
# number of distinct account numbers present on that date.
print("\nMonthly Summary:")
monthly_aggregates = [
    F.sum("debit_balance").alias("total_debit_balance"),
    F.sum("credit_balance").alias("total_credit_balance"),
    F.countDistinct("account_number").alias("account_count"),
]
df_monthly_summary = (
    df_trial_balance
    .groupBy("as_of_date")
    .agg(*monthly_aggregates)
    .orderBy("as_of_date")
)
display(df_monthly_summary)

# COMMAND ----------
# ============================================================================
# ANALYTICAL QUERIES - TRIAL BALANCE ANALYSIS
# ============================================================================

# Query 1: account-type rollup of the most recent snapshot.
# Restricts to MAX(as_of_date) and reports, per account_type, the distinct
# account count, the summed debit/credit balances and the summed entry_count.
by_type_query = """
    SELECT 
        account_type,
        COUNT(DISTINCT account_number) AS account_count,
        SUM(debit_balance) AS total_debit_balance,
        SUM(credit_balance) AS total_credit_balance,
        SUM(entry_count) AS total_entries
    FROM fin_demo.acct.fact_gl_trial_balances
    WHERE as_of_date = (SELECT MAX(as_of_date) FROM fin_demo.acct.fact_gl_trial_balances)
    GROUP BY account_type
    ORDER BY account_type
"""
print("\n1. Trial Balance by Account Type (Latest Snapshot)")
df_by_type = spark.sql(by_type_query)
display(df_by_type)

# Query 2: the ten largest rows of the most recent snapshot.
# total_balance is debit_balance + credit_balance computed per row (there is
# no aggregation), and rows are ranked on it in descending order.
top_accounts_query = """
    SELECT 
        account_number,
        account_name,
        account_type,
        debit_balance + credit_balance AS total_balance,
        legal_entity_name,
        cost_center_name
    FROM fin_demo.acct.fact_gl_trial_balances
    WHERE as_of_date = (SELECT MAX(as_of_date) FROM fin_demo.acct.fact_gl_trial_balances)
    ORDER BY total_balance DESC
    LIMIT 10
"""
print("\n2. Top 10 Accounts by Balance (Latest Snapshot)")
df_top_accounts = spark.sql(top_accounts_query)
display(df_top_accounts)

# Query 3: most recent snapshot broken down by legal entity and account type.
# Each (legal_entity_name, account_type) pair gets its summed debit and
# credit balances.
by_entity_query = """
    SELECT 
        legal_entity_name,
        account_type,
        SUM(debit_balance) AS total_debit_balance,
        SUM(credit_balance) AS total_credit_balance
    FROM fin_demo.acct.fact_gl_trial_balances
    WHERE as_of_date = (SELECT MAX(as_of_date) FROM fin_demo.acct.fact_gl_trial_balances)
    GROUP BY legal_entity_name, account_type
    ORDER BY legal_entity_name, account_type
"""
print("\n3. Trial Balance by Legal Entity (Latest Snapshot)")
df_by_entity = spark.sql(by_entity_query)
display(df_by_entity)

# Query 4: debit/credit totals per account type across every as_of_date.
# No snapshot filter is applied, so the result has one row per
# (as_of_date, account_type) pair found in the table.
trend_query = """
    SELECT 
        as_of_date,
        account_type,
        SUM(debit_balance) AS total_debit_balance,
        SUM(credit_balance) AS total_credit_balance
    FROM fin_demo.acct.fact_gl_trial_balances
    GROUP BY as_of_date, account_type
    ORDER BY as_of_date, account_type
"""
print("\n4. Trial Balance Trend Over Time")
df_trend = spark.sql(trend_query)
display(df_trend)

# Query 5: most recent snapshot broken down by cost center and account type.
# Rows whose cost_center_name is NULL are left out of the rollup.
by_cost_center_query = """
    SELECT 
        cost_center_name,
        account_type,
        SUM(debit_balance) AS total_debit_balance,
        SUM(credit_balance) AS total_credit_balance
    FROM fin_demo.acct.fact_gl_trial_balances
    WHERE as_of_date = (SELECT MAX(as_of_date) FROM fin_demo.acct.fact_gl_trial_balances)
        AND cost_center_name IS NOT NULL
    GROUP BY cost_center_name, account_type
    ORDER BY cost_center_name, account_type
"""
print("\n5. Trial Balance by Cost Center (Latest Snapshot)")
df_by_cost_center = spark.sql(by_cost_center_query)
display(df_by_cost_center)

# COMMAND ----------
# ============================================================================
# FINANCIAL STATEMENTS BUILDER (Using Trial Balance)
# ============================================================================

def generate_balance_sheet(as_of_date):
    """Generate a Balance Sheet from one trial balance snapshot.

    Selects the Asset, Liability and Equity rows of
    ``fin_demo.acct.fact_gl_trial_balances`` for the requested snapshot and
    adds a ``balance`` column: ``debit_balance`` for Asset accounts and
    ``credit_balance`` for Liability and Equity accounts.

    Args:
        as_of_date: Snapshot date, given as a ``datetime.date`` /
            ``datetime.datetime`` or as an ISO-8601 string such as
            ``'2024-12-31'``.

    Returns:
        The result of ``spark.sql`` with columns ``statement_section``,
        ``account_number``, ``account_name``, ``debit_balance``,
        ``credit_balance`` and ``balance``, ordered by ``account_number``.

    Raises:
        ValueError: If ``as_of_date`` cannot be parsed as an ISO-8601 date.
    """
    import datetime as _dt

    # Normalise the argument to a canonical YYYY-MM-DD string before it is
    # embedded in the SQL text. A malformed value would otherwise risk a
    # silently empty statement, and a stray quote could break or alter the
    # query. datetime is checked first because it is a subclass of date.
    if isinstance(as_of_date, _dt.datetime):
        snapshot_date = as_of_date.date()
    elif isinstance(as_of_date, _dt.date):
        snapshot_date = as_of_date
    else:
        try:
            snapshot_date = _dt.datetime.fromisoformat(str(as_of_date).strip()).date()
        except ValueError as err:
            raise ValueError(
                f"as_of_date must be an ISO-8601 date such as '2024-12-31', got {as_of_date!r}"
            ) from err
    snapshot_iso = snapshot_date.isoformat()

    # The WHERE clause already limits rows to the three balance sheet account
    # types, so the ELSE branches of both CASE expressions are never taken.
    df_bs = spark.sql(f"""
        SELECT 
            CASE 
                WHEN account_type IN ('Asset', 'Liability', 'Equity') THEN account_type
                ELSE 'Other'
            END AS statement_section,
            account_number,
            account_name,
            debit_balance,
            credit_balance,
            CASE 
                WHEN account_type = 'Asset' THEN debit_balance
                WHEN account_type IN ('Liability', 'Equity') THEN credit_balance
                ELSE 0
            END AS balance
        FROM fin_demo.acct.fact_gl_trial_balances
        WHERE as_of_date = '{snapshot_iso}'
            AND account_type IN ('Asset', 'Liability', 'Equity')
        ORDER BY account_number
    """)

    return df_bs

def generate_income_statement(as_of_date):
    """Generate an Income Statement from one trial balance snapshot.

    Selects the Revenue and Expense rows of
    ``fin_demo.acct.fact_gl_trial_balances`` for the requested snapshot and
    adds a ``balance`` column: ``credit_balance`` for Revenue accounts and
    ``debit_balance`` for Expense accounts.

    Args:
        as_of_date: Snapshot date, given as a ``datetime.date`` /
            ``datetime.datetime`` or as an ISO-8601 string such as
            ``'2024-12-31'``.

    Returns:
        The result of ``spark.sql`` with columns ``statement_section``,
        ``account_number``, ``account_name``, ``debit_balance``,
        ``credit_balance`` and ``balance``. Rows are ordered by
        ``account_type`` descending (Revenue before Expense), then by
        ``account_number``.

    Raises:
        ValueError: If ``as_of_date`` cannot be parsed as an ISO-8601 date.
    """
    import datetime as _dt

    # Normalise the argument to a canonical YYYY-MM-DD string before it is
    # embedded in the SQL text. A malformed value would otherwise risk a
    # silently empty statement, and a stray quote could break or alter the
    # query. datetime is checked first because it is a subclass of date.
    if isinstance(as_of_date, _dt.datetime):
        snapshot_date = as_of_date.date()
    elif isinstance(as_of_date, _dt.date):
        snapshot_date = as_of_date
    else:
        try:
            snapshot_date = _dt.datetime.fromisoformat(str(as_of_date).strip()).date()
        except ValueError as err:
            raise ValueError(
                f"as_of_date must be an ISO-8601 date such as '2024-12-31', got {as_of_date!r}"
            ) from err
    snapshot_iso = snapshot_date.isoformat()

    # The WHERE clause already limits rows to Revenue and Expense accounts,
    # so the ELSE 0 branch of the CASE expression is never taken.
    df_is = spark.sql(f"""
        SELECT 
            account_type AS statement_section,
            account_number,
            account_name,
            debit_balance,
            credit_balance,
            CASE 
                WHEN account_type = 'Revenue' THEN credit_balance
                WHEN account_type = 'Expense' THEN debit_balance
                ELSE 0
            END AS balance
        FROM fin_demo.acct.fact_gl_trial_balances
        WHERE as_of_date = '{snapshot_iso}'
            AND account_type IN ('Revenue', 'Expense')
        ORDER BY account_type DESC, account_number
    """)

    return df_is

# Example usage:
# df_balance_sheet = generate_balance_sheet('2024-12-31')
# display(df_balance_sheet)

# df_income_statement = generate_income_statement('2024-12-31')
# display(df_income_statement)

# Final status line. The leading check mark had been corrupted into mojibake
# (UTF-8 bytes decoded as cp1252); it is restored to the intended character.
print("\n✓ Trial Balance generation framework complete!")