### Goal

We would like to model

$x_{i,t}=\Lambda f_t + \epsilon_{i,t}$

* $i$ = patient index
* $t$ = time (irregular visits per patient)
* $x_{i,t} \in R^p$ Patient embedding (the observed realization generated by the latent factors)
* $f_t \in R^r$ Shared latent temporal factors (underlying population health states)
* $\Lambda \in R^{p\times r}$ Factor loadings (relationship between embeddings and latent factors)

### Mapping

| Model Element | Interpretation |
|---------------|----------------|
| Observed variables | Daily metrics (Counts by department, age group, diagnosis group) |
| Latent factors | Underlying patient-type intensities that jointly influence those metrics |
| Factor loadings | How strongly each variable is associated with each latent patient type |
| Factor dynamics | How those patient types evolve over time (trends, cycles, shocks) |

In [45]:
import os
import sys
root_dir = os.path.abspath('../')
sys.path.append(root_dir)

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pipeline.dynamic_factor.hana_ent.run import DFMConfig, RegressionConfig, RollingConfig
from pipeline.dynamic_factor.hana_ent.run import rolling_method_dfm
from pipeline.dynamic_factor.hana_ent.run import compare_pred_with_actual

# --- Colour palette: eight colours sampled from seaborn's "viridis" map ---
palette = sns.color_palette("viridis", 8)

# Global plotting theme: white grid, sans-serif text, and the rc overrides below.
sns.set_theme(
    style="whitegrid",
    font="sans-serif",
    rc={
        # Text sizes and weights
        "font.size": 13,
        "axes.titlesize": 16,
        "axes.titleweight": "bold",
        "axes.labelsize": 13,
        "axes.labelweight": "semibold",
        # Axes frame
        "axes.edgecolor": "#2F2F2F",
        "axes.linewidth": 0.8,
        # Grid lines
        "grid.color": "#CCCCCC",
        "grid.linewidth": 0.6,
        "grid.alpha": 0.35,
        # Background colours
        "figure.facecolor": "#FAFAFA",
        "axes.facecolor": "#FFFFFF",
        # Default line colours cycle through `palette`
        "axes.prop_cycle": plt.cycler("color", palette),
    }
)

In [2]:
# Dynamic Factor Model - engineered data
# Read the daily panel, then in one chain: put it on a gap-free daily calendar,
# zero-fill the missing cells, and sum the days into weekly totals.
dfm_data = pd.read_parquet(os.path.join(root_dir, "data/processed/hana_ent/dfm_daily.parquet"))
full_range = pd.date_range(start=dfm_data.index.min(), end=dfm_data.index.max(), freq='D')
dfm_data = dfm_data.reindex(full_range).fillna(0).resample('W').sum()

# Supply data
# Same treatment as above: daily calendar -> zero-fill -> weekly totals.
supply = pd.read_parquet(os.path.join(root_dir, "./data/processed/hana_ent/supply.parquet"))
full_range = pd.date_range(start=supply.index.min(), end=supply.index.max(), freq='D')
supply = supply.reindex(full_range).fillna(0).resample('W').sum()

# Major supply: dexamethasone, tramadol, netilmicin, electrolytes with carbohydrates, diclofenac
supply_major = supply[
    [
        'dexamethasone',
        'tramadol',
        'netilmicin',
        'electrolytes with carbohydrates',
        'diclofenac',
    ]
]

In [3]:
# Preview the first two weekly rows of the DFM input panel.
dfm_data.head(2)

Unnamed: 0,male,female,age_0_10,age_10_20,age_20_30,age_30_40,age_40_50,age_50_60,age_60_70,age_70_80,...,M01,M03,N01,N02,N05,N07,R03,R05,R06,V03
2018-01-07,1703.0,1512.0,105.0,46.0,656.0,899.0,707.0,406.0,208.0,86.0,...,78.0,0.0,46.0,1047.0,0.0,0.0,0.0,6.0,25.0,0.0
2018-01-14,1660.0,1498.0,95.0,54.0,687.0,1030.0,651.0,349.0,188.0,88.0,...,121.0,0.0,44.0,882.0,0.0,0.0,0.0,2.0,29.0,0.0


In [4]:
# Preview the first two weekly rows of the five selected supply columns.
supply_major.head(2)

prescription,dexamethasone,tramadol,netilmicin,electrolytes with carbohydrates,diclofenac
2018-01-07,1414.0,1047.0,336.0,107.0,78.0
2018-01-14,1348.0,882.0,501.0,85.0,121.0


##### Number of factors : 3

In [None]:
# Number of factors: 3
# Experiment: rolling DFM forecast with 3 latent factors.
# NOTE(review): the exact meaning of each config field is defined in
# pipeline.dynamic_factor.hana_ent.run; the inline notes below are the
# author's intent — confirm against that module.
dfm_cfg = DFMConfig(
    n_factors=3,      # start small, you can tune this
    factor_order=1
)

# Regression settings are left at their defaults; `target_columns` is read from here below.
reg_cfg = RegressionConfig()

roll_cfg = RollingConfig(
    window_type="rolling",
    window_size=104,      # for example: 2 years of weekly data
    forecast_horizon=4,   # 4 weeks ahead
    min_train_size=80     # first forecast after ~1.5 years
)

# Run the rolling procedure on the weekly DFM panel and the full weekly supply table.
pred, factor = rolling_method_dfm(
    dfm_data=dfm_data,
    supply_data=supply,
    dfm_cfg=dfm_cfg,
    reg_cfg=reg_cfg,
    roll_cfg=roll_cfg
)

# Pair the predictions with the observed supply for the configured target columns.
# `comparison` is reused by every experiment section, so it always holds the latest run.
comparison = compare_pred_with_actual(
    pred=pred,
    supply_true=supply,
    target_cols=reg_cfg.target_columns
)

In [63]:
# Export the comparison table for the 3-factor run. The relative path resolves
# against the kernel's working directory, not `root_dir`.
# NOTE(review): `comparison` is overwritten by each experiment cell, so run this
# right after the 3-factor run.
comparison.to_excel("dfm3_comparison.xlsx")

In [None]:
def plot_separate_graphs_better(
    comparison_df: pd.DataFrame,
    target_cols: list,
    output_subdir: str = "dfm3",
):
    """
    Plot predicted vs. actual values, one figure per target column.

    Every figure is shown and also written to
    ``<root_dir>/notebook/static/<output_subdir>/dfm_<col>.png``. The output
    folder is created when it does not exist yet.

    Parameters
    ----------
    comparison_df : pd.DataFrame
        Frame whose index supports ``.strftime`` (used for the x tick labels)
        and where ``comparison_df[col]['true']`` / ``comparison_df[col]['pred']``
        return the actual and predicted series for each plotted column.
    target_cols : list
        Columns of ``comparison_df`` to plot; one figure is produced per entry.
    output_subdir : str, default "dfm3"
        Folder under ``notebook/static`` that receives the PNG files.

    Returns
    -------
    None
    """

    # Distinct, highly contrasting colors
    pred_color = "#1f77b4"   # blue
    true_color = "#d62728"   # red

    # Loop-invariant pieces: integer positions and their date labels.
    x = np.arange(len(comparison_df))
    tick_labels = comparison_df.index.strftime("%Y-%m-%d")

    # savefig does not create missing folders, so make sure the target exists.
    out_dir = os.path.join(root_dir, f"notebook/static/{output_subdir}")
    os.makedirs(out_dir, exist_ok=True)

    for col in target_cols:
        fig, ax = plt.subplots(figsize=(14, 4))

        # Plot actual values
        ax.plot(
            x,
            comparison_df[col]['true'],
            label="Actual",
            linewidth=2.6,
            color=true_color,
            marker="o",
            markersize=6
        )

        # Plot predicted values
        ax.plot(
            x,
            comparison_df[col]['pred'],
            label="Predicted",
            linewidth=2.6,
            color=pred_color,
            marker="s",
            markersize=6
        )

        # Ticks sit at integer positions but are labelled with the index dates.
        ax.set_xticks(x)
        ax.set_xticklabels(tick_labels, rotation=90)
        ax.set_title(f"Predicted vs Actual – {col}", weight="bold", fontsize=16)
        ax.set_xlabel("Date")
        ax.set_ylabel("Weekly Usage")
        ax.grid(True, alpha=0.3)
        ax.legend()
        fig.tight_layout()
        fig.savefig(os.path.join(out_dir, f"dfm_{col}.png"))
        plt.show()
        # Release the figure so a long list of targets does not pile up in memory.
        plt.close(fig)

In [None]:
# Draw and save one predicted-vs-actual figure per regression target column.
plot_separate_graphs_better(
    comparison_df=comparison,
    target_cols=reg_cfg.target_columns
)

##### Number of factors : 5

In [None]:
# Number of factors: 5
# Experiment: rolling DFM forecast with 5 latent factors; only `n_factors`
# differs from the 3-factor configuration.
# NOTE(review): the exact meaning of each config field is defined in
# pipeline.dynamic_factor.hana_ent.run — confirm against that module.
dfm_cfg = DFMConfig(
    n_factors=5,      # five latent factors for this experiment
    factor_order=1
)

# Regression settings are left at their defaults; `target_columns` is read from here below.
reg_cfg = RegressionConfig()

roll_cfg = RollingConfig(
    window_type="rolling",
    window_size=104,      # for example: 2 years of weekly data
    forecast_horizon=4,   # 4 weeks ahead
    min_train_size=80     # first forecast after ~1.5 years
)

# Run the rolling procedure on the weekly DFM panel and the full weekly supply table.
pred, factor = rolling_method_dfm(
    dfm_data=dfm_data,
    supply_data=supply,
    dfm_cfg=dfm_cfg,
    reg_cfg=reg_cfg,
    roll_cfg=roll_cfg
)

# Pair the predictions with the observed supply for the configured target columns.
# This overwrites the `comparison` produced by any earlier experiment.
comparison = compare_pred_with_actual(
    pred=pred,
    supply_true=supply,
    target_cols=reg_cfg.target_columns
)

In [56]:
def plot_separate_graphs_better(
    comparison_df: pd.DataFrame,
    target_cols: list,
    output_subdir: str = "dfm5",
):
    """
    Plot predicted vs. actual values, one figure per target column.

    Every figure is shown and also written to
    ``<root_dir>/notebook/static/<output_subdir>/dfm_<col>.png``. The output
    folder is created when it does not exist yet.

    Parameters
    ----------
    comparison_df : pd.DataFrame
        Frame whose index supports ``.strftime`` (used for the x tick labels)
        and where ``comparison_df[col]['true']`` / ``comparison_df[col]['pred']``
        return the actual and predicted series for each plotted column.
    target_cols : list
        Columns of ``comparison_df`` to plot; one figure is produced per entry.
    output_subdir : str, default "dfm5"
        Folder under ``notebook/static`` that receives the PNG files.

    Returns
    -------
    None
    """

    # Distinct, highly contrasting colors
    pred_color = "#1f77b4"   # blue
    true_color = "#d62728"   # red

    # Loop-invariant pieces: integer positions and their date labels.
    x = np.arange(len(comparison_df))
    tick_labels = comparison_df.index.strftime("%Y-%m-%d")

    # savefig does not create missing folders, so make sure the target exists.
    out_dir = os.path.join(root_dir, f"notebook/static/{output_subdir}")
    os.makedirs(out_dir, exist_ok=True)

    for col in target_cols:
        fig, ax = plt.subplots(figsize=(14, 4))

        # Plot actual values
        ax.plot(
            x,
            comparison_df[col]['true'],
            label="Actual",
            linewidth=2.6,
            color=true_color,
            marker="o",
            markersize=6
        )

        # Plot predicted values
        ax.plot(
            x,
            comparison_df[col]['pred'],
            label="Predicted",
            linewidth=2.6,
            color=pred_color,
            marker="s",
            markersize=6
        )

        # Ticks sit at integer positions but are labelled with the index dates.
        ax.set_xticks(x)
        ax.set_xticklabels(tick_labels, rotation=90)
        ax.set_title(f"Predicted vs Actual – {col}", weight="bold", fontsize=16)
        ax.set_xlabel("Date")
        ax.set_ylabel("Weekly Usage")
        ax.grid(True, alpha=0.3)
        ax.legend()
        fig.tight_layout()
        fig.savefig(os.path.join(out_dir, f"dfm_{col}.png"))
        plt.show()
        # Release the figure so a long list of targets does not pile up in memory.
        plt.close(fig)

In [None]:
# Draw and save one predicted-vs-actual figure per regression target column.
plot_separate_graphs_better(
    comparison_df=comparison,
    target_cols=reg_cfg.target_columns
)

In [61]:
# Export the comparison table for the 5-factor run. The relative path resolves
# against the kernel's working directory, not `root_dir`.
# NOTE(review): `comparison` is overwritten by each experiment cell, so run this
# right after the 5-factor run.
comparison.to_excel("dfm5_comparison.xlsx")

##### Number of factors : 7

In [64]:
# Number of factors: 7
# Experiment: rolling DFM forecast with 7 latent factors; only `n_factors`
# differs from the other experiment configurations.
# NOTE(review): the exact meaning of each config field is defined in
# pipeline.dynamic_factor.hana_ent.run — confirm against that module.
dfm_cfg = DFMConfig(
    n_factors=7,      # seven latent factors for this experiment
    factor_order=1
)

# Regression settings are left at their defaults; `target_columns` is read from here below.
reg_cfg = RegressionConfig()

roll_cfg = RollingConfig(
    window_type="rolling",
    window_size=104,      # for example: 2 years of weekly data
    forecast_horizon=4,   # 4 weeks ahead
    min_train_size=80     # first forecast after ~1.5 years
)

# Run the rolling procedure on the weekly DFM panel and the full weekly supply table.
pred, factor = rolling_method_dfm(
    dfm_data=dfm_data,
    supply_data=supply,
    dfm_cfg=dfm_cfg,
    reg_cfg=reg_cfg,
    roll_cfg=roll_cfg
)

# Pair the predictions with the observed supply for the configured target columns.
# This overwrites the `comparison` produced by any earlier experiment.
comparison = compare_pred_with_actual(
    pred=pred,
    supply_true=supply,
    target_cols=reg_cfg.target_columns
)

[DEBUG] Fitting window 80
[Debug] Xw shape: (80, 26) | Sw shape: (80, 48)




[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG



[DEBUG] Fitting model for Vitamin B-complex, plain
[DEBUG] Fitting model for alprostadil
[DEBUG] Fitting model for ambroxol
[DEBUG] Fitting model for amoxicillin and beta-lactamase inhibitor
[DEBUG] Fitting model for carbohydrates
[DEBUG] Fitting model for cefmetazole
[DEBUG] Fitting model for cefotetan
[DEBUG] Fitting model for cefotiam
[DEBUG] Fitting model for ceftriaxone
[DEBUG] Fitting model for cefuroxime
[DEBUG] Fitting model for chlorphenamine
[DEBUG] Fitting model for cimetidine
[DEBUG] Fitting model for ciprofloxacin
[DEBUG] Fitting model for dexamethasone
[DEBUG] Fitting model for diazepam
[DEBUG] Fitting model for diclofenac
[DEBUG] Fitting model for dupilumab
[DEBUG] Fitting model for electrolytes
[DEBUG] Fitting model for electrolytes with carbohydrates
[DEBUG] Fitting model for ephedrine
[DEBUG] Fitting model for etomidate
[DEBUG] Fitting model for fentanyl
[DEBUG] Fitting model for flomoxef
[DEBUG] Fitting model for flumazenil
[DEBUG] Fitting model for gentamicin
[DEBUG

In [65]:
def plot_separate_graphs_better(
    comparison_df: pd.DataFrame,
    target_cols: list,
    output_subdir: str = "dfm7",
):
    """
    Plot predicted vs. actual values, one figure per target column.

    Every figure is shown and also written to
    ``<root_dir>/notebook/static/<output_subdir>/dfm_<col>.png``. The output
    folder is created when it does not exist yet.

    Parameters
    ----------
    comparison_df : pd.DataFrame
        Frame whose index supports ``.strftime`` (used for the x tick labels)
        and where ``comparison_df[col]['true']`` / ``comparison_df[col]['pred']``
        return the actual and predicted series for each plotted column.
    target_cols : list
        Columns of ``comparison_df`` to plot; one figure is produced per entry.
    output_subdir : str, default "dfm7"
        Folder under ``notebook/static`` that receives the PNG files.

    Returns
    -------
    None
    """

    # Distinct, highly contrasting colors
    pred_color = "#1f77b4"   # blue
    true_color = "#d62728"   # red

    # Loop-invariant pieces: integer positions and their date labels.
    x = np.arange(len(comparison_df))
    tick_labels = comparison_df.index.strftime("%Y-%m-%d")

    # savefig does not create missing folders, so make sure the target exists.
    out_dir = os.path.join(root_dir, f"notebook/static/{output_subdir}")
    os.makedirs(out_dir, exist_ok=True)

    for col in target_cols:
        fig, ax = plt.subplots(figsize=(14, 4))

        # Plot actual values
        ax.plot(
            x,
            comparison_df[col]['true'],
            label="Actual",
            linewidth=2.6,
            color=true_color,
            marker="o",
            markersize=6
        )

        # Plot predicted values
        ax.plot(
            x,
            comparison_df[col]['pred'],
            label="Predicted",
            linewidth=2.6,
            color=pred_color,
            marker="s",
            markersize=6
        )

        # Ticks sit at integer positions but are labelled with the index dates.
        ax.set_xticks(x)
        ax.set_xticklabels(tick_labels, rotation=90)
        ax.set_title(f"Predicted vs Actual – {col}", weight="bold", fontsize=16)
        ax.set_xlabel("Date")
        ax.set_ylabel("Weekly Usage")
        ax.grid(True, alpha=0.3)
        ax.legend()
        fig.tight_layout()
        fig.savefig(os.path.join(out_dir, f"dfm_{col}.png"))
        plt.show()
        # Release the figure so a long list of targets does not pile up in memory.
        plt.close(fig)

In [None]:
# Draw and save one predicted-vs-actual figure per regression target column.
plot_separate_graphs_better(
    comparison_df=comparison,
    target_cols=reg_cfg.target_columns
)

In [67]:
# Export the comparison table for the 7-factor run. The relative path resolves
# against the kernel's working directory, not `root_dir`.
# NOTE(review): `comparison` is overwritten by each experiment cell, so run this
# right after the 7-factor run.
comparison.to_excel("dfm7_comparison.xlsx")