### Import relevant libraries

In [42]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import f_oneway
from semopy import Model, calc_stats, semplot
from semopy.inspector import inspect
from sklearn.preprocessing import MinMaxScaler
# from semopy.diagram import write_png
# from semopy import build_model_diagram


### Import the dataset

In [29]:
# Load the survey export; the file is semicolon-delimited.
survey_csv = "Welzijnsmonitor2025_scaled_normalised_UTF8.csv"
df = pd.read_csv(survey_csv, sep=";")

### Creating the model

In [None]:
# semopy model specification, passed to `Model(...)` in the fitting cell.
# Only a measurement model is specified here: every `name =~ x1 + x2 + ...`
# line declares a latent variable together with its indicators.
# NOTE(review): many latent variables have a single indicator (for example
# `study_motivation =~ Mot_Stress_1`) — confirm this is intended and that the
# model is identified.
# NOTE(review): `Welbevinden` loads on `Veerkracht_1` (declared one line above
# it) plus `Bevlogenheid_en_Burnout_1`, `Gedrag_1` and `Motivatie`, which are
# not declared in this string — presumably dataset columns; verify.
model_desc = """
# Measurement model

engagement =~ Bevl_1 + Bevl_2 + Bevl_3

emotional_exhaustion =~ Burn1_1 + Burn1_2 + Burn1_3 + Burn1_4

study_motivation =~ Mot_Stress_1

study_stress =~ Mot_Stress_2

unnecess_stress =~ Onnodige_stress_1 + Onnodige_stress_2 + Onnodige_stress_3 + Onnodige_stress_4 + Onnodige_stress_5 + Onnodige_stress_6 + Onnodige_stress_7 + Onnodige_stress_8 + Onnodige_stress_9 + Onnodige_stress_10

life_style =~ Leefst

life_satisfaction =~ Cantrill_1

depression =~ Depr_1 + Depr_2 + Depr_3 + Depr_4 + Depr_5 + Depr_6 + Depr_7 + Depr_8

study_point =~ StPunt_beh

grade =~ Cijfer_huidig_1

study_delay =~ Vertr

stop_study =~ StopInt

support_study =~ Hulp_1

support_practical =~ Hulp_2

support_socio =~ Hulp_3

curriculum_engagement =~ Cogn_Eng1_1 + Cogn_Eng1_2 + Cogn_Eng1_3 + Cogn_Eng1_4 

belonging =~ Cogn_Eng1_5 + Cogn_Eng1_6

social_integration =~ Cogn_Eng2_1 + Cogn_Eng2_2 + Cogn_Eng2_3 + Cogn_Eng2_4

academic_integration =~ Cogn_Eng2_5 + Cogn_Eng2_6 + Cogn_Eng2_7 + Cogn_Eng2_8

participation =~ Partici1_1 + Partici1_2 + Partici1_3 + Partici1_4

continue_study =~ Stopint2

parents_engagement =~ Betrok_Ouders



Veerkracht_1 =~ Veer_1 + Veer_5
Welbevinden =~ Bevlogenheid_en_Burnout_1 + Gedrag_1 + Motivatie + Veerkracht_1

Bekendheid_Interventies =~ Bekendgebruik_1 + Bekendgebruik_2 + Bekendgebruik_3 + Bekendgebruik_4 + Bekendgebruik_5 + Bekendgebruik_6 + Bekendgebruik_7

work_hour =~ Werk_1


Extra_Engagement =~ Cogregiedocent_1 + Cogregiedocent_2 + Cogregiedocent_3 +  Cogregiedocent_4 + Cogregiedocent_5 + Cogregiedocent_6 + Extr_eng2_1 + Extr_eng2_2 + Extr_eng2_3
"""

In [21]:
# Build the SEM from the textual specification and fit it to the survey data.
model = Model(model_desc)
model.fit(data=df)

# Score every respondent on the model's latent variables.
latent_scores = model.predict_factors(df)

### Export the latent variable scores

In [22]:
# `latent_scores` was already produced by `model.predict_factors(df)` in the
# model-fitting cell, so it is reused here instead of running the factor
# prediction a second time.
latent_scores.to_csv("latent_variable_scores.csv", index=False)

# Quick look at the first rows to confirm the export content.
print("Latent variable scores extracted:")
print(latent_scores.head())

Latent variable scores extracted:
   Achtergrondkenmerken_1  Achtergrondkenmerken_2  Achtergrondkenmerken_3  \
0                0.516109                0.000925               -0.129402   
1                0.369231                0.015490               -0.320264   
2                0.506303                0.023396               -0.156456   
3               -0.161854                0.004191               -0.315727   
4                0.381887                0.008432               -0.302764   

   Bekendheid_Interventies  Bevlogenheid_en_Burnout_1  \
0                -0.253010                   0.257458   
1                -0.258810                   0.054494   
2                -0.253398                   0.433510   
3                -0.259518                  -0.130457   
4                -0.232772                  -0.118808   

   Bevlogenheid_en_Burnout_2  Depressie  Energiebronnen  Extra_Engagement  \
0                  -0.131377  -0.239755        0.011190          0.003618   
1     

### Create a min-max normalization for the latent variables (temporary solution)

In [23]:
# Fit a MinMaxScaler (default settings) on the latent scores and rescale them.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(latent_scores)

# fit_transform returns a plain array, so restore the original row index and
# column labels.
latent_scores_normalized = pd.DataFrame(
    data=scaled_values,
    index=latent_scores.index,
    columns=latent_scores.columns,
)

In [25]:
# Map each normalized latent score to a "<name>_norm" column label.
normalized_columns = {
    col + "_norm": latent_scores_normalized[col]
    for col in latent_scores_normalized.columns
}

# `assign` works on a copy, so the original `df` is left untouched.
df_with_latent_norm = df.assign(**normalized_columns)

# Persist the combined table (row index is not written).
df_with_latent_norm.to_csv("data_with_normalized_latent_variables.csv", index=False)

### Calculate fit indices

In [30]:
# Compute semopy's fit statistics for the fitted model.
stats = calc_stats(model)

# Print the header, then the transposed table on the following line.
print("=== Model Fit Statistics ===", stats.T, sep="\n")

=== Model Fit Statistics ===
                      Value
DoF             3879.000000
DoF Baseline    4005.000000
chi2           21676.237683
chi2 p-value       0.000000
chi2 Baseline  56457.934823
CFI                0.660701
GFI                0.616064
AGFI               0.603593
NFI                0.616064
TLI                0.649680
RMSEA              0.055086
AIC              403.346679
BIC             1552.866217
LogLik            14.326661


### Create and save the path diagram
Doesn't work yet

In [None]:
# Produce a Graphviz object
g = build_model_diagram(model)

# Export diagram
g.render("sem_path_diagram", format="png", cleanup=True)

print("Diagram saved.")

In [24]:
# Candidate regressions (structural part) — commented out, not part of model_desc
# Stressoren ~ Energiebronnen
# Energiebronnen ~ Stressoren

# Stressreacties ~ Stressoren + Energiebronnen + Welbevinden
# Welbevinden ~ Stressoren + Energiebronnen + Stressreacties

# Positieve_Uitkomsten ~ Welbevinden + Stressreacties
# Negatieve_Uitkomsten ~ Welbevinden + Stressreacties