### Import relevant libraries

In [None]:
from semopy import Model, calc_stats, semplot
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from scipy.stats import f_oneway
import pandas as pd
import numpy as np
import seaborn as sns
from semopy.inspector import inspect


### Import the dataset

In [2]:
# Load the survey export; the file uses ";" as its field separator.
df = pd.read_csv(
    "Welzijnsmonitor2025_scaled_normalised_UTF8.csv",
    sep=";",
)

### Creating the model

In [3]:
# Model specification that is handed to semopy's Model(...) in the next cell.
# Every non-blank line has the form `<name> =~ <column> [+ <column> ...]`;
# the names on the left-hand side are the ones that come back as columns of
# model.predict_factors(df). The description contains only `=~` lines, i.e.
# no regression (`~`) lines between the constructs.
# NOTE(review): several constructs are defined by a single column (e.g.
# study_motivation, grade, work_hour) — presumably intentional placeholders;
# confirm that a one-indicator factor is what is wanted here.
# NOTE(review): the right-hand-side names are assumed to be column names of
# `df` — verify against the CSV header.
model_desc = """
engagement =~ Bevl_1 + Bevl_2 + Bevl_3

emotional_exhaustion =~ Burn1_1 + Burn1_2 + Burn1_3 + Burn1_4

study_motivation =~ Mot_Stress_1

study_stress =~ Mot_Stress_2

unnecess_stress =~ Onnodige_stress_1 + Onnodige_stress_2 + Onnodige_stress_3 + Onnodige_stress_4 + Onnodige_stress_5 + Onnodige_stress_6 + Onnodige_stress_7 + Onnodige_stress_8 + Onnodige_stress_9 + Onnodige_stress_10

life_style =~ Leefst

life_satisfaction =~ Cantrill_1

depression =~ Depr_1 + Depr_2 + Depr_3 + Depr_4 + Depr_5 + Depr_6 + Depr_7 + Depr_8

study_point =~ StPunt_beh

grade =~ Cijfer_huidig_1

study_delay =~ Vertr

stop_study =~ StopInt

support_study =~ Hulp_1

support_practical =~ Hulp_2

support_socio =~ Hulp_3

curriculum_engagement =~ Cogn_Eng1_1 + Cogn_Eng1_2 + Cogn_Eng1_3 + Cogn_Eng1_4 

belonging =~ Cogn_Eng1_5 + Cogn_Eng1_6

social_integration =~ Cogn_Eng2_1 + Cogn_Eng2_2 + Cogn_Eng2_3 + Cogn_Eng2_4

academic_integration =~ Cogn_Eng2_5 + Cogn_Eng2_6 + Cogn_Eng2_7 + Cogn_Eng2_8

participation =~ Partici1_1 + Partici1_2 + Partici1_3 + Partici1_4

continue_study =~ Stopint2

parents_engagement =~ Betrok_Ouders

work_hour =~ Werk_1
"""

In [4]:
# Build the SEM from the textual specification and estimate its parameters
# on the loaded data.
model = Model(model_desc)

# Keep the optimiser report instead of discarding it, so that a failed or
# non-converged fit is visible before any downstream result is trusted.
fit_result = model.fit(df)
print(fit_result)

# Factor scores are computed in the export cell that follows; computing them
# here as well only repeated the same predict_factors(df) call.

### Export the latent variable scores

In [5]:
# Estimate factor scores for the rows of df from the fitted model.
latent_scores = model.predict_factors(df)

# Persist the scores; the index is not written to the file.
latent_scores.to_csv("latent_variable_scores.csv", index=False)

# Short preview: caption line followed by the first rows.
print("Latent variable scores extracted:", latent_scores.head(), sep="\n")

Latent variable scores extracted:
   academic_integration  belonging  continue_study  curriculum_engagement  \
0              0.176152  -0.219703       -0.005281               0.365489   
1             -0.595180  -0.332382       -0.004207              -0.855062   
2             -0.430053   0.332363        0.001761               0.144472   
3             -0.875461  -0.593546       -0.018376              -0.623588   
4             -0.063874  -0.536120       -0.009329              -0.027499   

   depression  emotional_exhaustion  engagement     grade  life_satisfaction  \
0   -0.163640             -0.019101    0.297295  0.067630           0.086318   
1   -0.907074             -0.821486    0.014832 -0.105362           0.297334   
2   -0.243419             -0.469400    0.377442  0.039994           0.138626   
3   -0.672496             -0.109148   -0.305431 -0.089103           0.172383   
4    1.481215              0.607028   -0.102114  0.015819          -0.432057   

   life_style  ...  st

### Apply min-max normalization to the latent variables (temporary solution)

In [6]:
# Learn the per-column minimum and maximum of the latent scores.
scaler = MinMaxScaler().fit(latent_scores)

# Rescale with the fitted scaler and restore the original row/column labels,
# since the scaler hands back a bare array.
latent_scores_normalized = pd.DataFrame(
    data=scaler.transform(latent_scores),
    index=latent_scores.index,
    columns=latent_scores.columns,
)

In [7]:
# Work on a copy so the loaded frame itself is not modified.
df_with_latent_norm = df.copy()

# Append every normalised score column under the name "<latent>_norm".
for name, values in latent_scores_normalized.items():
    df_with_latent_norm[name + "_norm"] = values

# Save if needed
df_with_latent_norm.to_csv("data_with_normalized_latent_variables.csv", index=False)

### Calculate fit indices

In [8]:
# Compute the fit indices of the fitted model.
stats = calc_stats(model)

# Transposed so that each index is printed on its own row.
print("=== Model Fit Statistics ===")
print(stats.transpose())

=== Model Fit Statistics ===
                      Value
DoF             1286.000000
DoF Baseline    1596.000000
chi2            5739.968264
chi2 p-value       0.000000
chi2 Baseline  31735.985357
CFI                0.852224
GFI                0.819134
AGFI               0.775535
NFI                0.819134
TLI                0.816601
RMSEA              0.047860
AIC              726.412468
BIC             2679.531313
LogLik             3.793766


### Create and save the path diagram

In [12]:
# Draw the path diagram of the fitted model with semopy's semplot; "pd.png" is
# passed as the second argument (presumably the image file that gets written —
# confirm). The returned diagram object is kept in `g` for inspection below.
g = semplot(model, "pd.png")



In [11]:
# Print the text form of the diagram object; the recorded output is Graphviz
# DOT source (it starts with `digraph G {`).
print(g)

digraph G {
	overlap=scale splines=true
	edge [fontsize=12]
	node [fillcolor="#cae6df" shape=circle style=filled]
	emotional_exhaustion [label=emotional_exhaustion]
	life_style [label=life_style]
	engagement [label=engagement]
	study_point [label=study_point]
	life_satisfaction [label=life_satisfaction]
	parents_engagement [label=parents_engagement]
	social_integration [label=social_integration]
	grade [label=grade]
	belonging [label=belonging]
	unnecess_stress [label=unnecess_stress]
	participation [label=participation]
	study_motivation [label=study_motivation]
	stop_study [label=stop_study]
	work_hour [label=work_hour]
	support_study [label=support_study]
	support_practical [label=support_practical]
	study_stress [label=study_stress]
	study_delay [label=study_delay]
	depression [label=depression]
	support_socio [label=support_socio]
	academic_integration [label=academic_integration]
	curriculum_engagement [label=curriculum_engagement]
	continue_study [label=continue_study]
	node [sh

In [24]:
# TODO: planned regressions (structural part) — none of these lines is part of
# model_desc yet, so the fitted model contains no paths between constructs.
# NOTE(review): the names below are Dutch labels that do not match any
# construct defined in model_desc; presumably they are higher-level groupings
# that still have to be defined — confirm before enabling.
# Glossary: Stressoren = stressors, Energiebronnen = energy sources
# (resources), Stressreacties = stress reactions, Welbevinden = well-being,
# Positieve_/Negatieve_Uitkomsten = positive / negative outcomes.

# regressions
# Stressoren ~ Energiebronnen
# Energiebronnen ~ Stressoren

# Stressreacties ~ Stressoren + Energiebronnen + Welbevinden
# Welbevinden ~ Stressoren + Energiebronnen + Stressreacties

# Positieve_Uitkomsten ~ Welbevinden + Stressreacties
# Negatieve_Uitkomsten ~ Welbevinden + Stressreacties