In [31]:
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
import statsmodels.api as sm
from scipy import stats
from tqdm import trange
import warnings
warnings.filterwarnings("ignore")

In [27]:
%pip install pandas numpy statsmodels tqdm setuptools scikit-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [28]:
DATA_PATH = "/workspaces/Impact-of-Employee-Emotions-on-Productivity-with-the-Role-of-AI-Policy-Research/data/employee_emotion_productivity.csv"   # change to your CSV path
# Step 1: Define Likert mapping (all lowercase)
likert_map = {
    "strongly disagree": 1,
    "disagree": 2,
    "neutral": 3,
    "agree": 4,
    "strongly agree": 5
}


In [29]:
# Step 2: Load dataset
df = pd.read_csv(DATA_PATH)
print(f"Loaded dataset: {df.shape[0]} rows × {df.shape[1]} columns")

# Clean column names (remove line breaks and spaces)
df.columns = df.columns.str.replace(r"[\r\n]+", " ", regex=True).str.strip()

Loaded dataset: 305 rows × 26 columns


In [32]:
# Step 3: Convert all string entries to lowercase for mapping
for col in df.columns:
    if df[col].dtype == object:
        df[col] = df[col].astype(str).str.lower().str.strip()
        # Apply mapping where possible
        df[col] = df[col].replace(likert_map)

# Step 4: Define construct item groups (based on 26 columns structure)
positive_emotion_cols = df.columns[6:11]   # Q1–Q5
work_engagement_cols = df.columns[11:16]   # Q6–Q10
productivity_cols = df.columns[16:21]      # Q11–Q15
ban_ai_policy_cols = df.columns[21:26]     # Q16–Q20

In [33]:
# Compute indirect effect at a given level of W (w_value), accounting for moderation on a and b:
def conditional_indirect_effects(results, w_value, moderate_a=True, moderate_b=True):
    # Extract coefficients
    m_params = results['M_reg'].params
    y_params = results['Y_reg'].params

    # a path: M ~ a1*X + a2*W + a3*X*W
    a1 = m_params.get('Xc', 0.0)
    a3 = m_params.get('Xc_Wc', 0.0) if moderate_a else 0.0
    a_at_w = a1 + a3 * w_value

    # b path: Y ~ b1*M + b4*M*W + ...
    b1 = y_params.get('Mc', 0.0)
    b4 = y_params.get('Mc_Wc', 0.0) if moderate_b else 0.0
    b_at_w = b1 + b4 * w_value

    indirect = a_at_w * b_at_w
    direct = y_params.get('Xc', 0.0)
    total = direct + indirect
    return {
        'a_at_w': a_at_w,
        'b_at_w': b_at_w,
        'indirect': indirect,
        'direct': direct,
        'total': total
    }

In [34]:
# Step 5: Compute mean composite scores for each construct
df['PositiveEmotion'] = df[positive_emotion_cols].mean(axis=1)
df['WorkEngagement'] = df[work_engagement_cols].mean(axis=1)
df['Productivity'] = df[productivity_cols].mean(axis=1)
df['BAN_AI_Policy'] = df[ban_ai_policy_cols].mean(axis=1)

# Step 6: Center variables (mean centering for moderation)
df['PositiveEmotion_c'] = df['PositiveEmotion'] - df['PositiveEmotion'].mean()
df['WorkEngagement_c'] = df['WorkEngagement'] - df['WorkEngagement'].mean()
df['BAN_AI_Policy_c'] = df['BAN_AI_Policy'] - df['BAN_AI_Policy'].mean()

# Step 7: Interaction terms
df['XW'] = df['PositiveEmotion_c'] * df['BAN_AI_Policy_c']
df['MW'] = df['WorkEngagement_c'] * df['BAN_AI_Policy_c']

In [35]:
# ---------- PLS helper (optional) ----------
def compute_pls_scores(df, manifest_vars_X, manifest_vars_M, manifest_vars_Y, n_comp=1):
    """Compute PLS components for blocks (X, M, Y). Returns df with columns X_score, M_score, Y_score."""
    # For each block, we'll extract one component predicting the target block (simple approach).
    # PLSRegression requires numeric arrays.
    # Here we compute a single latent score per block by using that block's variables to predict the block target (self).
    def pls_block_score(block_df, n_comp):
        pls = PLSRegression(n_components=n_comp)
        X = block_df.values
        # Fit self to get weights: use SVD-like behaviour: we can fit X->X (works) to recover component directions.
        pls.fit(X, X)
        scores = pls.x_scores_[:, 0]  # first component
        return scores

    scores = pd.DataFrame()
    if len(manifest_vars_X) > 0:
        scores['X_score'] = pls_block_score(df[manifest_vars_X], n_comp=n_comp)
    if len(manifest_vars_M) > 0:
        scores['M_score'] = pls_block_score(df[manifest_vars_M], n_comp=n_comp)
    if len(manifest_vars_Y) > 0:
        scores['Y_score'] = pls_block_score(df[manifest_vars_Y], n_comp=n_comp)
    return scores


In [36]:
# ---------- Step 8: Regression Models ----------
# Model 1: Work Engagement (M) predicted by Positive Emotion (X) and moderation (XW)
X_M = sm.add_constant(df[['PositiveEmotion_c', 'BAN_AI_Policy_c', 'XW']])
model_M = sm.OLS(df['WorkEngagement'], X_M).fit()
print("\n=== Model 1: Positive Emotion → Work Engagement (Moderated by BAN-AI Policy) ===")
print(model_M.summary())

# Model 2: Productivity (Y) predicted by Work Engagement (M), moderation (MW), and direct X
M_Y = sm.add_constant(df[['PositiveEmotion_c', 'WorkEngagement_c', 'BAN_AI_Policy_c', 'MW']])
model_Y = sm.OLS(df['Productivity'], M_Y).fit()
print("\n=== Model 2: Work Engagement → Productivity (Moderated by BAN-AI Policy) ===")
print(model_Y.summary())



=== Model 1: Positive Emotion → Work Engagement (Moderated by BAN-AI Policy) ===
                            OLS Regression Results                            
Dep. Variable:         WorkEngagement   R-squared:                       0.188
Model:                            OLS   Adj. R-squared:                  0.180
Method:                 Least Squares   F-statistic:                     23.29
Date:                Sun, 26 Oct 2025   Prob (F-statistic):           1.39e-13
Time:                        10:16:30   Log-Likelihood:                -321.62
No. Observations:                 305   AIC:                             651.2
Df Residuals:                     301   BIC:                             666.1
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------

In [37]:
# ---------- Step 9: Conditional Indirect Effects ----------
def conditional_indirect_effect(model_M, model_Y, w_value):
    a1 = model_M.params['PositiveEmotion_c']
    a3 = model_M.params.get('XW', 0)
    b1 = model_Y.params['WorkEngagement_c']
    b4 = model_Y.params.get('MW', 0)
    a_at_w = a1 + a3 * w_value
    b_at_w = b1 + b4 * w_value
    indirect = a_at_w * b_at_w
    return indirect

# Compute effects at low, mean, high W
w_mean = df['BAN_AI_Policy_c'].mean()
w_low = df['BAN_AI_Policy_c'].quantile(0.25)
w_high = df['BAN_AI_Policy_c'].quantile(0.75)

print("\n=== Conditional Indirect Effects ===")
for w in [w_low, w_mean, w_high]:
    ie = conditional_indirect_effect(model_M, model_Y, w)
    print(f"Indirect effect at BAN-AI Policy (centered={w:.3f}): {ie:.5f}")

# ---------- Step 10: Bootstrapping the Indirect Effect ----------
n_boot = 1000
np.random.seed(42)
boot_results = []

for _ in trange(n_boot, desc="Bootstrapping"):
    sample = df.sample(frac=1, replace=True)
    mM = sm.OLS(sample['WorkEngagement'], sm.add_constant(sample[['PositiveEmotion_c', 'BAN_AI_Policy_c', 'XW']])).fit()
    mY = sm.OLS(sample['Productivity'], sm.add_constant(sample[['PositiveEmotion_c', 'WorkEngagement_c', 'BAN_AI_Policy_c', 'MW']])).fit()
    boot_results.append(conditional_indirect_effect(mM, mY, w_mean))

ci_low, ci_high = np.percentile(boot_results, [2.5, 97.5])
print(f"\nBootstrapped 95% CI for indirect effect (mean BAN-AI Policy): [{ci_low:.5f}, {ci_high:.5f}]")

print("\n✅ Analysis complete! Use the regression summaries and conditional effects for your FYP results.")


=== Conditional Indirect Effects ===
Indirect effect at BAN-AI Policy (centered=-0.378): 0.03163
Indirect effect at BAN-AI Policy (centered=-0.000): 0.06717
Indirect effect at BAN-AI Policy (centered=0.422): 0.11566


Bootstrapping: 100%|██████████| 1000/1000 [00:04<00:00, 236.03it/s]


Bootstrapped 95% CI for indirect effect (mean BAN-AI Policy): [0.01555, 0.13319]

✅ Analysis complete! Use the regression summaries and conditional effects for your FYP results.





In [None]:
# =================== SAVE RESULTS + PLOTS ===================
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os

# Create output folder
output_dir = "/workspaces/Impact-of-Employee-Emotions-on-Productivity-with-the-Role-of-AI-Policy-Research/output"
os.makedirs(output_dir, exist_ok=True)

# ========== 1️⃣ SAVE REGRESSION TABLES TO EXCEL ==========
summary_data = {
    "Model_1": {
        "Variable": model_M.params.index,
        "Coef": model_M.params.values,
        "StdErr": model_M.bse.values,
        "t": model_M.tvalues.values,
        "p": model_M.pvalues.values,
    },
    "Model_2": {
        "Variable": model_Y.params.index,
        "Coef": model_Y.params.values,
        "StdErr": model_Y.bse.values,
        "t": model_Y.tvalues.values,
        "p": model_Y.pvalues.values,
    },
}

# Conditional indirect effects table
cond_data = []
for w_label, w_val in zip(["Low Policy", "Mean Policy", "High Policy"], [w_low, w_mean, w_high]):
    ie = conditional_indirect_effect(model_M, model_Y, w_val)
    cond_data.append({"AI Policy Level": w_label, "Indirect Effect": ie})

df_indirect = pd.DataFrame(cond_data)

# Write all tables to a single Excel file
excel_path = os.path.join(output_dir, "Model_Results.xlsx")
with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
    pd.DataFrame(summary_data["Model_1"]).to_excel(writer, sheet_name="Model_1_WorkEngagement", index=False)
    pd.DataFrame(summary_data["Model_2"]).to_excel(writer, sheet_name="Model_2_Productivity", index=False)
    df_indirect.to_excel(writer, sheet_name="Conditional_Indirect", index=False)

print(f"✅ All regression results saved to {excel_path}")

# ========== 2️⃣ PLOTS: MODERATION EFFECTS ==========

# ---- Plot A: Positive Emotion → Work Engagement ----
# Create predicted engagement at low/high AI policy
x_vals = np.linspace(df['PositiveEmotion_c'].min(), df['PositiveEmotion_c'].max(), 100)
low_policy = df['BAN_AI_Policy_c'].quantile(0.25)
high_policy = df['BAN_AI_Policy_c'].quantile(0.75)

pred_low = (model_M.params['const']
            + model_M.params['PositiveEmotion_c']*x_vals
            + model_M.params['BAN_AI_Policy_c']*low_policy
            + model_M.params['XW']*x_vals*low_policy)
pred_high = (model_M.params['const']
            + model_M.params['PositiveEmotion_c']*x_vals
            + model_M.params['BAN_AI_Policy_c']*high_policy
            + model_M.params['XW']*x_vals*high_policy)

plt.figure(figsize=(7,5))
sns.lineplot(x=x_vals, y=pred_low, label="Low BAN-AI Policy", color="red")
sns.lineplot(x=x_vals, y=pred_high, label="High BAN-AI Policy", color="blue")
plt.title("Moderation: Positive Emotion → Work Engagement")
plt.xlabel("Positive Emotion (centered)")
plt.ylabel("Predicted Work Engagement")
plt.legend()
plt.tight_layout()
plot1_path = os.path.join(output_dir, "Moderation_PosEmotion_WorkEngagement.png")
plt.savefig(plot1_path, dpi=300)
plt.close()

# ---- Plot B: Work Engagement → Productivity ----
x_vals2 = np.linspace(df['WorkEngagement_c'].min(), df['WorkEngagement_c'].max(), 100)
pred_low2 = (model_Y.params['const']
             + model_Y.params['WorkEngagement_c']*x_vals2
             + model_Y.params['BAN_AI_Policy_c']*low_policy
             + model_Y.params['MW']*x_vals2*low_policy
             + model_Y.params['PositiveEmotion_c']*0)  # keep X constant

pred_high2 = (model_Y.params['const']
             + model_Y.params['WorkEngagement_c']*x_vals2
             + model_Y.params['BAN_AI_Policy_c']*high_policy
             + model_Y.params['MW']*x_vals2*high_policy
             + model_Y.params['PositiveEmotion_c']*0)

plt.figure(figsize=(7,5))
sns.lineplot(x=x_vals2, y=pred_low2, label="Low BAN-AI Policy", color="red")
sns.lineplot(x=x_vals2, y=pred_high2, label="High BAN-AI Policy", color="blue")
plt.title("Moderation: Work Engagement → Productivity")
plt.xlabel("Work Engagement (centered)")
plt.ylabel("Predicted Productivity")
plt.legend()
plt.tight_layout()
plot2_path = os.path.join(output_dir, "Moderation_WorkEngagement_Productivity.png")
plt.savefig(plot2_path, dpi=300)
plt.close()

print(f"✅ Plots saved: \n  {plot1_path}\n  {plot2_path}")

# Optional quick preview of the conditional indirect effects
print("\n=== Conditional Indirect Effects Table ===")
print(df_indirect)
