In [2]:
import os

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.regression.mixed_linear_model import MixedLM

In [16]:
def test_main_effect_explanation_one(df_reg):
    """
    Test the explanation for the negative main effect of similarity:
    "Difficult/effortful goals that weren't completed today get carried over 
    as similar goals tomorrow, and they remain difficult"
    
    Two key tests:
    1. High effort today → Lower completion today
    2. High effort + Low completion today → High similarity with tomorrow
    """
    
    print("="*80)
    print("TESTING MAIN EFFECT EXPLANATION")
    print("="*80)
    print("\nHypothesis:")
    print("Difficult/effortful goals that weren't completed today get carried over")
    print("as similar goals tomorrow, and they remain difficult.")
    print("\n" + "="*80)
    
    
    # ========================================================================
    # TEST 1: Does high effort today predict lower completion today?
    # ========================================================================
    print("\n" + "="*80)
    print("TEST 1: Does high effort today predict lower completion today?")
    print("Model: tomorrow_goal_completion ~ tomorrow_goal_effort")
    print("="*80)
    print("\nExpectation: Negative coefficient (high effort → low completion)")
    print("="*80 + "\n")

    model1 = MixedLM.from_formula(
        'tomorrow_goal_completion ~ tomorrow_goal_effort',
        data=df_reg,
        groups=df_reg['ParticipantIdentifier']
    )
    
    result1 = model1.fit(reml=True)
    print(result1.summary())
    
    # Extract coefficient
    coef_effort = result1.params['tomorrow_goal_effort']
    se_effort = result1.bse['tomorrow_goal_effort']
    pval_effort = result1.pvalues['tomorrow_goal_effort']
    ci_lower = result1.conf_int().loc['tomorrow_goal_effort', 0]
    ci_upper = result1.conf_int().loc['tomorrow_goal_effort', 1]
    
    print(f"\n{'='*80}")
    print("INTERPRETATION - TEST 1:")
    print(f"{'='*80}")
    print(f"Coefficient: β = {coef_effort:.3f}, SE = {se_effort:.3f}")
    print(f"p-value: {pval_effort:.4f}")
    print(f"95% CI: [{ci_lower:.3f}, {ci_upper:.3f}]")
    
    if pval_effort < 0.05:
        if coef_effort < 0:
            print(f"\n✓ CONFIRMED: Higher effort today predicts LOWER completion today")
            print(f"  → Each 1-point increase in effort decreases completion by {abs(coef_effort):.2f} percentage points")
            print(f"  → Difficult goals are indeed harder to complete")
        else:
            print(f"\n✗ OPPOSITE FINDING: Higher effort predicts HIGHER completion today")
            print(f"  → This contradicts the hypothesis")
    else:
        print(f"\n✗ NO RELATIONSHIP: Effort doesn't significantly predict completion today")
        print(f"  → Difficult goals aren't necessarily harder to complete")
    

def test_main_effect_explanation_two(df_reg):
    # ========================================================================
    # TEST 2: Do high effort and low completion today predict high similarity?
    # ========================================================================
    print("\n\n" + "="*80)
    print("TEST 2: Do high effort and low completion today predict high similarity?")
    print("Model: goal_sim ~ today_goal_effort * today_goal_completion")
    print("="*80)
    print("\nExpectations:")
    print("  - Main effect of effort: Positive (high effort → high similarity)")
    print("  - Interaction: Negative (effect of effort stronger when completion is LOW)")
    print("="*80 + "\n")
    
    model2 = MixedLM.from_formula(
        'goal_sim ~ today_goal_effort * today_goal_completion',
        data=df_reg,
        groups=df_reg['ParticipantIdentifier']
    )
    
    result2 = model2.fit(reml=True)
    print(result2.summary())
    
    # Extract coefficients
    coef_effort_main = result2.params['today_goal_effort']
    pval_effort_main = result2.pvalues['today_goal_effort']
    
    coef_completion_main = result2.params['today_goal_completion']
    pval_completion_main = result2.pvalues['today_goal_completion']
    
    coef_interaction = result2.params['today_goal_effort:today_goal_completion']
    se_interaction = result2.bse['today_goal_effort:today_goal_completion']
    pval_interaction = result2.pvalues['today_goal_effort:today_goal_completion']
    ci_int_lower = result2.conf_int().loc['today_goal_effort:today_goal_completion', 0]
    ci_int_upper = result2.conf_int().loc['today_goal_effort:today_goal_completion', 1]
    
    print(f"\n{'='*80}")
    print("INTERPRETATION - TEST 2:")
    print(f"{'='*80}")
    
    print(f"\nMain effect of effort: β = {coef_effort_main:.4f}, p = {pval_effort_main:.4f}")
    print(f"Main effect of completion: β = {coef_completion_main:.4f}, p = {pval_completion_main:.4f}")
    print(f"Interaction (effort × completion): β = {coef_interaction:.6f}, SE = {se_interaction:.6f}")
    print(f"  p-value: {pval_interaction:.4f}")
    print(f"  95% CI: [{ci_int_lower:.6f}, {ci_int_upper:.6f}]")
    
    # Analyze main effect of effort
    print(f"\n--- Main Effect of Effort ---")
    if pval_effort_main < 0.05:
        if coef_effort_main > 0:
            print(f"✓ High effort today → Higher similarity tomorrow (β = {coef_effort_main:.4f})")
            print(f"  Difficult goals DO get carried over as similar goals")
        else:
            print(f"✗ High effort today → LOWER similarity tomorrow (β = {coef_effort_main:.4f})")
    else:
        print(f"✗ No main effect of effort on similarity (p = {pval_effort_main:.4f})")
    
    # Analyze interaction
    print(f"\n--- Interaction Effect ---")
    if pval_interaction < 0.05:
        if coef_interaction < 0:
            print(f"✓ CONFIRMED: The effect of effort on similarity is STRONGER when completion is LOW")
            print(f"  (Negative interaction: β = {coef_interaction:.6f}, p = {pval_interaction:.4f})")
            print(f"\n  → Interpretation:")
            print(f"     When people work hard but don't complete goals today,")
            print(f"     they're MORE likely to set similar goals tomorrow")
        else:
            print(f"✗ OPPOSITE: The effect is stronger when completion is HIGH")
            print(f"  (Positive interaction: β = {coef_interaction:.6f}, p = {pval_interaction:.4f})")
    else:
        print(f"✗ No interaction: Effect of effort doesn't depend on completion level")
        print(f"  (Interaction: β = {coef_interaction:.6f}, p = {pval_interaction:.4f})")
    
    # Simple slopes analysis
    print(f"\n--- Simple Slopes Analysis ---")
    print(f"Effect of effort on similarity at different completion levels:")
    
    # Calculate slopes at different completion levels
    completion_levels = [0, 25, 50, 75, 100]
    print(f"\n{'Completion Today':<20} {'Effect of Effort':<20} {'Interpretation'}")
    print(f"{'-'*70}")
    
    for comp in completion_levels:
        slope = coef_effort_main + (coef_interaction * comp)
        if slope > 0:
            interp = "↑ Effort → ↑ Similarity"
        elif slope < 0:
            interp = "↑ Effort → ↓ Similarity"
        else:
            interp = "No effect"
        print(f"{comp}%{' ':<17} {slope:>8.5f}{' ':<12} {interp}")

In [17]:
# Location of the regression-ready CSV. The default is the original local path;
# set the NROC90_REG_READY environment variable to load the file from elsewhere
# without editing this cell.
path = os.environ.get(
    "NROC90_REG_READY",
    "/Users/farhan/projects/NROC90/data/proc/reg_ready.csv",
)
df = pd.read_csv(path)

In [25]:
# Give the max-similarity columns the short names that the Test 2 formula
# (goal_sim ~ today_goal_effort * today_goal_completion) refers to.
df = df.rename(
    columns=dict(
        max_sim_goal_completion_today="today_goal_completion",
        max_sim_goal_effort_today="today_goal_effort",
        max_sim_tomorrow_with_today="goal_sim",
    )
)

In [26]:
# Show the column names as a plain list to check the frame's current schema.
list(df.columns)

['ParticipantIdentifier',
 'trial_date',
 'tomorrow_goal',
 'today_goal_1',
 'today_goal_2',
 'today_completion_1',
 'today_completion_2',
 'today_effort_1',
 'today_effort_2',
 'today_importance_1',
 'today_importance_2',
 'sim_tomorrow_with_today1',
 'sim_tomorrow_with_today2',
 'same_as_tomorrow_today1',
 'same_as_tomorrow_today2',
 'which_tomorrow_goal',
 'goal_sim',
 'most_similar_is_goal1',
 'max_sim_same_as_tomorrow',
 'today_goal_completion',
 'today_goal_effort',
 'max_sim_goal_importance_today',
 'tomorrow_goal_completion',
 'tomorrow_goal_effort',
 'tomorrow_goal_importance']

In [27]:
# Run Test 1 (tomorrow_goal_completion ~ tomorrow_goal_effort) on the loaded frame;
# the report is printed, and `results` holds whatever the function returns.
results = test_main_effect_explanation_one(df)

TESTING MAIN EFFECT EXPLANATION

Hypothesis:
Difficult/effortful goals that weren't completed today get carried over
as similar goals tomorrow, and they remain difficult.


TEST 1: Does high effort today predict lower completion today?
Model: tomorrow_goal_completion ~ tomorrow_goal_effort

Expectation: Negative coefficient (high effort → low completion)

                Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: tomorrow_goal_completion
No. Observations: 14660   Method:             REML                    
No. Groups:       112     Scale:              1222.8220               
Min. group size:  60      Log-Likelihood:     -73077.4943             
Max. group size:  164     Converged:          Yes                     
Mean group size:  130.9                                               
-----------------------------------------------------------------------
                        Coef.   Std.Err.    z     P>|z|  [0.025  0.975]
------------------

In [28]:
# Run Test 2 (goal_sim ~ today_goal_effort * today_goal_completion) on the loaded frame.
# NOTE: this rebinds `results`, replacing the value assigned by the Test 1 cell.
results = test_main_effect_explanation_two(df)



TEST 2: Do high effort and low completion today predict high similarity?
Model: goal_sim ~ today_goal_effort * today_goal_completion

Expectations:
  - Main effect of effort: Positive (high effort → high similarity)
  - Interaction: Negative (effect of effort stronger when completion is LOW)

                      Mixed Linear Model Regression Results
Model:                     MixedLM          Dependent Variable:          goal_sim 
No. Observations:          14660            Method:                      REML     
No. Groups:                112              Scale:                       0.0650   
Min. group size:           60               Log-Likelihood:              -982.7203
Max. group size:           164              Converged:                   Yes      
Mean group size:           130.9                                                  
----------------------------------------------------------------------------------
                                        Coef.  Std.Err.   z    