In [1]:
import os

os.getcwd()

'c:\\Users\\86139\\Desktop\\PARA Note System\\Projects\\Inter-University Health Data\\2025-Inter-Univer-Health-Data\\Code'

In [2]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np

hrsWave = pd.read_csv("../Data/hrsWaveCleaned.csv")

n = 1000
hhidpn = np.random.choice(hrsWave["HHIDPN"].unique(), size = n)
idx = hrsWave["HHIDPN"].isin(hhidpn)
df = hrsWave.loc[idx, :]

In [14]:
# Assume your data is in a pandas DataFrame called 'df'
# with columns: 'recall_score' (your outcome), 'age', 'job', 'HHIDPN' (subject ID)

# 1. Define the Binomial family and logit link
# The `Binomial` family in statsmodels assumes endog is proportions (e.g., successes/n_trials)
# or a 2-column array where col 0 is successes and col 1 is failures.
# Since your score is 0-20, you should pass it as a two-column array: [recall_score, 20 - recall_score]

n_trials = 20
df['RwRecFail'] = n_trials - df['RwTR20']

# Create a 2-column array for endog if using the formula API with a non-standard endog
# For GEE with Binomial, endog usually expects a proportion (successes/n_trials) or a (successes, total_trials) tuple/array
# If using `smf.gee`, it's often more straightforward to define `endog` as proportion.
df['RwRecProp'] = df['RwTR20'] / n_trials

# 2. Define the exchangeable correlation structure
exchangeable_corr = sm.cov_struct.Exchangeable()
autoregress_corr = sm.cov_struct.Autoregressive()
unstructure_corr = sm.cov_struct.Independence()


# 3. Specify Full Model Formula
formulaFull = "RwRecProp ~ RwAGEM_B * C(RwJOCCSD, Treatment(reference='Retired'))"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['RwRecFail'] = n_trials - df['RwTR20']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['RwRecProp'] = df['RwTR20'] / n_trials


In [15]:
# 3. Fit the GEE model using the formula API with robust sandwich covariance matrix
modelExch = smf.gee(formulaFull, groups="HHIDPN", data=df,
                cov_struct=exchangeable_corr,
                family=sm.families.Binomial())

modelAR = smf.gee(formulaFull, groups="HHIDPN", data=df,
                cov_struct=autoregress_corr,
                family=sm.families.Binomial())

modelUnstruct = smf.gee(formulaFull, groups="HHIDPN", data=df,
                cov_struct=unstructure_corr,
                family=sm.families.Binomial())

# Fit the model with robust covariance
resultsExch = modelExch.fit(cov_type="robust")
resultsAR = modelAR.fit(cov_type="robust")
resultsUnstruct = modelUnstruct.fit(cov_type="robust")

In [16]:
# Display the summary of all fitted GEE models
print("Autoregressive correlation structure:\n")
print(resultsAR.summary())

print("\nExchangeable correlation structure:\n")
print(resultsExch.summary())

print("\nUnstructured (Independence) correlation structure:\n")
print(resultsUnstruct.summary())

Autoregressive correlation structure:

                               GEE Regression Results                              
Dep. Variable:                   RwRecProp   No. Observations:                 3984
Model:                                 GEE   No. clusters:                      902
Method:                        Generalized   Min. cluster size:                   1
                      Estimating Equations   Max. cluster size:                  11
Family:                           Binomial   Mean cluster size:                 4.4
Dependence structure:       Autoregressive   Num. iterations:                    22
Date:                     Fri, 27 Jun 2025   Scale:                           1.000
Covariance type:                    robust   Time:                         00:14:50
                                                                                               coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------

In [40]:
# Calculate QIC for each model as a measure of goodness of fit
def calc_qic(model_result):
    # QIC = -2 * quasi-likelihood + 2 * trace(V_hat^-1 * V_model)
    # statsmodels does not provide QIC directly, but we can use qic() if available
    try:
        return model_result.qic()[0]
    except AttributeError:
        return np.nan

qic_exch = calc_qic(resultsExch)
qic_ar = calc_qic(resultsAR)
qic_unstruct = calc_qic(resultsUnstruct)

print(f"QIC (Exchangeable): {qic_exch}")
print(f"QIC (Autoregressive): {qic_ar}")
print(f"QIC (Unstructured): {qic_unstruct}")

QIC (Exchangeable): 466.59570969134296
QIC (Autoregressive): 467.5667953306065
QIC (Unstructured): 457.15492936305395


The base model with independence covariance structure yields the lowest QIC, marking the best performing base model.

## 2. Variable Selection by QIC:

In [49]:
## Choose the control variables
controlVars = ['RwWORK', 'RwJHOURS', 'RwWGIHR',
       'RwJPHYS', 'RwJLIFT', 'RwJSTRES', 'RwJSTOOP', 'RwJSIGHT', 'RwCENREG',
       'RwMSTAT', 'RwLIVBRO', 'RwHIBP', 'RwDIAB', 'RwCANCR',
       'RwLUNG', 'RwHEART', 'RwSTROK', 'RwPSYCH', 'RwVIGACT', 'RwSMOKEV',
       'RwDRINK', 'RwPhyLim', 'RwCogLim', 'RwAnyCogImp', 'RwLOST',
       'RwWANDER', 'RwHALUC', 'RwALONE', 'HwATOTB', 'HwADEBT', 'HwACHCK',
       'HwAMRTB', 'HwITOT']

baseFormula = "RwRecProp ~ RwAGEM_B * C(RwJOCCSD, Treatment(reference='Retired'))"
fullFormula = baseFormula + ' + ' + ' + '.join(controlVars)

print(fullFormula)

RwRecProp ~ RwAGEM_B * C(RwJOCCSD, Treatment(reference='Retired')) + RwWORK + RwJHOURS + RwWGIHR + RwJPHYS + RwJLIFT + RwJSTRES + RwJSTOOP + RwJSIGHT + RwCENREG + RwMSTAT + RwLIVBRO + RwHIBP + RwDIAB + RwCANCR + RwLUNG + RwHEART + RwSTROK + RwPSYCH + RwVIGACT + RwSMOKEV + RwDRINK + RwPhyLim + RwCogLim + RwAnyCogImp + RwLOST + RwWANDER + RwHALUC + RwALONE + HwATOTB + HwADEBT + HwACHCK + HwAMRTB + HwITOT


In [67]:
import re
import numpy as np
def stepwise_selection_qic(data, groups, cov_struct, family, start_formula, end_formula, verbose=True, qic_threshold=0):
    """
    Perform stepwise (forward and backward) feature selection based on QIC for GEE models.

    Parameters:
        data: pandas.DataFrame
            The dataset containing all variables used in the formulas.
        groups: array-like
            Grouping variable for GEE (e.g., subject or cluster IDs).
        cov_struct: statsmodels.genmod.cov_struct.CovStruct
            Covariance structure for GEE (e.g., Exchangeable, Autoregressive).
        family: statsmodels.genmod.families.Family
            The family object for GEE (e.g., Gaussian, Binomial).
        start_formula: str
            The starting model formula (patsy syntax).
        end_formula: str
            The full model formula (patsy syntax, includes all candidate variables).
        verbose: bool, optional
            If True, prints progress at each step.
        qic_threshold: float, optional
            Minimum QIC improvement required to continue selection.

    Returns:
        best_formula: str, formula of the best model found
        best_result: fitted GEE result object
        history: list of (formula, QIC)
    """
    import statsmodels.formula.api as smf

    def get_terms(formula):
        rhs = formula.split('~')[1]
        terms = [t.strip() for t in re.split(r'\s*\+\s*', rhs) if t.strip() != '']
        terms = [t for t in terms if t != '1']
        return set(terms)

    def build_formula(lhs, terms):
        if not terms:
            return f"{lhs} ~ 1"
        return f"{lhs} ~ {' + '.join(sorted(terms))}"

    def calc_qic(result):
        try:
            return result.qic(scale = 1)[0]
        except Exception:
            return np.nan

    lhs = start_formula.split('~')[0].strip()
    start_terms = get_terms(start_formula)
    end_terms = get_terms(end_formula)
    current_terms = set(start_terms)
    history = []

    # Fit initial model
    current_formula = build_formula(lhs, current_terms)
    model = smf.gee(current_formula, groups=groups, data=data, cov_struct=cov_struct, family=family)
    result = model.fit(cov_type="robust")
    best_qic = calc_qic(result)
    best_formula = current_formula
    best_result = result
    history.append((current_formula, best_qic))

    improved = True
    while improved:
        improved = False
        # Forward step
        qic_candidates = []
        formulas = []
        term_changes = []
        for term in sorted(end_terms - current_terms):
            new_terms = current_terms | {term}
            formula = build_formula(lhs, new_terms)
            try:
                model = smf.gee(formula, groups=groups, data=data, cov_struct=cov_struct, family=family)
                result = model.fit(cov_type="robust")
                qic = calc_qic(result)
            except Exception:
                qic = np.nan
            qic_candidates.append(qic)
            formulas.append(formula)
            term_changes.append(('add', term))

        # Backward step
        for term in sorted(current_terms - start_terms):
            new_terms = current_terms - {term}
            formula = build_formula(lhs, new_terms)
            try:
                model = smf.gee(formula, groups=groups, data=data, cov_struct=cov_struct, family=family)
                result = model.fit(cov_type="robust")
                qic = calc_qic(result)
            except Exception:
                qic = np.nan
            qic_candidates.append(qic)
            formulas.append(formula)
            term_changes.append(('remove', term))
        
        print(qic_candidates)
        if qic_candidates:
            min_idx = np.nanargmin(qic_candidates)
            min_qic = qic_candidates[min_idx]
            if (best_qic - min_qic) > qic_threshold:
                improved = True
                best_qic = min_qic
                best_formula = formulas[min_idx]
                action, term = term_changes[min_idx]
                if action == 'add':
                    current_terms.add(term)
                elif action == 'remove':
                    current_terms.remove(term)
                model = smf.gee(best_formula, groups=groups, data=data, cov_struct=cov_struct, family=family)
                best_result = model.fit(cov_type="robust")
                history.append((best_formula, best_qic))
                if verbose:
                    print(f"Step: {action}, QIC: {best_qic:.2f}, Formula: {best_formula}")
            else:
                if verbose:
                    print("No QIC improvement above threshold, stopping.")
        else:
            if verbose:
                print("No candidates left, stopping.")

    return best_formula, best_result, history

In [68]:
optimal_form, optimal_result, history =\
    stepwise_selection_qic(df, df["HHIDPN"], unstructure_corr, 
                        sm.families.Binomial(), 
                        baseFormula, fullFormula, 
                        verbose=False, 
                        qic_threshold=10)

[435.83263634926936, 457.2259251676483, 456.6747616680278, 419.7138565753232, 441.91179673294124, 457.15492936305526, 457.15492936305526, 456.3926943601335, 456.5693652442964, 436.18290614079535, 455.01107853327295, 441.37947915249913, 457.15492936305526, 456.8823585448688, 457.20842785562525, 92.07176543705019, 93.14946328034293, 93.15279634059226, 91.58763597041218, 92.0790799879403, 93.89074320324876, 454.75932747817257, 457.15492936305526, 457.2838862955956, 457.11884256065395, 455.0765082457663, 448.86633108424473, 451.12854138994135, 455.48064279175964, 457.00399307838495, 457.15492936305526, 79.30012443926421, 457.61733304620833]
[77.63519125151687, 79.3348400485419, 79.21759845668414, 73.04141629947159, 77.62352291575326, 79.30012443861887, 79.30012443861887, 79.3591098090774, 79.11747669671998, 78.37668080583794, 78.1887987036809, 78.55713651195707, 79.30012443861887, 78.68301751560641, 79.32925584218408, 75.7558561531197, 75.95712880352464, -335.21872514817414, 73.79885967867

In [69]:
print(optimal_form)
print(optimal_result.summary())
print(history[-1])

RwRecProp ~ RwAGEM_B * C(RwJOCCSD, Treatment(reference='Retired')) + RwJPHYS + RwWGIHR
                               GEE Regression Results                              
Dep. Variable:                   RwRecProp   No. Observations:                  698
Model:                                 GEE   No. clusters:                      327
Method:                        Generalized   Min. cluster size:                   1
                      Estimating Equations   Max. cluster size:                  11
Family:                           Binomial   Mean cluster size:                 2.1
Dependence structure:         Independence   Num. iterations:                     2
Date:                     Fri, 27 Jun 2025   Scale:                           1.000
Covariance type:                    robust   Time:                         04:09:26
                                                                                               coef    std err          z      P>|z|      [0.025      0.975]


## 3. Fit the optimal model on the entire dataset and test Hypothesis on the Optimal Model

In [None]:
import numpy as np
import re

optimal_cov = optimal_result.cov_params()

## R matrix: interactions between RwAGEM_B and RwJOCCSD are 0
# Get parameter names from the optimal model
param_names = optimal_result.params.index.tolist()

# Find interaction terms between RwAGEM_B and RwJOCCSD
# interaction_pattern = re.compile(r"RwAGEM_B:C\(RwJOCCSD.*\)")
interaction_pattern = re.compile(r"RwJPHYS")

interaction_indices = [i for i, name in enumerate(param_names) if interaction_pattern.search(name)]

# Create R matrix: one row for each interaction, columns = number of params
r_matrix = np.zeros((len(interaction_indices), len(param_names)))
for row, idx in enumerate(interaction_indices):
    r_matrix[row, idx] = 1

optimal_result.wald_test(r_matrix, cov_p=optimal_cov)




<class 'statsmodels.stats.contrast.ContrastResults'>
<Wald test (chi2): statistic=[[4.17300822e-15]], p-value=0.9999999484575947, df_denom=1>

In [76]:
optimal_result.cov_params()

Unnamed: 0,Intercept,"C(RwJOCCSD, Treatment(reference='Retired'))[T.Farming/Forestry/Fishing]","C(RwJOCCSD, Treatment(reference='Retired'))[T.Food/Personal/Service]","C(RwJOCCSD, Treatment(reference='Retired'))[T.Healthcare]","C(RwJOCCSD, Treatment(reference='Retired'))[T.High Risk Occupations]","C(RwJOCCSD, Treatment(reference='Retired'))[T.Management/Clerical/Business]","C(RwJOCCSD, Treatment(reference='Retired'))[T.STEM/Professional/Technical]","C(RwJOCCSD, Treatment(reference='Retired'))[T.Sales]","C(RwJOCCSD, Treatment(reference='Retired'))[T.Skilled Trades/Production/Manual]",RwAGEM_B,"RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.Farming/Forestry/Fishing]","RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.Food/Personal/Service]","RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.Healthcare]","RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.High Risk Occupations]","RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.Management/Clerical/Business]","RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.STEM/Professional/Technical]","RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.Sales]","RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retired'))[T.Skilled Trades/Production/Manual]",RwJPHYS,RwWGIHR
Intercept,1708424000000000.0,-2060898000000000.0,-2410482000000000.0,-2279355000000000.0,-2429594000000000.0,-1600976000000000.0,-1767499000000000.0,-2771201000000000.0,-2115684000000000.0,-25818570000000.0,19554780000000.0,24764220000000.0,29520000000000.0,26267060000000.0,37813580000000.0,31282670000000.0,33497930000000.0,20788330000000.0,21830640000.0,13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Farming/Forestry/Fishing]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Food/Personal/Service]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Healthcare]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.High Risk Occupations]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Management/Clerical/Business]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.STEM/Professional/Technical]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Sales]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Skilled Trades/Production/Manual]",-1708424000000000.0,2060898000000000.0,2410482000000000.0,2279355000000000.0,2429594000000000.0,1600976000000000.0,1767499000000000.0,2771201000000000.0,2115684000000000.0,25818570000000.0,-19554780000000.0,-24764220000000.0,-29520000000000.0,-26267060000000.0,-37813580000000.0,-31282670000000.0,-33497930000000.0,-20788330000000.0,-21830640000.0,-13055340.0
RwAGEM_B,-36110420000000.0,44917850000000.0,52611410000000.0,48837190000000.0,51894120000000.0,34169510000000.0,38133240000000.0,59765570000000.0,44864720000000.0,561973400000.0,-430303700000.0,-522889400000.0,-622364600000.0,-557387200000.0,-802110900000.0,-669344900000.0,-716790200000.0,-449301000000.0,-0.01241102,2.551587e-05


In [55]:
# Retrieve the covariance matrices of the fitted GEE models
cov_exch = resultsExch.cov_params()
cov_ar = resultsAR.cov_params()
cov_unstruct = resultsUnstruct.cov_params()

print("Covariance matrix (Exchangeable):\n", cov_exch)
print("\nCovariance matrix (Autoregressive):\n", cov_ar)
print("\nCovariance matrix (Unstructured):\n", cov_unstruct)

Covariance matrix (Exchangeable):
                                                     Intercept  \
Intercept                                            0.017169   
C(RwJOCCSD, Treatment(reference='Retired'))[T.F...  -0.012156   
C(RwJOCCSD, Treatment(reference='Retired'))[T.F...  -0.014945   
C(RwJOCCSD, Treatment(reference='Retired'))[T.H...  -0.006204   
C(RwJOCCSD, Treatment(reference='Retired'))[T.H...  -0.016171   
C(RwJOCCSD, Treatment(reference='Retired'))[T.M...  -0.010988   
C(RwJOCCSD, Treatment(reference='Retired'))[T.S...  -0.015787   
C(RwJOCCSD, Treatment(reference='Retired'))[T.S...  -0.014839   
C(RwJOCCSD, Treatment(reference='Retired'))[T.S...  -0.015275   
RwAGEM_B                                            -0.000245   
RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retir...   0.000167   
RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retir...   0.000210   
RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retir...   0.000089   
RwAGEM_B:C(RwJOCCSD, Treatment(reference='Retir...   0.

In [81]:
optimal_form

"RwRecProp ~ RwAGEM_B * C(RwJOCCSD, Treatment(reference='Retired')) + RwJPHYS + RwWGIHR"

In [89]:
modelUnstruct2 = smf.gee("RwRecProp ~ RwAGEM_B * C(RwJOCCSD, Treatment(reference='Retired')) + RwWGIHR", 
                         groups="HHIDPN", data=df,
                        cov_struct=unstructure_corr,
                        family=sm.families.Binomial()).fit(cov_type="robust")


modelUnstruct2.summary()

0,1,2,3
Dep. Variable:,RwRecProp,No. Observations:,763
Model:,GEE,No. clusters:,355
Method:,Generalized,Min. cluster size:,1
,Estimating Equations,Max. cluster size:,11
Family:,Binomial,Mean cluster size:,2.1
Dependence structure:,Independence,Num. iterations:,2
Date:,"Fri, 27 Jun 2025",Scale:,1.000
Covariance type:,robust,Time:,04:29:09

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.7224,0.807,0.895,0.371,-0.860,2.305
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Farming/Forestry/Fishing]",1.5326,1.589,0.965,0.335,-1.581,4.647
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Food/Personal/Service]",0.0184,0.959,0.019,0.985,-1.862,1.899
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Healthcare]",-1.5295,1.661,-0.921,0.357,-4.785,1.726
"C(RwJOCCSD, Treatment(reference='Retired'))[T.High Risk Occupations]",2.5511,1.354,1.884,0.060,-0.103,5.205
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Management/Clerical/Business]",1.2640,0.904,1.398,0.162,-0.508,3.037
"C(RwJOCCSD, Treatment(reference='Retired'))[T.STEM/Professional/Technical]",0.5487,0.976,0.562,0.574,-1.365,2.462
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Sales]",-0.6854,1.061,-0.646,0.518,-2.766,1.395
"C(RwJOCCSD, Treatment(reference='Retired'))[T.Skilled Trades/Production/Manual]",-0.1640,0.937,-0.175,0.861,-2.001,1.673

0,1,2,3
Skew:,0.0584,Kurtosis:,0.2186
Centered skew:,-0.2063,Centered kurtosis:,2.2707
