In [39]:
import os
import pandas as pd
import numpy as np
from semopy import Model, Optimizer

# Set to 'Y' to segment results by a specific column or 'N' to run the model on the entire dataset
segment_results = 'N'  # 'Y' or 'N'
segmentation_column = 'Generation'  # The column used for segmentation if segment_results = 'Y'
filter_column = 'Generation'  # The column used for filtering
filter_values = ['Boomer']  # List of values to exclude from the dataset

# Define the path to your dataset
current_directory = os.getcwd()
excel_path = os.path.join(current_directory, '01-data', 'TAM_DEF.xlsx')
summary_dir = os.path.join(current_directory, '04-summary')

# Ensure the summary directory exists
if not os.path.exists(summary_dir):
    os.makedirs(summary_dir)

# Load the dataset
try:
    df = pd.read_excel(excel_path)
    print("Dataset loaded successfully.")
except FileNotFoundError:
    print(f"File not found. Please check the file path: {excel_path}")
    exit()
except Exception as e:
    print(f"Error during dataset loading: {e}")
    exit()

# Apply filtering if filter_column and filter_values are set
if filter_column and filter_values:
    df = df[~df[filter_column].isin(filter_values)]
    print(f"Filtered dataset to exclude {filter_values} in {filter_column}")

if segment_results == 'Y':
    # Get unique values in the segmentation column
    segments = df[segmentation_column].unique()
    print(f"Found {segmentation_column} segments: {segments}")
else:
    # If not segmenting, treat the entire dataset as a single segment
    segments = ['Entire Dataset']
    df['Entire Dataset'] = 'Entire Dataset'  # Add a dummy column to facilitate the loop

# Initialize a list to store hypothesis results for all segments
all_hypothesis_results = []

# Loop through each segment and run the SEM model
for segment in segments:
    df_segment = df[df[segmentation_column] == segment] if segment_results == 'Y' else df
    
    # Define the SEM model with the Yvar_Work_Personal and revised hypotheses
    model_desc = """
    # Latent variables
    Trust =~ VAR11_PRIVACY_AI_Protect_Data + VAR16_ETHICS_AI_Developed_Ethical
    Ease_of_Use =~ VAR02_CG_AI_Training_Opo + VAR12_PRIVACY_AI_Give_Consent_Data_Usage
    Fairness =~ VAR25_FAIRNESS_AI_Treats_All_Fair + VAR26_FAIRNESS_Should_Reduce_Bias
    Training =~ VAR05_CG_AI_Training_Supported + VAR03_CG_AI_Training_Access

    # Direct relationships with Yvar_Work_Personal
    Yvar_Work_Personal ~ Trust
    Yvar_Work_Personal ~ Ease_of_Use
    Yvar_Work_Personal ~ Fairness
    Yvar_Work_Personal ~ Training

    # Relationships with latent variables
    Trust ~ VAR11_PRIVACY_AI_Protect_Data
    Trust ~ VAR16_ETHICS_AI_Developed_Ethical
    Trust ~ Training

    # Covariances (as needed)
    Trust ~~ Ease_of_Use
    Trust ~~ Fairness
    Ease_of_Use ~~ Fairness
    """

    # Create the model and load the dataset into the model
    try:
        model = Model(model_desc)
        model.load_dataset(df_segment)
        print(f"\nModel created and dataset loaded into the model successfully for {segment}.")
    except Exception as e:
        print(f"Error during model creation or dataset loading for {segment}: {e}")
        continue

    # Optimize the model
    try:
        optim = Optimizer(model)
        optim.optimize()
        print(f"Model optimization completed successfully for {segment}.")
    except Exception as e:
        print(f"Error during model optimization for {segment}: {e}")
        continue

    # Extract the results
    try:
        results = model.inspect()
        # Convert any "Not estimated" or non-numeric values to NaN
        results = results.applymap(lambda x: np.nan if x in ["Not estimated", "-", None] else x)
        print(f"\nFull Results DataFrame for {segment}:")
        print(results)
    except Exception as e:
        print(f"Error during results extraction for {segment}: {e}")
        continue

    # Attempt to extract p-values for the paths
    try:
        pvalues = results[['p-value']].apply(pd.to_numeric, errors='coerce')  # Convert to numeric, set errors to NaN
        print(f"\nP-values extracted successfully for {segment}:")
        print(pvalues)
    except KeyError:
        print(f"\nUnable to extract p-values for {segment}. Check the results DataFrame above for available data.")
        continue

    # Define hypotheses and their corresponding paths based on revised hypotheses
    hypothesis_criteria = [
        ("Hypothesis 1: Trust influences Usage", 'Yvar_Work_Personal ~ Trust'),
        ("Hypothesis 2: Ease of Use influences Usage", 'Yvar_Work_Personal ~ Ease_of_Use'),
        ("Hypothesis 3: Fairness influences Usage", 'Yvar_Work_Personal ~ Fairness'),
        ("Hypothesis 4: AI-related training and career growth opportunities influence Usage", 'Yvar_Work_Personal ~ Training'),
        ("Hypothesis 5: Privacy and Data Protection influence Trust", 'Trust ~ VAR11_PRIVACY_AI_Protect_Data'),
        ("Hypothesis 6: Ethical Considerations influence Trust", 'Trust ~ VAR16_ETHICS_AI_Developed_Ethical')
    ]

    # Create a DataFrame to store the hypothesis results for the current segment
    hypothesis_results = []

    # Determine whether each hypothesis is accepted or rejected
    for hyp, path in hypothesis_criteria:
        matching_paths = results[(results['lval'] == path.split(' ~ ')[0]) & 
                                 (results['rval'] == path.split(' ~ ')[1])]
        if not matching_paths.empty:
            p_value = matching_paths['p-value'].values[0]
            estimate = matching_paths['Estimate'].values[0]
            std_err = matching_paths['Std. Err'].values[0]
            z_value = matching_paths['z-value'].values[0]
            result = 'Accepted' if not np.isnan(p_value) and p_value < 0.05 else 'Rejected'
            hypothesis_results.append({
                'Segment': segment,
                'Hypothesis': hyp,
                'p-value': p_value,
                'Estimate': estimate,
                'Std. Err': std_err,
                'z-value': z_value,
                'Result': result
            })
        else:
            print(f"Path {path} not found in results for {segment}. Please check the available paths.")
            hypothesis_results.append({
                'Segment': segment,
                'Hypothesis': hyp,
                'p-value': np.nan,
                'Estimate': np.nan,
                'Std. Err': np.nan,
                'z-value': np.nan,
                'Result': 'Path Not Found'
            })

    # Append the current segment's hypothesis results to the overall list
    all_hypothesis_results.extend(hypothesis_results)

# Convert the overall hypothesis results to a DataFrame
all_hypothesis_df = pd.DataFrame(all_hypothesis_results)

# Save the results to Excel
output_path = os.path.join(summary_dir, 'SEM_Results.xlsx')
with pd.ExcelWriter(output_path) as writer:
    # Write the overall hypothesis results
    all_hypothesis_df.to_excel(writer, sheet_name='Hypothesis Results', index=False)
    
    # Loop through each segment and write the SEM results for each segment in a separate tab
    for segment in segments:
        df_segment_results = df[df[segmentation_column] == segment] if segment_results == 'Y' else df
        # Extract the SEM results for the segment
        model = Model(model_desc)
        model.load_dataset(df_segment_results)
        optim = Optimizer(model)
        optim.optimize()
        results = model.inspect()
        # Write SEM results to its own sheet
        results.to_excel(writer, sheet_name=f'SEM Results - {segment}')

print(f"SEM results and hypothesis results saved to {output_path}.")


Dataset loaded successfully.
Filtered dataset to exclude ['Boomer'] in Generation

Model created and dataset loaded into the model successfully for Entire Dataset.




Model optimization completed successfully for Entire Dataset.

Full Results DataFrame for Entire Dataset:
                                        lval  op  \
0              VAR11_PRIVACY_AI_Protect_Data   ~   
1          VAR16_ETHICS_AI_Developed_Ethical   ~   
2                                      Trust   ~   
3                                      Trust   ~   
4                                      Trust   ~   
5                   VAR02_CG_AI_Training_Opo   ~   
6   VAR12_PRIVACY_AI_Give_Consent_Data_Usage   ~   
7          VAR25_FAIRNESS_AI_Treats_All_Fair   ~   
8          VAR26_FAIRNESS_Should_Reduce_Bias   ~   
9             VAR05_CG_AI_Training_Supported   ~   
10               VAR03_CG_AI_Training_Access   ~   
11                          Yvar_USE_AI_Work   ~   
12                          Yvar_USE_AI_Work   ~   
13                          Yvar_USE_AI_Work   ~   
14                          Yvar_USE_AI_Work   ~   
15                                     Trust  ~~   
16        

  results = results.applymap(lambda x: np.nan if x in ["Not estimated", "-", None] else x)


SEM results and hypothesis results saved to /Users/danramirez/mbs-structural-equation-modeling/04-summary/SEM_Results.xlsx.
