# Data input

In [None]:
import os
import pandas as pd
import numpy as np
from semopy import Model, Optimizer

# Resolve the input workbook and the output folder relative to the current
# working directory, make sure the output folder exists, and load the
# cleaned survey data into `df` for the cells that follow.
current_directory = os.getcwd()
excel_path = os.path.join(current_directory, '01-data', 'TAM_DEF.xlsx')
summary_dir = os.path.join(current_directory, '04-summary')

# exist_ok=True replaces the check-then-create pair, which could fail if the
# directory appeared between the check and the makedirs call.
os.makedirs(summary_dir, exist_ok=True)

# Load the dataset from the "Cleaned_final" sheet
try:
    df = pd.read_excel(excel_path, sheet_name='Cleaned_final')
except FileNotFoundError:
    print(f"File not found. Please check the file path: {excel_path}")
    # Stop with a non-zero status: bare exit() is the interactive-shell helper
    # and reports success (status 0) even though loading failed.
    raise SystemExit(1)
except Exception as e:
    print(f"Error during dataset loading: {e}")
    raise SystemExit(1)
else:
    print("Dataset loaded successfully from 'Cleaned_final' sheet.")

# Model: Work + personal average score

In [None]:
import os
import pandas as pd
import numpy as np
from semopy import Model, Optimizer


# Pooled model (all rows of `df`): print descriptive statistics, declare the
# SEM, load the data, optimise, and collect the parameter table in `results`.

# Generate summary statistics for the dataset
summary_stats = df.describe()
print("Summary statistics of the dataset:")
print(summary_stats)

# Define the SEM model with the Yvar_Work_Personal and revised hypotheses
# NOTE(review): VAR11, VAR16 and VAR02 appear both as indicators (=~) and as
# predictors (~) of the same latent variable below - confirm this is intended.
model_desc = """
# Latent variables
Trust =~ VAR11_PRIVACY_AI_Protect_Data + VAR16_ETHICS_AI_Developed_Ethical
Ease_of_Use =~ VAR02_CG_AI_Training_Opo + VAR12_PRIVACY_AI_Give_Consent_Data_Usage
Fairness =~ VAR25_FAIRNESS_AI_Treats_All_Fair + VAR26_FAIRNESS_Should_Reduce_Bias

# Direct relationships with Yvar_Work_Personal
Yvar_Work_Personal ~ Trust
Yvar_Work_Personal ~ Ease_of_Use
Yvar_Work_Personal ~ Fairness

# Relationships with latent variables
Trust ~ VAR11_PRIVACY_AI_Protect_Data
Trust ~ VAR16_ETHICS_AI_Developed_Ethical
Ease_of_Use ~ VAR02_CG_AI_Training_Opo

# Covariances (as needed)
Trust ~~ Ease_of_Use
Trust ~~ Fairness
Ease_of_Use ~~ Fairness
"""

# Create the model and load the dataset into the model
try:
    model = Model(model_desc)
    model.load_dataset(df)
    print("Model created and dataset loaded into the model successfully.")
except Exception as e:
    print(f"Error during model creation or dataset loading: {e}")
    # Non-zero status instead of bare exit(), which signals success (0).
    raise SystemExit(1)

# Optimize the model
try:
    optim = Optimizer(model)
    optim.optimize()
    print("Model optimization completed successfully.")
except Exception as e:
    print(f"Error during model optimization: {e}")
    raise SystemExit(1)

# Extract the results
try:
    results = model.inspect()
    # DataFrame.applymap is deprecated in favour of DataFrame.map in newer
    # pandas; use whichever this installation provides.
    elementwise = results.map if hasattr(results, "map") else results.applymap
    # Convert the "Not estimated" / "-" placeholders to NaN
    results = elementwise(lambda x: np.nan if x in ["Not estimated", "-"] else x)
    print("Full Results DataFrame:")
    print(results)
except Exception as e:
    print(f"Error during results extraction: {e}")
    raise SystemExit(1)

# Decide each hypothesis from the p-value of its own regression path.
# Reads `results` (the inspected parameter table) and produces `pvalues`,
# `accepted_hypotheses` and `rejected_hypotheses`.

# Attempt to extract p-values for the paths
try:
    # Convert to numeric; anything non-numeric becomes NaN
    pvalues = results[['p-value']].apply(pd.to_numeric, errors='coerce')
    print("\nP-values extracted successfully:")
    print(pvalues)
except KeyError:
    print("\nUnable to extract p-values. Check the results DataFrame above for available data.")
    # Non-zero status instead of bare exit(), which signals success (0).
    raise SystemExit(1)

# Define hypotheses and their corresponding paths based on revised hypotheses
hypothesis_criteria = [
    ("Hypothesis 1: Trust influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Trust'),
    ("Hypothesis 2: Ease of Use influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Ease_of_Use'),
    ("Hypothesis 3: Fairness influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Fairness'),
    ("Hypothesis 4: Privacy and Data Protection influence Trust", 'Trust ~ VAR11_PRIVACY_AI_Protect_Data'),
    ("Hypothesis 5: Ethical Considerations influence Trust", 'Trust ~ VAR16_ETHICS_AI_Developed_Ethical'),
    ("Hypothesis 6: Training Opportunities influence Ease of Use", 'Ease_of_Use ~ VAR02_CG_AI_Training_Opo')
]

# Determine whether each hypothesis is accepted or rejected
accepted_hypotheses = []
rejected_hypotheses = []

# Only regression rows can answer a "lhs ~ rhs" hypothesis; covariance rows
# (op "~~") are excluded when the table exposes an operator column.
if 'op' in results.columns:
    is_regression = results['op'] == '~'
else:
    is_regression = pd.Series(True, index=results.index)

for hyp, path in hypothesis_criteria:
    lhs, rhs = (side.strip() for side in path.split(" ~ "))
    # Match the exact row for this path. Filtering on lval alone returned the
    # first row sharing that left-hand side, so several hypotheses were judged
    # on the same (wrong) p-value.
    row_mask = is_regression & (results['lval'] == lhs) & (results['rval'] == rhs)
    if row_mask.any():
        p_value = pvalues.loc[row_mask, 'p-value'].iloc[0]
        # A missing (NaN) p-value counts as not significant.
        if pd.notna(p_value) and p_value < 0.05:
            accepted_hypotheses.append(hyp)
        else:
            rejected_hypotheses.append(hyp)
    else:
        print(f"Path {path} not found in results. Please check the available paths.")
        rejected_hypotheses.append(hyp)

# Report the hypothesis decisions and export everything to one workbook.

# Output the results
print("\nAccepted Hypotheses:")
for hyp in accepted_hypotheses:
    print(hyp)

print("\nRejected Hypotheses:")
for hyp in rejected_hypotheses:
    print(hyp)

# `summary_path` was used without ever being assigned, which raised NameError
# before anything was written. The workbook goes into the summary directory
# created in the data-input cell.
# NOTE(review): the file name is a choice made here - confirm the desired name.
summary_path = os.path.join(summary_dir, 'model_summary.xlsx')

# Save the results and summary statistics to Excel, one sheet per table
with pd.ExcelWriter(summary_path) as writer:
    summary_stats.to_excel(writer, sheet_name='Summary Statistics')
    results.to_excel(writer, sheet_name='SEM Results')
    pvalues.to_excel(writer, sheet_name='P-Values')

    # Write accepted and rejected hypotheses
    pd.DataFrame({'Accepted Hypotheses': accepted_hypotheses}).to_excel(writer, sheet_name='Accepted Hypotheses', index=False)
    pd.DataFrame({'Rejected Hypotheses': rejected_hypotheses}).to_excel(writer, sheet_name='Rejected Hypotheses', index=False)

print(f"Summary statistics and SEM results saved to {summary_path}")


# Model by Gen

In [None]:
import os
import pandas as pd
import numpy as np
from semopy import Model, Optimizer

# Fit the same SEM separately for every generation segment and export one
# workbook per segment. A failure in one segment is reported and skipped so
# the remaining segments still run.

# Combine "Gen X" and "Boomer" into one segment called "Gen X & Boomer"
df['Generation'] = df['Generation'].replace({'Gen X': 'Gen X & Boomer', 'Boomer': 'Gen X & Boomer'})

# Get unique generations after combining
generations = df['Generation'].unique()
print(f"Found generations: {generations}")

# Loop through each generation and run the SEM model
for generation in generations:
    df_gen = df[df['Generation'] == generation]

    # Generate summary statistics for the subset
    summary_stats = df_gen.describe()
    print(f"Summary statistics for {generation}:")
    print(summary_stats)

    # Define the SEM model with the Yvar_Work_Personal and revised hypotheses
    model_desc = """
    # Latent variables
    Trust =~ VAR11_PRIVACY_AI_Protect_Data + VAR16_ETHICS_AI_Developed_Ethical
    Ease_of_Use =~ VAR02_CG_AI_Training_Opo + VAR12_PRIVACY_AI_Give_Consent_Data_Usage
    Fairness =~ VAR25_FAIRNESS_AI_Treats_All_Fair + VAR26_FAIRNESS_Should_Reduce_Bias

    # Direct relationships with Yvar_Work_Personal
  Yvar_Work_Personal ~ Trust
  Yvar_Work_Personal ~ Ease_of_Use
  Yvar_Work_Personal ~ Fairness

    # Relationships with latent variables
    Trust ~ VAR11_PRIVACY_AI_Protect_Data
    Trust ~ VAR16_ETHICS_AI_Developed_Ethical
    Ease_of_Use ~ VAR02_CG_AI_Training_Opo

    # Covariances (as needed)
    Trust ~~ Ease_of_Use
    Trust ~~ Fairness
    Ease_of_Use ~~ Fairness
    """

    # Create the model and load the dataset into the model
    try:
        model = Model(model_desc)
        model.load_dataset(df_gen)
        print(f"Model created and dataset loaded into the model successfully for {generation}.")
    except Exception as e:
        print(f"Error during model creation or dataset loading for {generation}: {e}")
        continue

    # Optimize the model
    try:
        optim = Optimizer(model)
        optim.optimize()
        print(f"Model optimization completed successfully for {generation}.")
    except Exception as e:
        print(f"Error during model optimization for {generation}: {e}")
        continue

    # Extract the results
    try:
        results = model.inspect()
        # DataFrame.applymap is deprecated in favour of DataFrame.map in newer
        # pandas; use whichever this installation provides.
        elementwise = results.map if hasattr(results, "map") else results.applymap
        # Convert the "Not estimated" / "-" placeholders to NaN
        results = elementwise(lambda x: np.nan if x in ["Not estimated", "-"] else x)
        print(f"Full Results DataFrame for {generation}:")
        print(results)
    except Exception as e:
        print(f"Error during results extraction for {generation}: {e}")
        continue

    # Attempt to extract p-values for the paths
    try:
        # Convert to numeric; anything non-numeric becomes NaN
        pvalues = results[['p-value']].apply(pd.to_numeric, errors='coerce')
        print(f"\nP-values extracted successfully for {generation}:")
        print(pvalues)
    except KeyError:
        print(f"\nUnable to extract p-values for {generation}. Check the results DataFrame above for available data.")
        continue

    # Define hypotheses and their corresponding paths based on revised hypotheses
    hypothesis_criteria = [
        ("Hypothesis 1: Trust influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Trust'),
        ("Hypothesis 2: Ease of Use influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Ease_of_Use'),
        ("Hypothesis 3: Fairness influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Fairness'),
        ("Hypothesis 4: Privacy and Data Protection influence Trust", 'Trust ~ VAR11_PRIVACY_AI_Protect_Data'),
        ("Hypothesis 5: Ethical Considerations influence Trust", 'Trust ~ VAR16_ETHICS_AI_Developed_Ethical'),
        ("Hypothesis 6: Training Opportunities influence Ease of Use", 'Ease_of_Use ~ VAR02_CG_AI_Training_Opo')
    ]

    # Determine whether each hypothesis is accepted or rejected
    accepted_hypotheses = []
    rejected_hypotheses = []

    # Only regression rows can answer a "lhs ~ rhs" hypothesis; covariance
    # rows (op "~~") are excluded when the table exposes an operator column.
    if 'op' in results.columns:
        is_regression = results['op'] == '~'
    else:
        is_regression = pd.Series(True, index=results.index)

    for hyp, path in hypothesis_criteria:
        lhs, rhs = (side.strip() for side in path.split(" ~ "))
        # Match the exact row for this path. Filtering on lval alone returned
        # the first row sharing that left-hand side, so several hypotheses
        # were judged on the same (wrong) p-value.
        row_mask = is_regression & (results['lval'] == lhs) & (results['rval'] == rhs)
        if row_mask.any():
            p_value = pvalues.loc[row_mask, 'p-value'].iloc[0]
            # A missing (NaN) p-value counts as not significant.
            if pd.notna(p_value) and p_value < 0.05:
                accepted_hypotheses.append(hyp)
            else:
                rejected_hypotheses.append(hyp)
        else:
            print(f"Path {path} not found in results for {generation}. Please check the available paths.")
            rejected_hypotheses.append(hyp)

    # Output the results
    print(f"\nAccepted Hypotheses for {generation}:")
    for hyp in accepted_hypotheses:
        print(hyp)

    print(f"\nRejected Hypotheses for {generation}:")
    for hyp in rejected_hypotheses:
        print(hyp)

    # Save the results and summary statistics to Excel for each generation
    gen_summary_path = os.path.join(summary_dir, f'model_summary_{generation}.xlsx')
    with pd.ExcelWriter(gen_summary_path) as writer:
        summary_stats.to_excel(writer, sheet_name='Summary Statistics')
        results.to_excel(writer, sheet_name='SEM Results')
        pvalues.to_excel(writer, sheet_name='P-Values')

        # Write accepted and rejected hypotheses
        pd.DataFrame({'Accepted Hypotheses': accepted_hypotheses}).to_excel(writer, sheet_name='Accepted Hypotheses', index=False)
        pd.DataFrame({'Rejected Hypotheses': rejected_hypotheses}).to_excel(writer, sheet_name='Rejected Hypotheses', index=False)

    print(f"Summary statistics and SEM results saved to {gen_summary_path} for {generation}.")


# Model by CS experience

In [9]:


# Fit the same SEM separately for every CS-experience segment and export one
# workbook per segment. A failure in one segment is reported and skipped so
# the remaining segments still run.

# Get unique values in the 'CS_experience_rollup' column
cs_experience_segments = df['CS_experience_rollup'].unique()
print(f"Found CS_experience_rollup segments: {cs_experience_segments}")

# Loop through each segment in the 'CS_experience_rollup' column and run the SEM model
for segment in cs_experience_segments:
    df_segment = df[df['CS_experience_rollup'] == segment]

    # Generate summary statistics for the subset
    summary_stats = df_segment.describe()
    print(f"Summary statistics for {segment}:")
    print(summary_stats)

    # Define the SEM model with the Yvar_Work_Personal and revised hypotheses
    model_desc = """
    # Latent variables
    Trust =~ VAR11_PRIVACY_AI_Protect_Data + VAR16_ETHICS_AI_Developed_Ethical
    Ease_of_Use =~ VAR02_CG_AI_Training_Opo + VAR12_PRIVACY_AI_Give_Consent_Data_Usage
    Fairness =~ VAR25_FAIRNESS_AI_Treats_All_Fair + VAR26_FAIRNESS_Should_Reduce_Bias

    # Direct relationships with Yvar_Work_Personal
    Yvar_Work_Personal ~ Trust
    Yvar_Work_Personal ~ Ease_of_Use
    Yvar_Work_Personal ~ Fairness

    # Relationships with latent variables
    Trust ~ VAR11_PRIVACY_AI_Protect_Data
    Trust ~ VAR16_ETHICS_AI_Developed_Ethical
    Ease_of_Use ~ VAR02_CG_AI_Training_Opo

    # Covariances (as needed)
    Trust ~~ Ease_of_Use
    Trust ~~ Fairness
    Ease_of_Use ~~ Fairness
    """

    # Create the model and load the dataset into the model
    try:
        model = Model(model_desc)
        model.load_dataset(df_segment)
        print(f"Model created and dataset loaded into the model successfully for {segment}.")
    except Exception as e:
        print(f"Error during model creation or dataset loading for {segment}: {e}")
        continue

    # Optimize the model
    try:
        optim = Optimizer(model)
        optim.optimize()
        print(f"Model optimization completed successfully for {segment}.")
    except Exception as e:
        print(f"Error during model optimization for {segment}: {e}")
        continue

    # Extract the results
    try:
        results = model.inspect()
        # DataFrame.applymap is deprecated in favour of DataFrame.map in newer
        # pandas; use whichever this installation provides.
        elementwise = results.map if hasattr(results, "map") else results.applymap
        # Convert the "Not estimated" / "-" / None placeholders to NaN
        results = elementwise(lambda x: np.nan if x in ["Not estimated", "-", None] else x)
        print(f"Full Results DataFrame for {segment}:")
        print(results)
    except Exception as e:
        print(f"Error during results extraction for {segment}: {e}")
        continue

    # Attempt to extract p-values for the paths
    try:
        # Convert to numeric; anything non-numeric becomes NaN
        pvalues = results[['p-value']].apply(pd.to_numeric, errors='coerce')
        print(f"\nP-values extracted successfully for {segment}:")
        print(pvalues)
    except KeyError:
        print(f"\nUnable to extract p-values for {segment}. Check the results DataFrame above for available data.")
        continue

    # Define hypotheses and their corresponding paths based on revised hypotheses
    hypothesis_criteria = [
        ("Hypothesis 1: Trust influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Trust'),
        ("Hypothesis 2: Ease of Use influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Ease_of_Use'),
        ("Hypothesis 3: Fairness influences Yvar_Work_Personal", 'Yvar_Work_Personal ~ Fairness'),
        ("Hypothesis 4: AI-related training and career growth opportunities influence Ease of Use", 'Ease_of_Use ~ VAR02_CG_AI_Training_Opo'),
        ("Hypothesis 5: Security and safety features enhance trust in AI", 'Trust ~ VAR11_PRIVACY_AI_Protect_Data'),
        ("Hypothesis 6: Ethical design and positive social impact increase trust in AI", 'Trust ~ VAR16_ETHICS_AI_Developed_Ethical')
    ]

    # Determine whether each hypothesis is accepted or rejected
    accepted_hypotheses = []
    rejected_hypotheses = []

    # Only regression rows can answer a "lhs ~ rhs" hypothesis; covariance
    # rows (op "~~") are excluded when the table exposes an operator column.
    if 'op' in results.columns:
        is_regression = results['op'] == '~'
    else:
        is_regression = pd.Series(True, index=results.index)

    for hyp, path in hypothesis_criteria:
        lhs, rhs = (side.strip() for side in path.split(" ~ "))
        # Match the exact row for this path. Filtering on lval alone returned
        # the first row sharing that left-hand side, so several hypotheses
        # were judged on the same (wrong) p-value.
        row_mask = is_regression & (results['lval'] == lhs) & (results['rval'] == rhs)
        if row_mask.any():
            p_value = pvalues.loc[row_mask, 'p-value'].iloc[0]
            # A missing (NaN) p-value counts as not significant.
            if pd.notna(p_value) and p_value < 0.05:
                accepted_hypotheses.append(hyp)
            else:
                rejected_hypotheses.append(hyp)
        else:
            print(f"Path {path} not found in results for {segment}. Please check the available paths.")
            rejected_hypotheses.append(hyp)

    # Output the results
    print(f"\nAccepted Hypotheses for {segment}:")
    for hyp in accepted_hypotheses:
        print(hyp)

    print(f"\nRejected Hypotheses for {segment}:")
    for hyp in rejected_hypotheses:
        print(hyp)

    # Save the results and summary statistics to Excel for each segment
    segment_summary_path = os.path.join(summary_dir, f'model_summary_{segment}.xlsx')
    with pd.ExcelWriter(segment_summary_path) as writer:
        summary_stats.to_excel(writer, sheet_name='Summary Statistics')
        results.to_excel(writer, sheet_name='SEM Results')
        pvalues.to_excel(writer, sheet_name='P-Values')

        # Write accepted and rejected hypotheses
        pd.DataFrame({'Accepted Hypotheses': accepted_hypotheses}).to_excel(writer, sheet_name='Accepted Hypotheses', index=False)
        pd.DataFrame({'Rejected Hypotheses': rejected_hypotheses}).to_excel(writer, sheet_name='Rejected Hypotheses', index=False)

    print(f"Summary statistics and SEM results saved to {segment_summary_path} for {segment}.")


  results = results.applymap(lambda x: np.nan if x in ["Not estimated", "-", None] else x)


Found CS_experience_rollup segments: ['Experienced' 'Novice' 'No Experience']
Summary statistics for Experienced:
       VAR01_CG_Training  VAR02_CG_AI_Training_Opo  \
count         250.000000                250.000000   
mean            3.536000                  4.000000   
std             1.245354                  0.878114   
min             1.000000                  1.000000   
25%             3.000000                  4.000000   
50%             4.000000                  4.000000   
75%             4.000000                  5.000000   
max             5.000000                  5.000000   

       VAR03_CG_AI_Training_Access  VAR04_CG_AI_Training_helps_skills  \
count                   250.000000                         250.000000   
mean                      4.188000                           3.988000   
std                       0.909927                           0.929144   
min                       1.000000                           1.000000   
25%                       4.000000

  results = results.applymap(lambda x: np.nan if x in ["Not estimated", "-", None] else x)
  results = results.applymap(lambda x: np.nan if x in ["Not estimated", "-", None] else x)


Summary statistics and SEM results saved to /Users/danramirez/mbs-structural-equation-modeling/04-summary/model_summary_Novice.xlsx for Novice.
Summary statistics for No Experience:
       VAR01_CG_Training  VAR02_CG_AI_Training_Opo  \
count         335.000000                 335.00000   
mean            3.185075                   3.78209   
std             1.316097                   0.93356   
min             1.000000                   1.00000   
25%             2.000000                   3.00000   
50%             4.000000                   4.00000   
75%             4.000000                   4.00000   
max             5.000000                   5.00000   

       VAR03_CG_AI_Training_Access  VAR04_CG_AI_Training_helps_skills  \
count                   335.000000                         335.000000   
mean                      4.053731                           3.767164   
std                       0.894147                           0.963152   
min                       1.000000     