In [None]:
# SEM Analysis

import pandas as pd, plspm.config as c
from plspm.plspm import Plspm
from plspm.scheme import Scheme
from plspm.mode import Mode

# Survey data; the first CSV column becomes the row index
TAM = pd.read_csv('TAM.csv', index_col=0)

# Inner model: (source construct, constructs it points to), registered in this order
hypothesised_paths = [
    ("SN", ["PU", "PEU", "BI", "ATT"]),
    ("OQ", ["PU"]),
    ("CP", ["PEU", "ATT", "BI"]),
    ("PEU", ["PU", "ATT", "BI", "ASU"]),
    ("PU", ["ATT", "BI"]),
    ("ATT", ["BI"]),
    ("BI", ["ASU"]),
]
structure = c.Structure()
for source, targets in hypothesised_paths:
    structure.add_path([source], targets)

# Outer model: every construct is registered with Mode.A, passing the construct
# name as the column-name argument (presumably the TAM columns matching that
# name become its manifest variables -- confirm against the plspm docs)
config = c.Config(structure.path(), scaled=False)
for construct in ("OQ", "SN", "PU", "ATT", "BI", "ASU", "CP", "PEU"):
    config.add_lv_with_columns_named(construct, Mode.A, TAM, construct)

# Report whether the configuration treats the data as metric
print(config.metric())

# Fit the PLS path model (path weighting scheme) with 5000 bootstrap resamples
plspm_calc = Plspm(TAM, config, Scheme.PATH, bootstrap=True, bootstrap_iterations=5000)

# Print every result table, one after another
for report in (
    plspm_calc.goodness_of_fit,
    plspm_calc.unidimensionality,
    plspm_calc.inner_summary,
    plspm_calc.crossloadings,
    plspm_calc.outer_model,
    plspm_calc.inner_model,
    plspm_calc.path_coefficients,
    plspm_calc.effects,
    plspm_calc.scores,
):
    print(report())


In [None]:
# Perform Common Method Bias (CMB) test

import numpy as np
import pandas as pd
from scipy.stats import f

# Single-factor check: share of total variance captured by the dominant
# principal component of a correlation matrix built from the cross-loadings.
# NOTE(review): df.corr() correlates the *columns* of the cross-loadings table;
# a Harman-style test is usually run on the correlations of the raw indicators
# -- confirm this is the intended matrix.
df = plspm_calc.crossloadings()
corr_matrix = df.corr()

# A correlation matrix is symmetric, so use eigh: it always returns real
# eigenvalues, whereas np.linalg.eig gives no ordering guarantee and may
# return complex dtypes.
eigenvalues, eigenvectors = np.linalg.eigh(corr_matrix.values)

# eigh returns eigenvalues in ascending order; reorder so that index 0 is
# really the first (largest-variance) component
order = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[order]
eigenvectors = eigenvectors[:, order]
first_component = eigenvectors[:, 0]

# Absolute projection of each cross-loadings row onto the first component
loadings = np.abs(df @ first_component)

# Proportion of total variance explained by the first principal component
total_variance = np.sum(eigenvalues)
explained_variance = eigenvalues[0]
explained_variance_ratio = explained_variance / total_variance

print(f"The first component explains {explained_variance_ratio * 100:.2f}% of the total variance")


In [None]:
# Elimination of unsupported hypotheses

import pandas as pd, plspm.config as c
from plspm.plspm import Plspm
from plspm.scheme import Scheme
from plspm.mode import Mode

# Reload the survey data; the first CSV column becomes the row index
TAM = pd.read_csv('TAM.csv', index_col=0)

# Reduced inner model: only the paths retained after dropping the unsupported
# hypotheses, as (source construct, constructs it points to)
retained_paths = [
    ("SN", ["PEU", "BI"]),
    ("OQ", ["PU"]),
    ("CP", ["PEU", "ATT", "BI"]),
    ("PEU", ["PU", "ATT"]),
    ("PU", ["ATT"]),
    ("ATT", ["BI"]),
    ("BI", ["ASU"]),
]
structure = c.Structure()
for source, targets in retained_paths:
    structure.add_path([source], targets)

# Outer model: every construct is registered with Mode.A, passing the construct
# name as the column-name argument (presumably the TAM columns matching that
# name become its manifest variables -- confirm against the plspm docs)
config = c.Config(structure.path(), scaled=False)
for construct in ("OQ", "SN", "PU", "ATT", "BI", "ASU", "CP", "PEU"):
    config.add_lv_with_columns_named(construct, Mode.A, TAM, construct)

# Fit the PLS path model (path weighting scheme) with 5000 bootstrap resamples
plspm_calc = Plspm(TAM, config, Scheme.PATH, bootstrap=True, bootstrap_iterations=5000)

# Print every result table, one after another
for report in (
    plspm_calc.goodness_of_fit,
    plspm_calc.unidimensionality,
    plspm_calc.inner_summary,
    plspm_calc.crossloadings,
    plspm_calc.outer_model,
    plspm_calc.inner_model,
    plspm_calc.path_coefficients,
    plspm_calc.effects,
    plspm_calc.scores,
):
    print(report())


In [None]:
# Export the construct scores of each record (each surveyed person) to CSV;
# the row index is deliberately not written, so the file holds score columns only
construct_scores = plspm_calc.scores()
construct_scores.to_csv('TAM_constructs.csv', index=False)

In [None]:
# Normalize the data in the range [0, 1]

import pandas as pd

# Load the construct scores exported earlier
df = pd.read_csv('TAM_constructs.csv')

# Min-max scale all columns at once: (x - min) / (max - min), column by column
col_min = df.min()
col_span = df.max() - col_min
df = (df - col_min) / col_span

# Persist the rescaled scores (no index column)
df.to_csv('TAM_normalized_constructs.csv', index=False)


In [None]:
# ANN Analysis: BI construct prediction and sensitivity analysis

import pandas as pd
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.utils import plot_model
import tensorflow as tf

# Define custom RMSE loss function
def rmse_loss(y_true, y_pred):
    """Root-mean-squared error computed over the whole batch.

    The mean is taken over every element of the batch *before* the square
    root. Taking the square root of the per-sample MSE instead (which is what
    ``mean_squared_error`` yields, since it only reduces the last axis)
    collapses to the absolute error for a one-unit output, i.e. the loss
    would silently be MAE rather than RMSE.

    Args:
        y_true: Ground-truth targets.
        y_pred: Model predictions, same shape as ``y_true``.

    Returns:
        Scalar tensor holding the RMSE of the batch.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Targets may arrive as float64 while predictions are float32
    y_true = tf.cast(y_true, y_pred.dtype)
    return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))

# Load data from CSV file: columns of constructs not related to BI have been removed in a separate csv file
data = pd.read_csv('TAM_normalized_constructs_BI_prediction.csv')

# Split data into inputs (X) and output (y): every column but the last is a
# predictor; the last column is the target and is kept 2-D (n_samples, 1)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1:].values

# Define the number of folds for cross validation and the lists for storing the results of each fold
n_folds = 10
rmse_test_list = []
rmse_train_list = []
r2_test_list = []
r2_train_list = []
importance_list = []

# One seed for both the fold split and TensorFlow (weight initialisation,
# batch shuffling) so that results are repeatable from run to run
SEED = 42
tf.random.set_seed(SEED)

# Create KFold instance
kf = KFold(n_splits=n_folds, shuffle=True, random_state=SEED)

# Iterate on each fold: train a fresh network, score it on the train and test
# partitions, and record the first-layer weight magnitudes per predictor
for fold, (train_index, test_index) in enumerate(kf.split(X)):

    print(f"Fold {fold + 1}")

    # Splitting data into training and test sets
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Create the neural network: one hidden layer of 2 sigmoid units (L2
    # regularised) and a sigmoid output. The input width follows the data
    # instead of being hard-coded, so a different predictor set still works.
    model = Sequential()
    model.add(Dense(2, input_dim=X.shape[1], activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(0.001)))
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    model.compile(loss=rmse_loss, optimizer=optimizers.RMSprop(learning_rate=0.001))

    # Training of the model; the test fold is passed as validation data only
    # to record the val_loss curve plotted below
    history = model.fit(X_train, y_train, epochs=1000, batch_size=2, verbose=0, validation_data=(X_test, y_test))

    # Obtain model predictions
    y_pred = model.predict(X_test)
    y_pred_train = model.predict(X_train)

    # Evaluate the model in the train set.
    # RMSE is computed straight from the predictions: model.evaluate() returns
    # the compiled loss, which also contains the L2 weight penalty and is
    # averaged batch by batch, so it is not the RMSE of the predictions.
    rmse_train = float(np.sqrt(np.mean(np.square(y_train - y_pred_train))))
    rmse_train_list.append(rmse_train)
    print(f"RMSE train: {rmse_train:.5f}")
    r2_train = r2_score(y_train, y_pred_train)
    r2_train_list.append(r2_train)
    print(f"R2 train: {r2_train:.5f}")

    # Evaluate the model in the test set (same RMSE definition as above)
    rmse_test = float(np.sqrt(np.mean(np.square(y_test - y_pred))))
    rmse_test_list.append(rmse_test)
    print(f"RMSE test: {rmse_test:.5f}")
    r2_test = r2_score(y_test, y_pred)
    r2_test_list.append(r2_test)
    print(f"R2 test: {r2_test:.5f}")

    # Calculate the relative importance of each input predictor: mean absolute
    # first-layer kernel weight across the hidden units (weights[0] is the
    # kernel, one row per input)
    layer = model.layers[0]
    weights = layer.get_weights()
    importance = np.abs(weights[0])
    importance_avg = np.mean(importance, axis=1)
    importance_list.append(importance_avg)
    print()
    print("Relative importance of each input predictor:\n", importance_avg)

    # Visualize the training (these curves are the compiled loss, which
    # includes the regularisation term)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Training progress')
    plt.xlabel('Epoch')
    plt.ylabel('RMSE')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

# Visualize the neural network (this is the model trained on the last fold)
plot_model(model, to_file='TAM_model.png', show_shapes=True, show_layer_names=True)
# summary() prints the table itself and returns None, so it must not be wrapped
# in print() -- that would emit a stray "None" line
model.summary()

# Calculate mean RMSE and standard deviation across folds
rmse_train_mean = np.mean(rmse_train_list)
rmse_train_std = np.std(rmse_train_list)
print(f"RMSE train mean: {rmse_train_mean:.5f} +/- {rmse_train_std:.5f}")
rmse_test_mean = np.mean(rmse_test_list)
rmse_test_std = np.std(rmse_test_list)
print(f"RMSE test mean: {rmse_test_mean:.5f} +/- {rmse_test_std:.5f}")

# Calculate mean R2 across folds
r2_train_mean = np.mean(r2_train_list)
print(f"R2 train mean: {r2_train_mean:.5f}")
r2_test_mean = np.mean(r2_test_list)
print(f"R2 test mean: {r2_test_mean:.5f}")

# Display summary table (one row per fold) and export it to csv
print()
results_df = pd.DataFrame({"RMSE test": rmse_test_list, "R2 test": r2_test_list,"RMSE train": rmse_train_list, "R2 train": r2_train_list})
print("Results of each fold:\n", results_df)
results_df.to_csv("Results_TAM.csv", index=True)
print()

# Calculate the mean relative importance of each input predictor; columns are
# labelled with the predictor names, i.e. every data column except the target
print()
importance_df = pd.DataFrame(importance_list, columns=data.columns[:-1])
importance_mean = importance_df.mean()

# Display summary table of relative importance and export it to csv
print("Relative importance of each input predictor for each fold:\n", importance_df)
print()
print("Mean relative importance of each input predictor:")
print(importance_mean)
importance_df.to_csv("Sensitivity_analysis_TAM.csv", index=True)
