In [None]:
# Load Required Libraries ----
# Only packages that are not already installed are installed, so re-running
# this cell does not reinstall everything on every run.
# NOTE: Graphviz is a system program (the `dot` executable), not an R package.
# The original install.packages()/library() calls for "graphviz" failed and
# halted the script, so they are dropped; install Graphviz from
# https://graphviz.org if you want semPLS::pathDiagram() to render graphics.
# NOTE(review): semPLS may no longer be on CRAN - if install.packages() cannot
# find it, install it from the CRAN archive instead.
required_pkgs <- c("lavaan", "semPlot", "semPLS")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(lavaan)
library(semPlot)
library(semPLS)

In [1]:
# Read the merged, SEM-ready CSV (merged_dataset_ready_for_sem.csv) into
# `dataset`. The path is machine-specific; adjust it to wherever the file was
# saved on your system.
dataset <- read.csv(
  file = "C:/Users/jason/OneDrive - The Pennsylvania State University/DAAN 881, Data Drive Decision Making/Project/Auto Data/Merged/merged_dataset_ready_for_sem.csv"
)

NameError: name 'dataset' is not defined

In [None]:
# First look at the loaded data: per-column summary statistics, followed by
# the compact structure listing (column names, types, leading values).
summary(object = dataset)
str(object = dataset)

In [None]:
# Clean and Prepare Data for SEM ----
# Keep only the variables used by the models below, coerce them to numeric,
# and then drop incomplete rows.
sem_vars <- c(
  "driver_age", "opioid_flag", "alcohol_flag", "any_drug_flag",
  "fatalities", "injuries", "severity_level"
)

# Fail early with a readable message if the CSV lacks an expected column.
missing_vars <- setdiff(sem_vars, names(dataset))
if (length(missing_vars) > 0) {
  stop(
    "dataset is missing required column(s): ",
    paste(missing_vars, collapse = ", "),
    call. = FALSE
  )
}

dataset <- dataset[, sem_vars]

# Coerce every model variable to numeric (this is a type conversion, not a
# z-score standardisation). Factors go through as.character() first because
# as.numeric() on a factor returns the internal level codes, not the values.
dataset[sem_vars] <- lapply(dataset[sem_vars], function(column) {
  if (is.factor(column)) {
    column <- as.character(column)
  }
  as.numeric(column)
})

# Drop incomplete rows AFTER coercion so that values which could not be
# converted (and became NA) are removed as well, not only the NAs that were
# already present in the raw file.
dataset <- na.omit(dataset)

In [None]:
# Define Our Structural Model
# `model_spec` is a lavaan model-syntax string. Inside it:
#   `=~` defines a latent variable (left) measured by observed indicators (right)
#   `~`  regresses the left-hand variable on the right-hand variables
# Three latent variables are declared: Severity (single indicator
# severity_level), Impairment (the three *_flag columns) and DriverProfile
# (driver_age, fatalities, injuries). The structural part regresses Severity on
# Impairment and DriverProfile, and Impairment on DriverProfile.
# The `#` lines inside the quotes are part of the string itself, not R comments.
# NOTE(review): the *_flag indicators look binary - if so, consider whether
# they should be declared as ordered when fitting; confirm against the data.
model_spec <- '
# Measurement Model
Severity =~ severity_level
Impairment =~ opioid_flag + alcohol_flag + any_drug_flag
DriverProfile =~ driver_age + fatalities + injuries

# Structural Model
Severity ~ Impairment + DriverProfile
Impairment ~ DriverProfile
'

In [None]:
# Covariance-based SEM (CB-SEM) ----
# Fit the lavaan model described by `model_spec` to the prepared data.
# std.lv = TRUE identifies each latent variable by fixing its variance to 1
# instead of fixing the first loading.
cbsem_fit <- sem(
  model = model_spec,
  data = dataset,
  std.lv = TRUE
)

# Parameter estimates together with the global fit indices.
summary(cbsem_fit, fit.measures = TRUE)

# Path diagram of the fitted model: tree layout, structural part only,
# edges labelled with parameter estimates, residuals hidden, no edge fading.
semPaths(
  cbsem_fit,
  what = "est",
  layout = "tree",
  structural = TRUE,
  residuals = FALSE,
  fade = FALSE,
  nCharNodes = 7,
  edge.label.cex = 1
)

In [None]:
# Partial least squares SEM (PLS-SEM) with semPLS ----

# Measurement (outer) model: one row per link, written as
# (latent variable, observed indicator).
pls_mm <- rbind(
  c("Impairment", "opioid_flag"),
  c("Impairment", "alcohol_flag"),
  c("Impairment", "any_drug_flag"),
  c("DriverProfile", "driver_age"),
  c("DriverProfile", "fatalities"),
  c("DriverProfile", "injuries"),
  c("Severity", "severity_level")
)

# Structural (inner) model: one row per path, written as (source, target).
pls_sm <- rbind(
  c("DriverProfile", "Impairment"),
  c("DriverProfile", "Severity"),
  c("Impairment", "Severity")
)

# Assemble the model specification, then estimate it on the same data.
pls_model <- plsm(
  data = dataset,
  strucmod = pls_sm,
  measuremod = pls_mm
)
pls_fit <- sempls(
  model = pls_model,
  data = dataset
)

# Report the fitted model: overall summary, then path coefficients,
# outer loadings and R-squared values, in that order.
summary(pls_fit)
pathCoeff(pls_fit)
plsLoadings(pls_fit)
rSquared(pls_fit)

# Optional path diagram written to "pls_model" as a PDF. Rendering needs the
# external Graphviz program, so the call is switched off by default; change
# FALSE to TRUE to run it.
if (FALSE) {
  pathDiagram(
    pls_fit,
    file = "pls_model",
    full = TRUE,
    edge.labels = "values",
    output.type = "graphics",
    graphics.fmt = "pdf"
  )
}