plspm is an R package dedicated to Partial Least Squares Path Modeling (PLS-PM) analysis for both metric and non-metric data. Versions later than 4.0 include a whole new set of features to handle non-metric variables.
Stable version on CRAN
# install the stable release of "plspm" from CRAN
install.packages("plspm")
Development version on github
# install "devtools" and attach it so that install_github() is available
install.packages("devtools")
library(devtools)
# install the development version of "plspm" from the GitHub
# repository gastonstat/plspm
install_github("gastonstat/plspm")
Typical example with a Customer Satisfaction Model
# attach the plspm package
library(plspm)
# load the example dataset 'satisfaction'
# (presumably shipped with plspm -- it is loaded right after attaching it)
data(satisfaction)
# define path matrix (inner model)
IMAG < -c(0,0,0,0,0,0)
EXPE <- c(1,0,0,0,0,0)
QUAL <- c(0,1,0,0,0,0)
VAL <- c(0,1,1,0,0,0)
SAT <- c(1,1,1,1,0,0)
LOY <- c(1,0,0,0,1,0)
sat_path <- rbind(IMAG, EXPE, QUAL, VAL, SAT, LOY)
# outer model: for each latent variable, the column positions of its
# indicators in the data set
sat_blocks <- list(1:5, 6:10, 11:15, 16:19, 20:23, 24:27)

# one measurement mode per block; "A" stands for reflective indicators
sat_modes <- rep("A", times = length(sat_blocks))

# run PLS-PM on the unscaled data, with bootstrap validation turned on
satpls <- plspm(
  Data = satisfaction,
  path_matrix = sat_path,
  blocks = sat_blocks,
  modes = sat_modes,
  scaled = FALSE,
  boot.val = TRUE
)

# default print of the fitted object
print(satpls)

# summary of results
summary(satpls)

# plots: inner model first, then the outer model loadings and weights
plot(satpls, what = "inner")
plot(satpls, what = "loadings")
plot(satpls, what = "weights")
Example with the classic Russett data (original data set)
# load dataset russett A: 'russa'
# (variable 'demo' stored as numeric)
data(russa)
# load dataset russett B: 'russb'
# (variable 'demo' stored as a factor)
data(russb)
# Russett model, all indicators treated as numeric
# inner model: only the POLINS row has paths, linking it to AGRI and IND
rus_path <- matrix(
  c(0, 0, 0,
    0, 0, 0,
    1, 1, 0),
  nrow = 3, ncol = 3, byrow = TRUE,
  dimnames = list(
    c("AGRI", "IND", "POLINS"),
    c("AGRI", "IND", "POLINS")
  )
)

# outer model: column positions of the indicators in each block
rus_blocks <- list(1:3, 4:5, 6:9)

# one "NUM" scaling entry per indicator, block by block
rus_scaling <- lapply(rus_blocks, function(block) rep("NUM", length(block)))

# mode "A" for each of the three blocks
rus_modes <- rep("A", 3)
PLS-PM using data set russa and scaling all 'NUM'
# fit PLS-PM on 'russa' with every indicator scaled as "NUM"
rus_pls1 <- plspm(
  russa, rus_path, rus_blocks,
  scaling = rus_scaling,
  modes = rus_modes,
  scheme = "centroid",
  plscomp = c(1, 1, 1),
  tol = 0.0000001
)
print(rus_pls1)

# outer model results
rus_pls1[["outer_model"]]

# inner model results
rus_pls1[["inner_model"]]

# first rows of the scores
head(rus_pls1[["scores"]])

# plot of the fitted object (inner model)
plot(rus_pls1)
PLS-PM using data set russa, and different scaling
# alternative scaling: first block numeric, second block ordinal,
# third block numeric except for its last indicator, which is nominal
rus_scaling2 <- list(
  rep("NUM", 3),
  rep("ORD", 2),
  c(rep("NUM", 3), "NOM")
)

# fit PLS-PM on 'russa' with the new scaling
rus_pls2 <- plspm(
  russa, rus_path, rus_blocks,
  scaling = rus_scaling2,
  modes = rus_modes,
  scheme = "centroid",
  plscomp = c(1, 1, 1),
  tol = 0.0000001
)

# outer model results
rus_pls2[["outer_model"]]
Now let's use data set russb (it contains a factor!)
# look at the first rows of 'russb'
head(russb)

# fit PLS-PM on 'russb', reusing the mixed scaling 'rus_scaling2'
rus_pls3 <- plspm(
  russb, rus_path, rus_blocks,
  scaling = rus_scaling2,
  modes = rus_modes,
  scheme = "centroid",
  plscomp = c(1, 1, 1),
  tol = 0.0000001
)

# outer model results
rus_pls3[["outer_model"]]
Now let's change modes
# use mode "newA" for all three blocks
rus_modes2 <- rep("newA", 3)

# fit PLS-PM on 'russa' with the new modes and the mixed scaling
rus_pls4 <- plspm(
  russa, rus_path, rus_blocks,
  scaling = rus_scaling2,
  modes = rus_modes2,
  scheme = "centroid",
  plscomp = c(1, 1, 1),
  tol = 0.0000001
)

# outer model results
rus_pls4[["outer_model"]]
Let's make things more interesting, flexible and versatile. How?
What if you could have more freedom specifying the arguments? Now you can!
Note that you can specify blocks using variables' names; the scaling types are NOT case sensitive, and neither are modes nor scheme. Isn't that cool?
# blocks given by variable names rather than column positions
rus_blocchi <- list(
  c("gini", "farm", "rent"),
  c("gnpr", "labo"),
  c("inst", "ecks", "death", "demo")
)

# scaling types written with mixed spellings and letter case
rus_scaling3 <- list(
  rep("numeric", 3),
  c("ordinal", "ORDINAL"),
  c("NuM", "numer", "NUM", "nominal")
)

# mode "newA" written with mixed letter case
rus_modes3 <- c("newa", "NEWA", "NewA")

# fit PLS-PM on 'russb' with the name-based blocks and an upper-case scheme
rus_pls5 <- plspm(
  russb, rus_path, rus_blocchi,
  scaling = rus_scaling3,
  modes = rus_modes3,
  scheme = "CENTROID",
  plscomp = c(1, 1, 1),
  tol = 0.0000001
)

# outer model results
rus_pls5[["outer_model"]]
Another nice feature is that you can perform a PLS-PM analysis on data containing missing values.
We'll use the dataset russa and add some missing values. Then we'll handle all variables with a numeric scaling.
# copy 'russa' and blank out three cells on its diagonal
russNA <- russa
russNA[1, 1] <- NA
russNA[4, 4] <- NA
russNA[6, 6] <- NA

# fit PLS-PM on 'russNA' (the copy with missing values), all-numeric scaling
rus_pls6 <- plspm(
  russNA, rus_path, rus_blocks,
  scaling = rus_scaling,
  modes = rus_modes,
  scheme = "centroid",
  plscomp = c(1, 1, 1),
  tol = 0.0000001
)
print(rus_pls6)

# outer model results
rus_pls6[["outer_model"]]

# inner model results
rus_pls6[["inner_model"]]

# first rows of the scores
head(rus_pls6[["scores"]])

# plot of the fitted object (inner model)
plot(rus_pls6)
Gaston Sanchez (gaston.stat at gmail.com)
Laura Trinchera (ltr at rouenbs.fr)
Giorgio Russolillo (giorgio.russolillo at cnam.fr)