In [13]:
import pandas as pd
import unicodedata

df = pd.read_csv("./data_labeled.csv")   # ƒë∆∞·ªùng d·∫´n t√πy th∆∞ m·ª•c c·ªßa b·∫°n

# ==== H√†m b·ªè d·∫•u ====
def strip_accents(text: str) -> str:
    text = unicodedata.normalize('NFD', text)
    return ''.join(ch for ch in text if unicodedata.category(ch) != 'Mn')

# ==== Chu·∫©n h√≥a LOCATION ====
def normalize_location(value: str) -> str:
    v = strip_accents(str(value)).lower()

    if any(x in v for x in ["hcm", "ho chi minh", "tphcm", "tp.hcm", "tp hochiminh"]):
        return "Th√†nh ph·ªë H·ªì Ch√≠ Minh"
    if "binh duong" in v:
        return "T·ªânh B√¨nh D∆∞∆°ng"
    if "dong nai" in v or "nhon trach" in v or "long thanh" in v:
        return "T·ªânh ƒê·ªìng Nai"
    if "ba ria" in v or "vung tau" in v:
        return "T·ªânh B√† R·ªãa - V≈©ng T√†u"
    return "Kh√°c"

# ==== Chu·∫©n h√≥a BUSINESS TYPE ====
def normalize_business(value: str) -> str:
    v = strip_accents(str(value)).lower()
    if "hang tau" in v or "h√£ng t√†u" in v:
        return "H√£ng t√†u"
    if "dai ly" in v or "ƒë·∫°i l√Ω" in v:
        return "ƒê·∫°i l√Ω v·∫≠n t·∫£i"
    if "xuat nhap khau" in v:
        return "C√¥ng ty xu·∫•t nh·∫≠p kh·∫©u"
    if "san xuat" in v or "cung cap dich vu" in v:
        return "C√¥ng ty chuy√™n s·∫£n xu·∫•t/ cung c·∫•p d·ªãch v·ª•"
    return "Kh√°c"

# ==== Chu·∫©n h√≥a USAGE FREQUENCY ====
def normalize_usage(value: str) -> str:
    v = strip_accents(str(value)).lower().strip()
    if "1 - 3" in v:
        return "1 - 3 l·∫ßn/th√°ng"
    if "tren 3" in v:
        return "Tr√™n 3 l·∫ßn/th√°ng"
    return value  # fallback

df["location_group"] = df["location"].apply(normalize_location)
df["business_type_group"] = df["business_type"].apply(normalize_business)
df["usage_group"] = df["usage_frequency"].apply(normalize_usage)


In [14]:
def frequency_table(series: pd.Series) -> pd.DataFrame:
    freq = series.value_counts().reset_index()
    freq.columns = ["category", "frequency"]
    freq["percent"] = (freq["frequency"] / freq["frequency"].sum() * 100).round(1)
    return freq


In [15]:
freq_location = frequency_table(df["location_group"])
freq_business = frequency_table(df["business_type_group"])
freq_usage = frequency_table(df["usage_group"])

print("üìå ƒê·ªãa ƒëi·ªÉm ƒëƒÉng k√Ω kinh doanh:\n", freq_location, "\n")
print("üìå Lo·∫°i h√¨nh kinh doanh:\n", freq_business, "\n")
print("üìå T·∫ßn su·∫•t s·ª≠ d·ª•ng d·ªãch v·ª•:\n", freq_usage, "\n")


üìå ƒê·ªãa ƒëi·ªÉm ƒëƒÉng k√Ω kinh doanh:
                  category  frequency  percent
0   Th√†nh ph·ªë H·ªì Ch√≠ Minh         90     55.6
1                    Kh√°c         58     35.8
2           T·ªânh ƒê·ªìng Nai         11      6.8
3  T·ªânh B√† R·ªãa - V≈©ng T√†u          2      1.2
4         T·ªânh B√¨nh D∆∞∆°ng          1      0.6 

üìå Lo·∫°i h√¨nh kinh doanh:
                                     category  frequency  percent
0                     C√¥ng ty xu·∫•t nh·∫≠p kh·∫©u        112     69.1
1                                       Kh√°c         43     26.5
2  C√¥ng ty chuy√™n s·∫£n xu·∫•t/ cung c·∫•p d·ªãch v·ª•          5      3.1
3                                   H√£ng t√†u          2      1.2 

üìå T·∫ßn su·∫•t s·ª≠ d·ª•ng d·ªãch v·ª•:
            category  frequency  percent
0  Tr√™n 3 l·∫ßn/th√°ng        134     82.7
1   1 - 3 l·∫ßn/th√°ng         28     17.3 



In [16]:
with pd.ExcelWriter("task1_frequency_tables.xlsx") as writer:
    freq_location.to_excel(writer, sheet_name="location", index=False)
    freq_business.to_excel(writer, sheet_name="business_type", index=False)
    freq_usage.to_excel(writer, sheet_name="usage", index=False)

print("‚úÖ ƒê√£ xu·∫•t file: task1_frequency_tables.xlsx")


‚úÖ ƒê√£ xu·∫•t file: task1_frequency_tables.xlsx


In [11]:
len(df), df["location_group"].isna().sum(), df["business_type_group"].isna().sum(), df["usage_group"].isna().sum()


(162, 0, 0, 0)

In [12]:
df["usage_frequency"].value_counts()


usage_frequency
Tr√™n 3 l·∫ßn/th√°ng    134
1 - 3 l·∫ßn/th√°ng      28
Name: count, dtype: int64

In [None]:
def frequency_table(series: pd.Series) -> pd.DataFrame:
    """
    Tr·∫£ v·ªÅ b·∫£ng t·∫ßn s·ªë & t·ªâ l·ªá %
    """
    counts = series.value_counts().reset_index()
    counts.columns = ["category", "frequency"]
    total = counts["frequency"].sum()
    counts["percent"] = counts["frequency"] / total * 100
    # s·∫Øp x·∫øp theo frequency gi·∫£m d·∫ßn (ho·∫∑c gi·ªØ nguy√™n theo index n·∫øu mu·ªën)
    return counts


In [25]:
import pandas as pd
import unicodedata
import numpy as np
from pathlib import Path
import os

df = pd.read_csv("../data_labeled.csv")   # ƒë∆∞·ªùng d·∫´n t√πy th∆∞ m·ª•c c·ªßa b·∫°n



print("S·ªë d√≤ng, s·ªë c·ªôt:", df.shape)
df.head()
BASE_DIR = Path().resolve()


S·ªë d√≤ng, s·ªë c·ªôt: (162, 43)


In [26]:
# 9 c·ªôt ƒë·∫ßu l√† th√¥ng tin m√¥ t·∫£, t·ª´ RE1 tr·ªü ƒëi l√† Likert
METADATA_COLS = [
    "timestamp", "email", "company", "location",
    "business_type", "position", "experience",
    "used_service", "usage_frequency"
]

# T·∫§T C·∫¢ c√°c item thang ƒëo (ƒë√∫ng theo header hi·ªán t·∫°i)
ITEM_COLS = [
    "RE1", "RE2", "RE3", "RE4", "RE5",          # Ngu·ªìn l·ª±c
    "OU1", "OU2", "OU3", "OU4", "OU5",          # K·∫øt qu·∫£
    "PR1", "PR2", "PR3", "PR4",                 # Quy tr√¨nh
    "MA1", "MA2", "MA3", "MA4", "MA5", "MA6",   # Qu·∫£n l√Ω
    "ISR1", "ISR2", "ISR3",                     # H√¨nh ·∫£nh (ph·∫ßn 1)
    "P1", "P2", "P3", "P4", "P5", "P6",         # H√¨nh ·∫£nh / TNXH (ph·∫ßn 2)
    "CS1", "CS2", "CS3", "CS4", "CS5"           # HL + CS (CS4, CS5 = bi·∫øn ph·ª• thu·ªôc)
]

# Ki·ªÉm tra xem t·∫•t c·∫£ item ƒë·ªÅu c√≥ trong df ch∆∞a
missing_cols = [c for c in ITEM_COLS if c not in df.columns]
print("Thi·∫øu c·ªôt n√†o kh√¥ng? ->", missing_cols)

# √âp ki·ªÉu s·ªë cho to√†n b·ªô item (ph√≤ng tr∆∞·ªùng h·ª£p ƒë·ªçc v√†o l√† object/string)
df[ITEM_COLS] = df[ITEM_COLS].apply(pd.to_numeric, errors="coerce")

df[ITEM_COLS].dtypes.head()


Thi·∫øu c·ªôt n√†o kh√¥ng? -> []


RE1    int64
RE2    int64
RE3    int64
RE4    int64
RE5    int64
dtype: object

In [None]:
# SCALES = {
#     "NL": ["RE1", "RE2", "RE3", "RE4", "RE5"],          # Ngu·ªìn l·ª±c
#     "KQ": ["OU1", "OU2", "OU3", "OU4", "OU5"],          # K·∫øt qu·∫£
#     "QT": ["PR1", "PR2", "PR3", "PR4"],                 # Quy tr√¨nh
#     "QL": ["MA1", "MA2", "MA3", "MA4", "MA5", "MA6"],   # Qu·∫£n l√Ω
#     "HA": ["ISR1", "ISR2", "ISR3", "P1", "P2", "P3", "P4", "P5", "P6"],  # H√¨nh ·∫£nh & TNXH
#     "CS": ["CS1", "CS2", "CS3", "CS4", "CS5"]           # H√†i l√≤ng kh√°ch h√†ng (bi·∫øn ph·ª• thu·ªôc)
# }


SCALES = {
    # 6 bi·∫øn ƒë·ªôc l·∫≠p
    "RE"  : ["RE1", "RE2", "RE3", "RE4", "RE5"],              # Resource
    "OU"  : ["OU1", "OU2", "OU3", "OU4", "OU5"],              # Outcome
    "PR"  : ["PR1", "PR2", "PR3", "PR4"],                     # Process
    "MA"  : ["MA1", "MA2", "MA3", "MA4", "MA5", "MA6"],      # Management
    "ISR" : ["ISR1", "ISR2", "ISR3"],                         # Image & Social responsibility
    "P"   : ["P1", "P2", "P3", "P4", "P5", "P6"],             # Price

    # 1 bi·∫øn ph·ª• thu·ªôc
    "CS"  : ["CS1", "CS2", "CS3", "CS4", "CS5"]               # Customer satisfaction
}

NL: 5 items -> ['RE1', 'RE2', 'RE3', 'RE4', 'RE5']
KQ: 5 items -> ['OU1', 'OU2', 'OU3', 'OU4', 'OU5']
QT: 4 items -> ['PR1', 'PR2', 'PR3', 'PR4']
QL: 6 items -> ['MA1', 'MA2', 'MA3', 'MA4', 'MA5', 'MA6']
HA: 9 items -> ['ISR1', 'ISR2', 'ISR3', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6']
HL: 3 items -> ['CS1', 'CS2', 'CS3']
CS: 2 items -> ['CS4', 'CS5']


In [28]:
# T√≠nh th·ªëng k√™ m√¥ t·∫£ cho t·ª´ng item
desc_items = df[ITEM_COLS].agg(['count', 'min', 'max', 'mean', 'std']).T

# ƒê·ªïi t√™n c·ªôt cho gi·ªëng lu·∫≠n vƒÉn
desc_items = desc_items.rename(columns={
    'count': 'N',
    'min': 'Min',
    'max': 'Max',
    'mean': 'Mean',
    'std': 'Std'
})

# L√†m tr√≤n 3 ch·ªØ s·ªë th·∫≠p ph√¢n (gi·ªëng SPSS)
desc_items = desc_items.round(3)

desc_items.head(10)


Unnamed: 0,N,Min,Max,Mean,Std
RE1,162.0,1.0,5.0,4.117,0.644
RE2,162.0,2.0,5.0,4.025,0.747
RE3,162.0,2.0,5.0,4.08,0.739
RE4,162.0,1.0,5.0,4.241,0.703
RE5,162.0,1.0,5.0,4.08,0.804
OU1,162.0,1.0,5.0,4.0,0.772
OU2,162.0,2.0,5.0,4.111,0.696
OU3,162.0,1.0,5.0,4.173,0.727
OU4,162.0,2.0,5.0,4.173,0.674
OU5,162.0,1.0,5.0,4.142,0.73


In [29]:
# T·∫°o c·ªôt ƒëi·ªÉm trung b√¨nh cho t·ª´ng thang ƒëo
for latent, items in SCALES.items():
    df[latent] = df[items].mean(axis=1)

# Th·ªëng k√™ m√¥ t·∫£ cho t·ª´ng thang ƒëo
scale_cols = list(SCALES.keys())
desc_scales = df[scale_cols].agg(['count', 'min', 'max', 'mean', 'std']).T
desc_scales = desc_scales.rename(columns={
    'count': 'N',
    'min': 'Min',
    'max': 'Max',
    'mean': 'Mean',
    'std': 'Std'
}).round(3)

desc_scales


Unnamed: 0,N,Min,Max,Mean,Std
NL,162.0,1.4,5.0,4.109,0.569
KQ,162.0,2.0,5.0,4.12,0.565
QT,162.0,1.75,5.0,4.131,0.579
QL,162.0,1.833,5.0,4.156,0.541
HA,162.0,2.0,5.0,4.111,0.536
HL,162.0,2.0,5.0,4.148,0.565
CS,162.0,2.0,5.0,4.179,0.567


In [31]:
OUTPUT_DIR = BASE_DIR / "output"
OUTPUT_DIR.mkdir(exist_ok=True)

output_file = OUTPUT_DIR / "Task2_Descriptive_Statistics.xlsx"

with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    desc_items.to_excel(writer, sheet_name="Items_Descriptive")
    desc_scales.to_excel(writer, sheet_name="Scales_Descriptive")

print("ƒê√£ l∆∞u file Task 2 t·∫°i:", output_file)


ƒê√£ l∆∞u file Task 2 t·∫°i: C:\Users\tranh\Downloads\FTU_Thesis\tasks\output\Task2_Descriptive_Statistics.xlsx


# Task 3

In [44]:
# √î 1: C√†i (n·∫øu ch∆∞a c√≥) v√† import th∆∞ vi·ªán + ƒë·ªçc d·ªØ li·ªáu

# Ch·ªâ ch·∫°y pip m·ªôt l·∫ßn trong m√¥i tr∆∞·ªùng m·ªõi
# !pip install plspm

import pandas as pd
import plspm.config as c
from plspm.plspm import Plspm
from plspm.scheme import Scheme
from plspm.mode import Mode
from plspm.scale import Scale

# ƒê·ªçc d·ªØ li·ªáu ƒë√£ g·∫Øn nh√£n
# N·∫øu b·∫°n l∆∞u d·∫°ng csv th√¨ d√πng read_csv t∆∞∆°ng ·ª©ng
df_raw = pd.read_excel("../data_labeled.xlsx")

# Gi·ªØ l·∫°i ƒë√∫ng m·∫´u nghi√™n c·ª©u: nh·ªØng ng∆∞·ªùi "ƒê√£ t·ª´ng s·ª≠ d·ª•ng" d·ªãch v·ª•
df = df_raw[df_raw["used_service"] == "ƒê√£ t·ª´ng s·ª≠ d·ª•ng"].reset_index(drop=True)

print("S·ªë m·∫´u sau l·ªçc:", len(df))
df.head()


S·ªë m·∫´u sau l·ªçc: 159


Unnamed: 0,timestamp,email,company,location,business_type,position,experience,used_service,usage_frequency,RE1,...,P2,P3,P4,P5,P6,CS1,CS2,CS3,CS4,CS5
0,11/6/2025 9:18:52,ducthach120990@gmail.com,Ecu worldwide,Kho 5,C√¥ng ty giao nh·∫≠n v·∫≠n t·∫£i (Forwader),Nh√¢n vi√™n hi·ªán tr∆∞·ªùng,Tr√™n 3 nƒÉm,ƒê√£ t·ª´ng s·ª≠ d·ª•ng,Tr√™n 3 l·∫ßn/th√°ng,4,...,3,3,3,3,3,4,4,4,4,4
1,11/6/2025 9:20:20,hiepanctancang@gmail.com,TNHH D·ªäCH V·ª§ v√† V·∫¨N T·∫¢I TH·∫æ GI·ªöI CH√çNH PH∆Ø∆†NG,"T√≤a nh√† Sky Center, CH2A, T·∫ßng tr·ªát, Block C, ...",C√¥ng ty xu·∫•t nh·∫≠p kh·∫©u,Nh√¢n vi√™n hi·ªán tr∆∞·ªùng,Tr√™n 3 nƒÉm,ƒê√£ t·ª´ng s·ª≠ d·ª•ng,Tr√™n 3 l·∫ßn/th√°ng,5,...,5,5,5,5,4,5,5,5,5,5
2,11/6/2025 9:25:51,huunguyen1801@gmail.com,Shipco Transport Vi·ªát Nam,01 ƒëinh l·ªÖ qu·∫≠n 4,C√¥ng ty giao nh·∫≠n v·∫≠n t·∫£i (Forwader),Nh√¢n vi√™n hi·ªán tr∆∞·ªùng,Tr√™n 3 nƒÉm,ƒê√£ t·ª´ng s·ª≠ d·ª•ng,Tr√™n 3 l·∫ßn/th√°ng,5,...,5,5,5,5,5,5,5,5,5,5
3,11/6/2025 13:42:17,nguyenketoan1@gmail.com,Melody,S·ªë 1 nguy·ªÖn vƒÉn ƒë·∫≠u,C√¥ng ty giao nh·∫≠n v·∫≠n t·∫£i (Forwader),Nh√¢n vi√™n hi·ªán tr∆∞·ªùng,1 - 3 nƒÉm,ƒê√£ t·ª´ng s·ª≠ d·ª•ng,Tr√™n 3 l·∫ßn/th√°ng,5,...,5,5,4,5,4,5,5,5,5,5
4,11/6/2025 13:42:50,duongtran.ab91@gmail.com,Ecu worldwide,Tp hochiminh,C√¥ng ty giao nh·∫≠n v·∫≠n t·∫£i (Forwader),Nh√¢n vi√™n hi·ªán tr∆∞·ªùng,Tr√™n 3 nƒÉm,ƒê√£ t·ª´ng s·ª≠ d·ª•ng,Tr√™n 3 l·∫ßn/th√°ng,5,...,4,4,4,4,4,4,4,4,4,4


In [45]:
# √î 2: Khai b√°o mapping thang ƒëo d√πng cho PLS-SEM (Task 3)

SCALES_TASK3 = {
    "NL": ["RE1", "RE2", "RE3", "RE4", "RE5"],                  # Ngu·ªìn l·ª±c
    "KQ": ["OU1", "OU2", "OU3", "OU4", "OU5"],                  # K·∫øt qu·∫£
    "QT": ["PR1", "PR2", "PR3", "PR4"],                         # Quy tr√¨nh
    "QL": ["MA1", "MA2", "MA3", "MA4", "MA5", "MA6"],          # Qu·∫£n l√Ω
    "HA": ["ISR1", "ISR2", "ISR3", "P1", "P2", "P3", "P4", "P5", "P6"],  # H√¨nh ·∫£nh & TNXH
    "HL": ["CS1", "CS2", "CS3"],                               # H√†i l√≤ng
    "CS": ["CS4", "CS5"],                                      # L√≤ng trung th√†nh (bi·∫øn ph·ª• thu·ªôc)
}

# Flatten ra list t·∫•t c·∫£ c√°c bi·∫øn quan s√°t d√πng trong m√¥ h√¨nh
all_items = [item for items in SCALES_TASK3.values() for item in items]

# DataFrame ch·ªâ ch·ª©a c√°c c·ªôt item (t·∫•t c·∫£ ƒë·ªÅu l√† s·ªë)
df_items = df[all_items].copy()

print("S·ªë bi·∫øn quan s√°t:", len(all_items))
print("K√≠ch th∆∞·ªõc df_items:", df_items.shape)
print("S·ªë missing m·ªói c·ªôt:\n", df_items.isna().sum())


S·ªë bi·∫øn quan s√°t: 34
K√≠ch th∆∞·ªõc df_items: (159, 34)
S·ªë missing m·ªói c·ªôt:
 RE1     0
RE2     0
RE3     0
RE4     0
RE5     0
OU1     0
OU2     0
OU3     0
OU4     0
OU5     0
PR1     0
PR2     0
PR3     0
PR4     0
MA1     0
MA2     0
MA3     0
MA4     0
MA5     0
MA6     0
ISR1    0
ISR2    0
ISR3    0
P1      0
P2      0
P3      0
P4      0
P5      0
P6      0
CS1     0
CS2     0
CS3     0
CS4     0
CS5     0
dtype: int64


In [46]:
# √î 3: ƒê·ªãnh nghƒ©a m√¥ h√¨nh c·∫•u tr√∫c (ƒë∆∞·ªùng d·∫´n gi·ªØa c√°c bi·∫øn ti·ªÅm ·∫©n)

structure = c.Structure()

# C√°c y·∫øu t·ªë NL, KQ, QT, QL, HA -> ·∫£nh h∆∞·ªüng HL (H√†i l√≤ng)
structure.add_path(["NL", "KQ", "QT", "QL", "HA"], ["HL"])

# C√°c y·∫øu t·ªë NL, KQ, QT, QL, HA, HL -> ·∫£nh h∆∞·ªüng CS (L√≤ng trung th√†nh)
structure.add_path(["NL", "KQ", "QT", "QL", "HA", "HL"], ["CS"])

# Xem ma tr·∫≠n ƒë∆∞·ªùng d·∫´n
path_matrix = structure.path()
path_matrix


Unnamed: 0,HA,QL,QT,KQ,NL,HL,CS
HA,0,0,0,0,0,0,0
QL,0,0,0,0,0,0,0
QT,0,0,0,0,0,0,0
KQ,0,0,0,0,0,0,0
NL,0,0,0,0,0,0,0
HL,1,1,1,1,1,0,0
CS,1,1,1,1,1,1,0


In [48]:
# √î 4: T·∫°o Config cho plspm (outer model)

cfg = c.Config(path_matrix, default_scale=Scale.NUM)

# Th√™m t·ª´ng bi·∫øn ti·ªÅm ·∫©n (latent variable) v·ªõi Mode A (ph·∫£n x·∫°)
for lv_name, indicators in SCALES_TASK3.items():
    cfg.add_lv(
        lv_name,
        Mode.A,
        *[c.MV(col) for col in indicators]
    )

print("ƒê√£ khai b√°o xong Config cho", len(SCALES_TASK3), "bi·∫øn ti·ªÅm ·∫©n.")


ƒê√£ khai b√°o xong Config cho 7 bi·∫øn ti·ªÅm ·∫©n.


In [51]:
# √î 5: Ch·∫°y PLS-PM

# M·ªôt s·ªë phi√™n b·∫£n plspm kh√¥ng h·ªó tr·ª£ max_iter v√† tol trong h√†m kh·ªüi t·∫°o
# H√£y th·ª≠ b·ªè hai tham s·ªë n√†y n·∫øu g·∫∑p l·ªói
plspm_model = Plspm(
    df_items,   # d·ªØ li·ªáu ch·ªâ g·ªìm c√°c item
    cfg,        # c·∫•u h√¨nh m√¥ h√¨nh
    Scheme.PATH  # scheme PATH (g·∫ßn gi·ªëng SmartPLS)
)

print("PLS-PM ƒë√£ ch·∫°y xong.")


PLS-PM ƒë√£ ch·∫°y xong.


In [52]:
# √î 6: L·∫•y c√°c b·∫£ng k·∫øt qu·∫£ ch√≠nh t∆∞∆°ng ƒë∆∞∆°ng SmartPLS

outer_model_df   = plspm_model.outer_model()       # outer loadings, communalities,...
inner_summary_df = plspm_model.inner_summary()     # R2, communality, redundancy,...
path_coeff_df    = plspm_model.path_coefficients() # h·ªá s·ªë ƒë∆∞·ªùng d·∫´n
effects_df       = plspm_model.effects()           # direct / indirect / total effects

outer_model_df.head(), inner_summary_df, path_coeff_df, effects_df.head()


(       weight   loading  communality  redundancy
 CS1  0.395145  0.795024     0.632062    0.470539
 CS2  0.415110  0.817222     0.667852    0.497183
 CS3  0.428129  0.809600     0.655453    0.487952
 CS4  0.593589  0.864300     0.747015    0.504707
 CS5  0.571200  0.852522     0.726794    0.491045,
           type  r_squared  r_squared_adj  block_communality  mean_redundancy  \
 CS  Endogenous   0.675631       0.662827           0.736905         0.497876   
 HA   Exogenous   0.000000       0.000000           0.573467         0.000000   
 HL  Endogenous   0.744450       0.736099           0.651789         0.485225   
 KQ   Exogenous   0.000000       0.000000           0.612733         0.000000   
 NL   Exogenous   0.000000       0.000000           0.612004         0.000000   
 QL   Exogenous   0.000000       0.000000           0.605374         0.000000   
 QT   Exogenous   0.000000       0.000000           0.631573         0.000000   
 
          ave  
 CS  0.736905  
 HA  0.573467  
 

In [53]:
# √î 7: Xu·∫•t to√†n b·ªô k·∫øt qu·∫£ Task 3 ra file Excel

output_path = "./output/Task3_PLS_Results.xlsx"

with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    outer_model_df.to_excel(writer, sheet_name="Outer_Model")
    inner_summary_df.to_excel(writer, sheet_name="Inner_Summary")
    path_coeff_df.to_excel(writer, sheet_name="Path_Coefficients")
    effects_df.to_excel(writer, sheet_name="Effects")

print("ƒê√£ l∆∞u k·∫øt qu·∫£ Task 3 v√†o:", output_path)


ƒê√£ l∆∞u k·∫øt qu·∫£ Task 3 v√†o: ./output/Task3_PLS_Results.xlsx


# Task 4

In [61]:
# Cell 1 ‚Äî D√πng plspm (CHU·∫®N)
import pandas as pd
import numpy as np
from plspm.plspm import Plspm

In [65]:
# ==== Task 4: Ch·∫°y PLS-SEM ƒë√∫ng chu·∫©n v·ªõi plspm ====
# 1. ƒê·∫£m b·∫£o ƒë√£ import ƒë√∫ng c√°c th∆∞ vi·ªán v√† c·∫•u h√¨nh
import pandas as pd
import plspm.config as c
from plspm.plspm import Plspm
from plspm.scheme import Scheme
from plspm.mode import Mode
from plspm.scale import Scale

# 2. ƒê·ªçc l·∫°i d·ªØ li·ªáu v√† mapping thang ƒëo (n·∫øu c·∫ßn)
# (Gi·∫£ s·ª≠ ƒë√£ c√≥ df v√† SCALES_TASK3 t·ª´ c√°c cell tr∆∞·ªõc)

# 3. ƒê·ªãnh nghƒ©a inner model (ma tr·∫≠n ƒë∆∞·ªùng d·∫´n gi·ªØa c√°c latent variables)
structure = c.Structure()
structure.add_path(["NL", "KQ", "QT", "QL", "HA"], ["HL"])
structure.add_path(["NL", "KQ", "QT", "QL", "HA", "HL"], ["CS"])
path_matrix = structure.path()

# 4. T·∫°o Config cho plspm (outer model)
cfg = c.Config(path_matrix, default_scale=Scale.NUM)
for lv_name, indicators in SCALES_TASK3.items():
    cfg.add_lv(lv_name, Mode.A, *[c.MV(col) for col in indicators])

# 5. Chu·∫©n b·ªã d·ªØ li·ªáu ch·ªâ g·ªìm c√°c bi·∫øn quan s√°t (item)
all_items = [item for items in SCALES_TASK3.values() for item in items]
df_items = df[all_items].copy()

# 6. Ch·∫°y m√¥ h√¨nh PLS-SEM
plspm_model = Plspm(
    df_items,
    cfg,
    Scheme.PATH
 )

print("PLS-SEM ƒë√£ ch·∫°y xong.")

PLS-SEM ƒë√£ ch·∫°y xong.


IndexError: At least one sheet must be visible