In [2]:
import pandas as pd
import re
from statsmodels.multivariate.manova import MANOVA

# 1. Read in
df = pd.read_csv("cols_named_all_data_for_LDA.csv")

# 2. Make sure label_e is categorical
df['label_e'] = df['label_e'].astype('category')

# 3. Sanitize all column names: replace any non-word character with underscore
df.columns = [re.sub(r'\W+', '_', col) for col in df.columns]

# 4. Now build your response list & formula
response_cols = [c for c in df.columns if c != 'label_e']
formula = ' + '.join(response_cols) + ' ~ label_e'

# 5. Fit the MANOVA
maov = MANOVA.from_formula(formula, data=df)
print(maov.mv_test())

                   Multivariate linear model
                                                                
----------------------------------------------------------------
       Intercept         Value   Num DF  Den DF  F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0412 40.0000 212.0000 123.2120 0.0000
         Pillai's trace  0.9588 40.0000 212.0000 123.2120 0.0000
 Hotelling-Lawley trace 23.2475 40.0000 212.0000 123.2120 0.0000
    Roy's greatest root 23.2475 40.0000 212.0000 123.2120 0.0000
----------------------------------------------------------------
                                                                
----------------------------------------------------------------
         label_e         Value   Num DF   Den DF  F Value Pr > F
----------------------------------------------------------------
           Wilks' lambda 0.3397 120.0000 636.0351  2.3001 0.0000
          Pillai's trace 0.8783 120.0000 642.

In [4]:
import pingouin as pg

# Assuming you already have:
# df, response_cols = [ all numeric column names ], and df['label_e'] categorical.

# Call box_m with the right arguments
boxm = pg.box_m(
    data=df, 
    dvs=response_cols, 
    group='label_e'
)

print(boxm)


     Chi2      df  pval  equal_cov
box   inf  2460.0   0.0      False


  u = -2 * (1 - c) * np.log(M)


In [5]:
#check univariate normality

from scipy.stats import shapiro

for col in response_cols:
    stat, p = shapiro(df[col])
    print(f"{col}: W={stat:.3f}, p={p:.3g}")


Portrayed_Valence: W=0.990, p=0.0615
Portrayed_Arousal: W=0.993, p=0.289
Admiration: W=0.844, p=2.36e-15
Anger_Rage: W=0.608, p=9.28e-24
Compassion: W=0.657, p=1.97e-22
Contempt: W=0.722, p=2.18e-20
Disappointment: W=0.683, p=1.19e-21
Fear: W=0.732, p=4.9e-20
Fear_Confirmed: W=0.484, p=1.17e-26
Gloating: W=0.470, p=6.21e-27
Gratification: W=0.656, p=1.88e-22
Gratitude: W=0.649, p=1.21e-22
Happiness: W=0.803, p=2.6e-17
Happy_For: W=0.637, p=5.68e-23
Hate: W=0.585, p=2.46e-24
Hope: W=0.776, p=2.12e-18
Love: W=0.701, p=4.29e-21
Pride: W=0.723, p=2.32e-20
Relief: W=0.542, p=2.22e-25
Remorse: W=0.507, p=3.6e-26
Resentment: W=0.604, p=7.49e-24
Sadness: W=0.648, p=1.11e-22
Satisfaction: W=0.731, p=4.23e-20
Shame: W=0.539, p=1.94e-25
Viewer_Valence: W=0.988, p=0.0364
Viewer_Arousal: W=0.983, p=0.00351
Viewer_Thinking: W=0.985, p=0.00726
Viewer_Interested_Engaged: W=0.974, p=0.000137
Viewer_Amused: W=0.862, p=2.11e-14
Viewer_Happy: W=0.825, p=2.56e-16
Viewer_Sad: W=0.623, p=2.23e-23
Viewer_Surp

In [7]:
print(maov.mv_test().results['label_e']['stat'].loc['Pillai\'s trace'])


Value      0.878324
Num DF        120.0
Den DF        642.0
F Value    2.214775
Pr > F          0.0
Name: Pillai's trace, dtype: object


In [9]:
import pandas as pd
import re
from statsmodels.multivariate.manova import MANOVA

# 1. Read in
df = pd.read_csv("cols_named_all_data_minus_num_face.csv")

# 2. Make sure label_e is categorical
df['label_e'] = df['label_e'].astype('category')

# 3. Sanitize all column names: replace any non-word character with underscore
df.columns = [re.sub(r'\W+', '_', col) for col in df.columns]

# 4. Now build your response list & formula
response_cols = [c for c in df.columns if c != 'label_e']
formula = ' + '.join(response_cols) + ' ~ label_e'

# 5. Fit the MANOVA
maov = MANOVA.from_formula(formula, data=df)
print(maov.mv_test())

                   Multivariate linear model
                                                                
----------------------------------------------------------------
       Intercept         Value   Num DF  Den DF  F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0433 38.0000 214.0000 124.5586 0.0000
         Pillai's trace  0.9567 38.0000 214.0000 124.5586 0.0000
 Hotelling-Lawley trace 22.1179 38.0000 214.0000 124.5586 0.0000
    Roy's greatest root 22.1179 38.0000 214.0000 124.5586 0.0000
----------------------------------------------------------------
                                                                
----------------------------------------------------------------
         label_e         Value   Num DF   Den DF  F Value Pr > F
----------------------------------------------------------------
           Wilks' lambda 0.3593 114.0000 641.9264  2.2938 0.0000
          Pillai's trace 0.8415 114.0000 648.