In [1]:
# import libraries
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.multivariate.manova import MANOVA

In [2]:
# Load the "iris" example dataset through seaborn and preview its first five rows
df = sns.load_dataset(name="iris")
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# List the column labels of the loaded DataFrame
df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [14]:
# MANOVA: test whether the four flower measurements (jointly) differ by species.
# The left-hand side of the formula lists the dependent variables; `species`
# is the single grouping factor.
m_anova = MANOVA.from_formula(
    'sepal_length + sepal_width + petal_length + petal_width ~ species',
    data=df,
)

# Run the multivariate tests once and keep the result object: it is printed
# here and remains available as `result` for later cells.
result = m_anova.mv_test()
print(result)


                   Multivariate linear model
                                                                
----------------------------------------------------------------
       Intercept         Value  Num DF  Den DF   F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0170 4.0000 144.0000 2086.7720 0.0000
         Pillai's trace  0.9830 4.0000 144.0000 2086.7720 0.0000
 Hotelling-Lawley trace 57.9659 4.0000 144.0000 2086.7720 0.0000
    Roy's greatest root 57.9659 4.0000 144.0000 2086.7720 0.0000
----------------------------------------------------------------
                                                                
----------------------------------------------------------------
        species          Value  Num DF  Den DF   F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0234 8.0000 288.0000  199.1453 0.0000
         Pillai's trace  1.1919 8.0000 290.00

# Interpretation - MANOVA results (intercept and species effects)
- The p-value to consider in this case is that of Wilks' lambda for the `species` term. As the output above shows, it is statistically significant (Pr > F is reported as 0.0000).
- This analysis shows that the relationship between the grouping factor (`species`) and the four measured variables is strong and statistically significant. It suggests that the factor being studied has a significant impact on the outcomes, as indicated by the low p-values (Pr > F). All four test statistics (Wilks' lambda, Pillai's trace, the Hotelling-Lawley trace, and Roy's greatest root) point to the same conclusion with high levels of significance. This means that the factor being examined has a substantial influence on the results, and this influence is unlikely to be due to random chance.

# Save the results in table form

In [16]:
# Build the exported tables directly from the fitted MANOVA (`result`, the
# object returned by `m_anova.mv_test()` in the MANOVA cell) instead of
# retyping the printed numbers by hand. The printed summary is rounded to four
# decimals (p-values show up as 0.0000), so hand-copied values lose precision
# and silently go stale whenever the data or the model changes.

# Column layout of the exported tables.
STAT_COLUMNS = ["Measure", "Value", "Num DF", "Den DF", "F Value", "Pr > F"]


def manova_term_table(mv_results, term):
    """Return the multivariate test statistics of one model term as a table.

    Parameters
    ----------
    mv_results : object returned by ``MANOVA.mv_test()``
        Its ``results`` mapping is indexed by term name; each entry holds a
        ``"stat"`` DataFrame with one row per test statistic.
    term : str
        Name of the model term to extract, e.g. ``"Intercept"`` or ``"species"``.

    Returns
    -------
    pandas.DataFrame
        One row per statistic, with the columns listed in ``STAT_COLUMNS``.

    Raises
    ------
    KeyError
        If ``term`` is not part of the fitted model, or an expected column is
        missing from the statistics table.
    """
    stat = mv_results.results[term]["stat"]
    # Cast to float for clean numeric columns, then move the statistic names
    # from the index into a regular "Measure" column.
    table = stat.astype(float).rename_axis("Measure").reset_index()
    return table[STAT_COLUMNS]


# Create DataFrames
intercept_df = manova_term_table(result, "Intercept")
species_df = manova_term_table(result, "species")

# Save each table as CSV and as Excel
for term_df, file_stem in (
    (intercept_df, "intercept_results"),
    (species_df, "species_results"),
):
    term_df.to_csv(f"{file_stem}.csv", index=False)
    term_df.to_excel(f"{file_stem}.xlsx", index=False)

