# Multiple Correspondence Analysis in Python

MCA is less widely supported by the common Python libraries than PCA and LDA, so we use the `prince` package, which still relies on `scikit-learn` under the hood.

In [1]:
import pandas as pd
import prince

# Source file and the labels applied to its five columns.
BALLOONS_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/balloons/adult+stretch.data"
BALLOON_COLUMNS = ["Color", "Size", "Action", "Age", "Inflated"]

# header=None makes pandas treat the first line as data rather than as column
# names; the labels are attached afterwards (a length mismatch raises).
balloons = pd.read_csv(BALLOONS_URL, header=None).set_axis(BALLOON_COLUMNS, axis=1)
balloons

Unnamed: 0,Color,Size,Action,Age,Inflated
0,YELLOW,SMALL,STRETCH,ADULT,T
1,YELLOW,SMALL,STRETCH,ADULT,T
2,YELLOW,SMALL,STRETCH,CHILD,F
3,YELLOW,SMALL,DIP,ADULT,F
4,YELLOW,SMALL,DIP,CHILD,F
5,YELLOW,LARGE,STRETCH,ADULT,T
6,YELLOW,LARGE,STRETCH,ADULT,T
7,YELLOW,LARGE,STRETCH,CHILD,F
8,YELLOW,LARGE,DIP,ADULT,F
9,YELLOW,LARGE,DIP,CHILD,F


In [2]:
# Hyperparameters for the MCA estimator, gathered in one place so they are
# easy to tweak. random_state is pinned to 42 (presumably for reproducible
# results -- confirm against the prince documentation).
mca_settings = {
    "n_components": 3,
    "n_iter": 3,
    "copy": True,
    "check_input": True,
    "engine": "sklearn",
    "random_state": 42,
}

# Fit on the balloons frame, then project the same rows onto the fitted components.
mca = prince.MCA(**mca_settings).fit(balloons)
balloons_transformed = mca.transform(balloons)
balloons_transformed

Unnamed: 0,0,1,2
0,-0.75074,-0.447214,-0.447214
1,-0.75074,-0.447214,-0.447214
2,0.341791,-0.447214,-0.447214
3,0.341791,-0.447214,-0.447214
4,0.817897,-0.447214,-0.447214
5,-0.75074,-0.447214,0.447214
6,-0.75074,-0.447214,0.447214
7,0.341791,-0.447214,0.447214
8,0.341791,-0.447214,0.447214
9,0.817897,-0.447214,0.447214


In [3]:
# Plot components 0 (x) against 1 (y) with row and column markers enabled
# and row and column labels disabled.
marker_flags = {"show_row_markers": True, "show_column_markers": True}
label_flags = {"show_row_labels": False, "show_column_labels": False}

mca.plot(balloons, x_component=0, y_component=1, **marker_flags, **label_flags)