In [64]:
# https://pypi.org/project/prince/0.7.1/
# https://maxhalford.github.io/prince/mca/

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy
import prince
# import seaborn as sns
# import statsmodels as sm
# import sys
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
np.set_printoptions(precision=4, suppress=True)

adjASID = pd.ExcelFile ('Australian_Shark_Incident_Database_Public_Version-Adjusted.xlsx')
filteredASID = pd.read_excel (adjASID, 'Filtered_ASID')
filteredASID

Unnamed: 0,Season,State,Location,Shark Name,Shark Length,Provoked?,Activity,Gender,Age,Clothing,Clothing Colour
0,Summer,NSW,coastal,white shark,4.0-4.4,unprovoked,swimming,male,20-24,swimwear,blue
1,Fall,SA,coastal,white shark,3.5-3.9,provoked,spearfishing,male,20-24,wetsuit/drysuit,black
2,Fall,VIC,coastal,dusky shark,2.5-2.9,provoked,spearfishing,male,20-24,wetsuit/drysuit,black
3,Spring,VIC,island open ocean,white shark,4.0-4.4,provoked,snorkelling,male,25-29,wetsuit/drysuit,black
4,Summer,NSW,coastal,white shark,2.5-2.9,unprovoked,swimming,male,10-14,swimwear,green
...,...,...,...,...,...,...,...,...,...,...,...
96,Winter,WA,coastal,white shark,4.0-4.4,provoked,spearfishing,male,25-29,wetsuit/drysuit,black
97,Fall,TAS,coastal,seven gill shark,2.0-2.4,provoked,spearfishing,male,20-24,wetsuit,black
98,Fall,WA,estuary/harbour,bull shark,2.5-2.9,provoked,other: hull scraping,male,20-24,wetsuit,black
99,Winter,WA,island open ocean,unknown,2.5-2.9,unprovoked,snorkelling,female,50-54,wetsuit,black


In [65]:
mca = prince.MCA(
    n_components=83,
    n_iter=3,
    copy=True,
    check_input=True,
    engine='sklearn',
    random_state=42
)
mca = mca.fit(filteredASID)

In [66]:
one_hot = pd.get_dummies(filteredASID)

mca_no_one_hot = prince.MCA(one_hot=False)
mca_no_one_hot = mca_no_one_hot.fit(one_hot)

In [67]:
# about PCA but helps explain this - https://maxhalford.github.io/prince/pca/

mca.eigenvalues_summary

Unnamed: 0_level_0,eigenvalue,% of variance,% of variance (cumulative)
component,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.335,5.04%,5.04%
1,0.310,4.68%,9.72%
2,0.279,4.20%,13.92%
3,0.255,3.84%,17.76%
4,0.242,3.64%,21.40%
...,...,...,...
78,0.000,0.00%,100.00%
79,0.000,0.00%,100.00%
80,0.000,0.00%,100.00%
81,0.000,0.00%,100.00%


In [68]:
mca.total_inertia_

6.636363636363538

In [82]:
# Average Inertia % per Row --> if greater, then this case is important for the interpretation of the data
# http://cainarchaeology.weebly.com/number-of-dimensions-useful-for-data-interpretation.html

100/101

0.9900990099009901

In [70]:
# Average Inertia % per Column --> if greater, this factor is important for the interpretation of the data
# http://cainarchaeology.weebly.com/number-of-dimensions-useful-for-data-interpretation.html

100/83

1.2048192771084338

In [71]:
mca.row_coordinates(filteredASID)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,73,74,75,76,77,78,79,80,81,82
0,0.761205,-0.235322,0.017732,-0.320741,0.136888,0.003224,-0.031330,0.272695,-0.266110,0.120402,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
1,-0.525336,-0.339421,0.096562,-0.039012,-0.089879,-0.039736,0.369003,0.122175,-0.321239,-0.116762,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
2,-0.360575,-0.270901,0.334886,0.091196,0.096361,0.875061,-0.321827,-0.156168,-0.374521,-0.589428,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
3,-0.214113,0.354924,-0.027462,-0.052150,-0.050376,-0.384390,-0.013698,-0.063587,-0.758743,-0.608350,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
4,0.726088,-0.372241,0.117158,-0.116997,0.745072,0.627771,-0.184560,1.026520,0.388887,0.009715,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,-0.449115,-0.157308,0.095722,0.244348,0.098366,-0.234570,0.377849,0.206365,-0.559486,-0.452083,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
97,-0.944752,-0.424047,1.827781,0.009220,-1.097423,0.672621,-0.258627,-0.141819,-0.514273,0.662491,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
98,-0.336509,-0.714433,3.132569,-0.507221,-1.508835,0.198390,-0.327798,-0.407294,0.632497,0.100241,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002
99,-0.778514,1.196452,1.940165,-0.791617,0.425286,-0.507781,-0.393658,-0.284287,1.153432,-0.470599,...,0.136327,0.314087,-0.068079,0.05758,0.048973,0.201995,-0.083641,0.043507,0.000002,-0.000002


In [72]:
mca.column_coordinates(filteredASID).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,73,74,75,76,77,78,79,80,81,82
Season_Fall,-0.295672,-0.351237,0.398457,0.293616,-0.386938,-0.161974,-0.399072,-0.073511,0.479675,-0.050173,...,-0.005856,0.002214,0.018424,-0.018832,-0.022171,0.037189,0.033474,-0.063618,-0.036592,0.018577
Season_Spring,0.147829,-0.061517,-0.371077,-0.374788,-0.31619,-0.078177,0.168412,-0.367427,-0.428603,-0.017045,...,-0.005856,0.002214,0.018424,-0.018832,-0.022171,0.037189,0.033474,-0.063618,-0.036592,0.018577
Season_Summer,0.523209,0.213651,0.170356,-0.103614,0.557832,0.225813,-0.124188,0.428902,-0.18602,0.156858,...,-0.005856,0.002214,0.018424,-0.018832,-0.022171,0.037189,0.033474,-0.063618,-0.036592,0.018577
Season_Winter,-0.457411,0.082331,-0.142986,0.218835,-0.030351,-0.051173,0.269321,-0.076774,0.217412,-0.112013,...,-0.005856,0.002214,0.018424,-0.018832,-0.022171,0.037189,0.033474,-0.063618,-0.036592,0.018577
State_NSW,0.168119,-0.208499,-0.223203,-0.086922,0.321915,0.509242,-0.03349,-0.116158,0.324411,0.26014,...,-0.005856,0.002214,0.018424,-0.018832,-0.022171,0.037189,0.033474,-0.063618,-0.036592,0.018577


In [73]:
mca.plot(
    filteredASID,
    x_component=0,
    y_component=1,
    show_column_markers=True,
    show_row_markers=True,
    show_column_labels=True,
    show_row_labels=False
)

In [79]:
mca.row_contributions_.head().style.format('{:.2%}')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82
0,1.71%,0.18%,0.00%,0.40%,0.08%,0.00%,0.00%,0.36%,0.36%,0.07%,0.08%,0.01%,0.04%,0.08%,0.66%,0.14%,2.00%,0.03%,0.01%,0.28%,0.37%,0.59%,0.03%,0.16%,0.14%,0.03%,2.06%,0.09%,1.14%,0.14%,0.01%,0.70%,0.43%,0.79%,1.14%,0.47%,0.89%,1.03%,0.00%,7.16%,3.70%,0.35%,4.46%,1.94%,0.14%,0.59%,0.41%,3.60%,1.82%,1.67%,0.07%,0.00%,0.65%,1.97%,2.57%,0.20%,1.66%,0.65%,1.50%,4.14%,2.13%,0.70%,1.13%,0.57%,0.09%,0.53%,2.90%,4.24%,1.04%,0.25%,7.93%,1.88%,1.54%,0.07%,0.87%,0.98%,0.17%,0.00%,6.31%,0.06%,0.51%,0.27%,0.08%
1,0.82%,0.37%,0.03%,0.01%,0.03%,0.01%,0.64%,0.07%,0.52%,0.07%,2.49%,0.01%,0.43%,0.70%,0.28%,0.65%,0.98%,1.33%,0.06%,1.52%,0.53%,0.24%,0.05%,2.11%,0.07%,0.03%,0.12%,0.74%,0.14%,0.01%,2.82%,0.88%,0.02%,0.00%,0.26%,0.37%,0.23%,0.20%,0.02%,0.20%,3.21%,0.22%,0.05%,1.98%,0.48%,0.51%,0.24%,0.02%,0.90%,3.08%,1.26%,0.16%,0.43%,1.09%,0.09%,2.75%,0.10%,0.03%,0.34%,1.25%,0.55%,0.02%,0.17%,1.91%,1.82%,0.23%,0.03%,1.40%,0.00%,4.35%,0.33%,0.36%,1.62%,0.80%,0.98%,5.95%,1.05%,6.48%,2.80%,1.32%,4.07%,0.81%,0.27%
2,0.38%,0.23%,0.40%,0.03%,0.04%,3.49%,0.49%,0.12%,0.71%,1.78%,3.83%,2.01%,0.17%,0.86%,0.00%,1.60%,0.38%,0.04%,0.61%,1.12%,0.98%,0.19%,0.73%,0.37%,0.03%,0.18%,0.02%,1.56%,0.25%,0.01%,0.01%,0.10%,0.40%,0.09%,0.10%,7.40%,1.13%,2.48%,5.63%,0.00%,0.96%,2.08%,2.71%,0.05%,0.05%,1.75%,13.47%,0.87%,0.41%,4.29%,0.68%,2.91%,2.77%,1.89%,1.91%,3.26%,3.72%,1.17%,1.66%,5.35%,0.83%,0.15%,0.02%,5.48%,0.17%,0.22%,0.01%,0.20%,0.00%,0.71%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
3,0.14%,0.40%,0.00%,0.01%,0.01%,0.67%,0.00%,0.02%,2.89%,1.90%,0.06%,0.61%,0.41%,0.59%,0.27%,0.01%,1.43%,0.55%,1.48%,0.03%,0.04%,0.00%,0.57%,0.00%,0.01%,0.01%,0.82%,0.23%,0.13%,0.24%,0.12%,0.14%,0.17%,1.21%,0.83%,3.35%,0.08%,0.14%,0.00%,0.00%,2.55%,2.08%,0.93%,2.79%,1.77%,0.06%,0.42%,1.46%,1.98%,0.04%,1.11%,0.11%,1.85%,0.14%,1.29%,0.46%,4.87%,1.01%,0.40%,0.28%,2.55%,1.87%,0.06%,0.12%,1.98%,2.21%,0.82%,0.00%,0.00%,0.80%,4.13%,0.02%,8.78%,5.31%,0.01%,0.22%,0.01%,1.89%,4.79%,0.24%,0.26%,1.04%,0.23%
4,1.56%,0.44%,0.05%,0.05%,2.27%,1.80%,0.16%,5.11%,0.76%,0.00%,0.16%,0.04%,0.03%,0.18%,0.05%,2.45%,1.12%,0.01%,0.04%,0.09%,0.50%,0.66%,0.55%,0.20%,6.05%,3.38%,0.28%,0.17%,0.83%,0.44%,5.50%,2.40%,1.65%,4.33%,8.23%,1.08%,0.96%,0.02%,2.09%,0.66%,0.06%,0.46%,1.75%,0.43%,5.16%,2.37%,0.21%,2.11%,0.16%,0.55%,3.26%,1.07%,1.65%,2.25%,0.44%,0.10%,0.02%,0.51%,0.13%,0.38%,0.06%,1.29%,1.62%,0.57%,0.39%,0.12%,0.61%,0.13%,0.03%,1.32%,0.56%,0.00%,0.74%,0.01%,0.01%,0.02%,0.06%,0.02%,1.87%,0.71%,0.71%,1.72%,0.13%


In [80]:
mca.column_contributions_.style.format('{:.2%}')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82
Season_Fall,0.47%,0.72%,1.03%,0.61%,1.11%,0.22%,1.36%,0.05%,2.10%,0.02%,2.78%,0.07%,3.58%,1.48%,0.16%,5.04%,2.74%,0.36%,0.07%,0.28%,1.75%,0.02%,1.65%,2.65%,0.72%,0.32%,0.00%,0.06%,0.61%,1.03%,0.04%,0.70%,0.70%,0.32%,1.02%,2.31%,0.56%,0.47%,0.21%,2.34%,0.58%,0.02%,1.10%,3.56%,2.95%,2.79%,3.03%,0.05%,0.32%,0.76%,3.97%,4.54%,0.04%,0.68%,0.58%,0.22%,1.08%,3.04%,0.19%,0.48%,0.74%,0.18%,0.31%,1.16%,5.85%,0.05%,0.04%,0.07%,0.12%,0.06%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
Season_Spring,0.14%,0.03%,1.07%,1.19%,0.89%,0.06%,0.29%,1.43%,2.01%,0.00%,0.61%,3.14%,0.13%,0.18%,1.92%,4.99%,0.33%,0.33%,2.84%,1.08%,2.55%,3.52%,0.50%,0.43%,0.43%,0.09%,0.85%,0.04%,1.92%,1.69%,0.15%,0.38%,0.22%,2.57%,0.01%,0.02%,0.24%,0.02%,1.67%,0.03%,1.76%,0.25%,2.18%,0.01%,0.70%,2.05%,0.39%,0.59%,1.55%,2.37%,6.00%,0.98%,2.54%,1.26%,5.52%,1.95%,2.17%,0.01%,0.73%,0.02%,0.03%,0.15%,0.20%,0.45%,0.42%,1.87%,0.00%,0.04%,0.03%,0.07%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
Season_Summer,2.13%,0.38%,0.27%,0.11%,3.36%,0.61%,0.19%,2.35%,0.46%,0.33%,0.40%,3.14%,0.36%,0.39%,0.00%,2.21%,2.29%,0.00%,0.07%,2.61%,5.42%,0.00%,1.41%,0.20%,0.19%,0.00%,1.32%,0.17%,0.15%,0.06%,0.01%,0.31%,0.06%,0.07%,0.06%,0.77%,0.00%,0.59%,0.00%,0.70%,0.44%,0.51%,0.07%,0.01%,0.67%,0.01%,3.14%,0.36%,1.24%,1.43%,0.65%,0.69%,2.41%,0.26%,2.99%,5.55%,1.53%,3.07%,3.38%,2.42%,1.97%,0.00%,0.33%,1.45%,3.11%,0.06%,0.07%,0.09%,0.22%,0.08%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
Season_Winter,1.58%,0.06%,0.18%,0.47%,0.01%,0.03%,0.87%,0.07%,0.60%,0.16%,2.22%,0.15%,0.42%,0.62%,2.73%,1.80%,0.46%,0.00%,1.16%,4.66%,0.05%,3.19%,2.69%,0.09%,0.11%,0.07%,0.08%,0.16%,5.44%,0.01%,0.19%,0.49%,1.94%,4.96%,0.45%,0.06%,0.04%,0.00%,0.67%,0.36%,1.59%,1.67%,6.35%,2.58%,9.38%,0.00%,0.83%,0.08%,0.24%,0.28%,0.06%,0.00%,0.08%,0.03%,0.05%,0.50%,0.97%,0.16%,2.10%,1.26%,0.74%,0.00%,0.28%,0.09%,0.12%,2.90%,0.19%,0.07%,0.36%,0.08%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%,0.00%
State_NSW,0.31%,0.52%,0.66%,0.11%,1.58%,4.40%,0.02%,0.24%,1.97%,1.29%,1.74%,0.04%,0.41%,0.00%,0.29%,0.05%,0.35%,3.20%,0.02%,1.36%,0.01%,0.20%,0.01%,0.01%,1.34%,0.98%,1.74%,0.01%,0.23%,0.15%,0.15%,0.50%,0.96%,0.12%,0.00%,0.17%,0.04%,0.93%,3.09%,0.02%,1.80%,1.73%,3.79%,1.98%,0.02%,0.09%,0.22%,1.71%,0.19%,0.32%,0.33%,0.11%,0.00%,1.67%,0.06%,0.87%,0.18%,1.17%,0.23%,0.21%,5.37%,0.50%,0.07%,0.86%,0.91%,4.94%,0.38%,0.17%,0.27%,0.42%,3.68%,2.72%,1.02%,2.35%,1.65%,0.00%,0.32%,12.38%,15.63%,0.04%,0.36%,0.00%,0.00%
State_QLD,6.95%,3.45%,0.40%,1.87%,0.76%,0.30%,1.29%,0.03%,0.88%,0.50%,1.24%,0.00%,0.62%,2.32%,0.09%,0.00%,3.23%,0.76%,0.19%,0.00%,0.67%,1.01%,1.20%,0.00%,0.00%,3.95%,0.06%,0.80%,0.16%,0.62%,0.06%,0.11%,0.25%,0.21%,1.16%,0.00%,0.50%,0.03%,0.24%,2.47%,0.16%,0.95%,0.50%,0.10%,1.79%,0.10%,0.74%,0.70%,0.00%,0.18%,0.16%,0.32%,0.46%,2.41%,0.31%,1.04%,0.82%,0.23%,0.53%,0.00%,4.39%,7.69%,2.92%,0.03%,8.41%,8.53%,7.06%,0.04%,0.13%,0.11%,0.99%,0.73%,0.27%,0.63%,0.44%,0.00%,0.09%,3.32%,4.19%,0.01%,0.10%,0.00%,0.00%
State_SA,0.77%,0.11%,1.11%,0.70%,0.74%,1.64%,1.56%,0.33%,0.97%,0.01%,5.12%,0.00%,0.20%,0.87%,0.42%,0.13%,5.50%,1.04%,0.31%,6.15%,0.09%,1.28%,0.19%,1.01%,2.55%,0.13%,0.00%,0.56%,0.00%,0.06%,0.27%,0.22%,0.00%,0.35%,0.83%,1.67%,5.03%,4.87%,2.54%,0.39%,1.85%,3.81%,0.23%,5.94%,8.09%,0.86%,0.29%,0.36%,1.44%,0.55%,1.06%,0.52%,0.13%,2.03%,0.03%,4.32%,0.36%,1.80%,0.56%,0.07%,0.18%,0.02%,0.20%,0.09%,4.79%,0.01%,0.33%,0.28%,0.15%,0.10%,0.90%,0.66%,0.25%,0.57%,0.40%,0.00%,0.08%,3.02%,3.81%,0.01%,0.09%,0.00%,0.00%
State_TAS,1.40%,0.07%,1.20%,0.03%,5.44%,0.03%,0.37%,0.19%,2.16%,3.53%,1.50%,1.44%,0.38%,0.01%,3.43%,0.34%,5.75%,0.25%,2.17%,0.04%,2.06%,0.19%,0.46%,0.49%,3.25%,0.13%,0.32%,0.01%,0.38%,0.84%,0.42%,0.03%,0.84%,0.70%,1.98%,0.04%,1.82%,0.30%,0.31%,1.39%,0.92%,0.23%,0.90%,3.44%,0.87%,6.62%,0.41%,2.92%,1.87%,0.03%,5.27%,4.11%,0.56%,4.42%,0.92%,1.19%,0.29%,0.00%,0.23%,2.19%,0.89%,0.02%,0.04%,0.40%,0.13%,2.34%,0.45%,0.61%,7.06%,0.05%,0.45%,0.33%,0.12%,0.29%,0.20%,0.00%,0.04%,1.51%,1.91%,0.01%,0.04%,0.00%,0.00%
State_VIC,0.09%,0.01%,1.05%,0.08%,0.45%,0.02%,2.04%,0.03%,1.69%,0.39%,1.50%,0.40%,2.09%,0.20%,5.61%,4.46%,0.68%,4.52%,1.27%,8.72%,0.54%,0.15%,0.01%,0.57%,0.27%,0.03%,0.52%,0.04%,0.02%,0.01%,0.20%,0.31%,0.01%,0.92%,0.03%,6.68%,0.01%,2.53%,5.42%,2.79%,0.54%,1.65%,0.83%,1.30%,0.00%,1.13%,0.17%,0.00%,0.62%,0.25%,0.10%,0.30%,2.29%,1.49%,0.57%,0.10%,2.51%,2.10%,0.89%,0.69%,4.11%,1.30%,1.61%,3.50%,1.81%,0.08%,0.14%,0.59%,0.06%,0.13%,1.17%,0.86%,0.32%,0.75%,0.52%,0.00%,0.10%,3.93%,4.96%,0.01%,0.12%,0.00%,0.00%
State_WA,1.59%,0.02%,2.81%,0.04%,1.29%,2.16%,0.00%,0.11%,1.29%,2.07%,0.52%,0.03%,2.17%,0.04%,0.21%,2.09%,0.08%,0.44%,1.81%,1.14%,1.12%,0.95%,0.55%,0.10%,0.67%,0.26%,1.97%,0.17%,0.04%,0.41%,1.26%,0.30%,1.85%,0.56%,0.46%,0.49%,2.60%,0.88%,0.31%,7.91%,0.57%,10.61%,2.64%,0.00%,0.10%,0.00%,0.18%,0.00%,1.86%,1.71%,0.97%,0.95%,1.93%,3.72%,0.00%,0.03%,1.07%,1.99%,0.09%,0.84%,0.09%,4.27%,0.68%,0.17%,1.08%,0.00%,0.40%,0.33%,0.16%,0.22%,1.89%,1.39%,0.52%,1.20%,0.85%,0.00%,0.17%,6.34%,8.01%,0.02%,0.19%,0.00%,0.00%
