In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## BASE CASE

500 individus, 5 features numériques, 5 features catégoriques

In [2]:
# Base-case benchmark results.
# Maps algorithm name -> two-element list; the following cell labels position 0
# as "Mem (MiB)" and position 1 as "Time (s)".
base_case = {
    "ClustMD": [184.296919, 28.09750008583069],
    "DenseClus": [42.257825000000025, 17.524900197982788],
    "Phillip & Ottaway": [30.007769000000053, 1.4196999073028564],
    "Kamila": [116.44922500000001, 1.6607000827789307],
    "K-Prototypes": [24.589850000000013, 2.361299991607666],
    "MixtComp": [153.73832499999997, 2.758500099182129],
    "Modha-Spangler": [117.761731, 13.666800022125244],
    "Pretopo-FAMD": [58.757799999999975, 3.3143999576568604],
    "Pretopo-UMAP": [77.55470599999995, 9.262700080871582],
    "Pretopo-PaCMAP": [59.472644, 3.909899950027466],
    "PretopoMD": [13.164044000000047, 19.644400119781494],
}

In [3]:
# One row per algorithm, one column per metric. Building the frame row-wise
# names the columns at construction time, so nothing is mutated afterwards.
base_df = pd.DataFrame.from_dict(
    base_case,
    orient="index",
    columns=["Mem (MiB)", "Time (s)"],
)

In [4]:
# Base case: memory and computation time side by side, one bar per algorithm.
# `make_subplots` and `go` come from the notebook's import cell at the top.
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=("Memory usage (MiB)", "Computation Time (s)"),
)

# Left panel: memory usage.
fig.add_trace(
    go.Bar(x=base_df.index, y=base_df["Mem (MiB)"]),
    row=1, col=1,
)

# Right panel: computation time.
fig.add_trace(
    go.Bar(x=base_df.index, y=base_df["Time (s)"]),
    row=1, col=2,
)

# The subplot titles already identify each panel, so the trace legend is hidden.
fig.update_layout(showlegend=False)

# Side effect: writes a PNG into the current working directory.
# NOTE(review): static export needs an image engine (e.g. kaleido) installed
# in the kernel environment; confirm it is listed in the project requirements.
fig.write_image("base_case_2.png")
fig

# IMPACT OF NUMBER OF INDIVIDUALS

In [5]:
# Benchmark results when varying the number of individuals.
# Structure: {number of individuals: {algorithm: [memory, time]}}.
# The plotting cells below read element 0 as "Maximum Memory Usage (MiB)" and
# element 1 as "Computation Time (s)".
individuals = {
    50: {
        "ClustMD": [153.89841900000005, 5.3947999477386475],
        "DenseClus": [38.98828099999997, 12.313400030136108],
        "Phillip & Ottaway": [18.468744000000015, 0.5903000831604004],
        "Kamila": [98.800769, 0.9065999984741211],
        "K-Prototypes": [18.80075599999998, 0.6665999889373779],
        "MixtComp": [145.53901900000005, 1.7325999736785889],
        "Modha-Spangler": [97.27348099999995, 2.331900119781494],
        "Pretopo-FAMD": [51.22658799999999, 0.9841001033782959],
        "Pretopo-UMAP": [68.72263099999998, 6.794800043106079],
        "Pretopo-PaCMAP": [51.882825000000025, 1.1522998809814453],
        "PretopoMD": [0.2929690000000278, 0.20090007781982422],
    },
    100: {
        "ClustMD": [159.78519400000005, 7.43939995765686],
        "DenseClus": [39.757825000000025, 12.659800052642822],
        "Phillip & Ottaway": [18.93751900000001, 0.6079998016357422],
        "Kamila": [97.19918099999995, 0.9523000717163086],
        "K-Prototypes": [19.121061999999995, 0.789600133895874],
        "MixtComp": [146.33595599999995, 1.799800157546997],
        "Modha-Spangler": [110.84769400000005, 3.5822999477386475],
        "Pretopo-FAMD": [50.76566200000002, 1.0813000202178955],
        "Pretopo-UMAP": [69.38279999999997, 6.924900054931641],
        "Pretopo-PaCMAP": [51.71091900000005, 1.273900032043457],
        "PretopoMD": [1.0273059999999532, 0.9865000247955322],
    },
    250: {
        "ClustMD": [167.75393099999997, 15.001100063323975],
        "DenseClus": [39.843794, 13.854300022125244],
        "Phillip & Ottaway": [20.535124999999994, 0.7360999584197998],
        "Kamila": [98.19528099999997, 1.0425999164581299],
        "K-Prototypes": [19.925780999999972, 1.190999984741211],
        "MixtComp": [147.28519400000005, 2.002000093460083],
        "Modha-Spangler": [112.04686900000002, 7.2362000942230225],
        "Pretopo-FAMD": [53.92970599999995, 1.4344000816345215],
        "Pretopo-UMAP": [71.75386900000001, 7.409899950027466],
        "Pretopo-PaCMAP": [54.57416900000004, 1.7170000076293945],
        "PretopoMD": [3.2968999999999937, 5.503100156784058],
    },
    # 500 individuals: identical to the `base_case` measurements.
    500: {
        "ClustMD": [184.296919, 28.09750008583069],
        "DenseClus": [42.257825000000025, 17.524900197982788],
        "Phillip & Ottaway": [30.007769000000053, 1.4196999073028564],
        "Kamila": [116.44922500000001, 1.6607000827789307],
        "K-Prototypes": [24.589850000000013, 2.361299991607666],
        "MixtComp": [153.73832499999997, 2.758500099182129],
        "Modha-Spangler": [117.761731, 13.666800022125244],
        "Pretopo-FAMD": [58.757799999999975, 3.3143999576568604],
        "Pretopo-UMAP": [77.55470599999995, 9.262700080871582],
        "Pretopo-PaCMAP": [59.472644, 3.909899950027466],
        "PretopoMD": [13.164044000000047, 19.644400119781494],
    },
    1000: {
        "ClustMD": [202.10936199999998, 56.56280016899109],
        "DenseClus": [46.242174999999975, 24.41479992866516],
        "Phillip & Ottaway": [30.746137999999974, 3.5961999893188477],
        "Kamila": [121.06638099999998, 3.5653998851776123],
        "K-Prototypes": [26.937538000000018, 6.138000011444092],
        "MixtComp": [153.74216900000005, 5.125],
        "Modha-Spangler": [117.25394400000005, 27.50309991836548],
        "Pretopo-FAMD": [83.195288, 7.648000001907349],
        "Pretopo-UMAP": [99.60546199999999, 14.289299964904785],
        "Pretopo-PaCMAP": [81.35940599999998, 8.548199892044067],
        "PretopoMD": [54.386761999999976, 69.87230014801025],
    },
    1750: {
        "ClustMD": [213.07027499999998, 101.2829999923706],
        "DenseClus": [61.47265599999997, 34.29380011558533],
        "Phillip & Ottaway": [43.988238000000024, 9.441699981689453],
        "Kamila": [130.43354400000004, 8.537799835205078],
        "K-Prototypes": [45.79684400000002, 11.484600067138672],
        "MixtComp": [156.77738799999997, 10.962299823760986],
        "Modha-Spangler": [120.81645599999996, 50.71059989929199],
        "Pretopo-FAMD": [180.16410599999995, 16.594300031661987],
        "Pretopo-UMAP": [176.109375, 28.564499855041504],
        "Pretopo-PaCMAP": [150.238244, 21.41849994659424],
        "PretopoMD": [166.22265599999997, 215.3027000427246],
    },
    2500: {
        "ClustMD": [225.593725, 149.09089994430542],
        "DenseClus": [89.62498799999997, 47.73539996147156],
        "Phillip & Ottaway": [133.199212, 17.837599992752075],
        "Kamila": [111.61714999999998, 16.02020001411438],
        "K-Prototypes": [82.55082499999997, 21.018100023269653],
        "MixtComp": [158.757812, 18.175600051879883],
        "Modha-Spangler": [119.48823099999998, 76.02270007133484],
        "Pretopo-FAMD": [348.1719, 42.92490005493164],
        "Pretopo-UMAP": [328.29293800000005, 69.36849999427795],
        "Pretopo-PaCMAP": [298.63671899999997, 46.4109001159668],
        "PretopoMD": [303.10935600000005, 341.7800998687744],
    },
    5000: {
        "ClustMD": [289.773481, 341.10430002212524],
        "DenseClus": [175.769494, 79.72909998893738],
        "Phillip & Ottaway": [461.88671199999993, 69.62489986419678],
        "Kamila": [298.74998100000005, 61.2318000793457],
        "K-Prototypes": [301.464794, 73.57969999313354],
        "MixtComp": [291.9413880000001, 64.41799998283386],
        "Modha-Spangler": [294.164031, 178.10319995880127],
        "Pretopo-FAMD": [2017.824219, 176.59529995918274],
        "Pretopo-UMAP": [1405.499994, 423.83930015563965],
        "Pretopo-PaCMAP": [1165.1367559999999, 243.81610012054443],
        "PretopoMD": [1259.957025, 1373.0801000595093],
    },
}

In [6]:
# Spot-check one entry: the [memory, time] pair stored for ClustMD at 50 individuals.
individuals[50]["ClustMD"]

[153.89841900000005, 5.3947999477386475]

In [7]:
# Memory usage versus number of individuals, one line per algorithm.
# `nn` fixes the column (x-axis) order and is also read by the next cell.
nn = [50, 100, 250, 500, 1000, 1750, 2500, 5000]
indiv_df = pd.DataFrame.from_dict(individuals)[nn]

# Each cell of `indiv_df` holds a [memory, time] list; keep element 0 (memory)
# and transpose so rows are individual counts and columns are algorithms.
indiv_mem = indiv_df.apply(lambda col: col.str[0]).T

# `markers` is a boolean switch in plotly express; the previous "*" value only
# worked because it is truthy and never selected a marker symbol.
fig = px.line(data_frame=indiv_mem, markers=True)
fig.update_layout(legend_title_text="Algorithm")
fig.update_xaxes(title="Number of individuals")
fig.update_yaxes(title="Maximum Memory Usage (MiB)")
fig

In [8]:
# Computation time versus number of individuals, one line per algorithm.
# Column order is taken from the dict's own keys rather than the shared `nn`
# variable: a later cell rebinds `nn` to feature counts, which would make this
# cell fail with a KeyError if it were re-run afterwards.
indiv_df = pd.DataFrame.from_dict(individuals)[sorted(individuals)]

# Each cell of `indiv_df` holds a [memory, time] list; keep element 1 (time)
# and transpose so rows are individual counts and columns are algorithms.
indiv_time = indiv_df.apply(lambda col: col.str[1]).T

# `markers` is a boolean switch; `log_x=False` was the default and is omitted.
fig = px.line(data_frame=indiv_time, markers=True)
fig.update_layout(legend_title_text="Algorithm")
fig.update_xaxes(title="Number of individuals")
fig.update_yaxes(title="Computation Time (s)")
fig

In [9]:
# Benchmark results when varying the number of numerical features.
# Structure: {number of numerical features: {algorithm: [memory, time]}}.
# The plotting cells below read element 0 as "Maximum Memory Usage (MiB)" and
# element 1 as "Computation Time (s)".
# NOTE(review): presumably the other base-case settings are held fixed while
# this count varies - confirm against the benchmark script.
n_num = {
    2: {
        "ClustMD": [158.914012, 20.684799909591675],
        "DenseClus": [44.363269, 18.703999996185303],
        "Phillip & Ottaway": [23.578149999999994, 1.315000057220459],
        "Kamila": [103.543, 1.6481001377105713],
        "K-Prototypes": [20.449250000000006, 2.1430001258850098],
        "MixtComp": [147.925812, 2.636399984359741],
        "Modha-Spangler": [113.76561900000002, 12.003000020980835],
        "Pretopo-FAMD": [56.925805999999966, 3.3359999656677246],
        "Pretopo-UMAP": [76.44535000000002, 10.22160005569458],
        "Pretopo-PaCMAP": [57.02348099999995, 3.65910005569458],
        "PretopoMD": [13.714844000000028, 15.901400089263916],
    },
    # 5 numerical features: identical to the `base_case` measurements.
    5: {
        "ClustMD": [184.296919, 28.09750008583069],
        "DenseClus": [42.257825000000025, 17.524900197982788],
        "Phillip & Ottaway": [30.007769000000053, 1.4196999073028564],
        "Kamila": [116.44922500000001, 1.6607000827789307],
        "K-Prototypes": [24.589850000000013, 2.361299991607666],
        "MixtComp": [153.73832499999997, 2.758500099182129],
        "Modha-Spangler": [117.761731, 13.666800022125244],
        "Pretopo-FAMD": [58.757799999999975, 3.3143999576568604],
        "Pretopo-UMAP": [77.55470599999995, 9.262700080871582],
        "Pretopo-PaCMAP": [59.472644, 3.909899950027466],
        "PretopoMD": [13.164044000000047, 19.644400119781494],
    },
    10: {
        "ClustMD": [199.43753800000002, 24.576600074768066],
        "DenseClus": [44.89845599999995, 17.947499990463257],
        "Phillip & Ottaway": [26.414080999999953, 1.570000171661377],
        "Kamila": [101.15625599999998, 1.725100040435791],
        "K-Prototypes": [21.027344000000028, 2.306999921798706],
        "MixtComp": [150.171856, 3.133999824523926],
        "Modha-Spangler": [112.74214400000005, 15.881099939346313],
        "Pretopo-FAMD": [59.34373799999997, 3.063999891281128],
        "Pretopo-UMAP": [76.00392499999998, 9.351500034332275],
        "Pretopo-PaCMAP": [56.96483099999995, 3.7070000171661377],
        "PretopoMD": [13.54691200000002, 13.7360999584198],
    },
    20: {
        "ClustMD": [219.79294400000003, 54.02439999580383],
        "DenseClus": [46.45702499999999, 17.75190019607544],
        "Phillip & Ottaway": [28.429705999999953, 1.7090001106262207],
        "Kamila": [104.00386900000001, 1.8550000190734863],
        "K-Prototypes": [24.53517499999998, 2.5360000133514404],
        "MixtComp": [150.777338, 3.8279998302459717],
        "Modha-Spangler": [113.93357500000002, 20.4685001373291],
        "Pretopo-FAMD": [57.812524999999994, 3.0710999965667725],
        "Pretopo-UMAP": [74.99609400000003, 9.609100103378296],
        "Pretopo-PaCMAP": [56.81246199999998, 4.1834001541137695],
        "PretopoMD": [13.667950000000019, 15.192700147628784],
    },
    50: {
        "ClustMD": [390.69140600000003, 149.78139996528625],
        "DenseClus": [46.425749999999994, 18.183799982070923],
        "Phillip & Ottaway": [28.953075000000013, 2.3515000343322754],
        "Kamila": [114.07421900000003, 2.1989998817443848],
        "K-Prototypes": [30.585980999999947, 2.9739999771118164],
        "MixtComp": [156.23440599999998, 5.978299856185913],
        "Modha-Spangler": [115.77734400000003, 34.79960012435913],
        "Pretopo-FAMD": [57.38675599999999, 3.1150999069213867],
        "Pretopo-UMAP": [76.23823800000002, 9.525400161743164],
        "Pretopo-PaCMAP": [60.898405999999966, 4.983100175857544],
        "PretopoMD": [13.421861999999976, 14.77150011062622],
    },
    100: {
        "ClustMD": [1161.914062, 411.7828998565674],
        "DenseClus": [45.499994000000015, 17.24910020828247],
        "Phillip & Ottaway": [34.890619000000015, 3.49800014495849],
        "Kamila": [114.246125, 2.7750000953674316],
        "K-Prototypes": [33.636675000000025, 4.019000053405762],
        "MixtComp": [168.49614400000002, 9.466599941253662],
        "Modha-Spangler": [118.16796900000003, 59.250800132751465],
        "Pretopo-FAMD": [59.359361999999976, 3.2741000652313232],
        "Pretopo-UMAP": [75.21095599999995, 9.781699895858765],
        "Pretopo-PaCMAP": [59.57815599999998, 6.569999933242798],
        "PretopoMD": [13.562494000000015, 16.02489995956421],
    },
}

In [10]:
# Memory usage versus number of numerical features, one line per algorithm.
# `nn` fixes the column (x-axis) order; the categorical-features cell reuses it.
nn = [2, 5, 10, 20, 50, 100]
num_df = pd.DataFrame.from_dict(n_num)[nn]

# Each cell of `num_df` holds a [memory, time] list; keep element 0 (memory)
# and transpose so rows are feature counts and columns are algorithms.
num_mem = num_df.apply(lambda col: col.str[0]).T

# A single figure is built here. The earlier draft also created a second,
# never-displayed figure titled "Time (s)" from this same memory data.
fig = px.line(data_frame=num_mem, markers=True)
fig.update_layout(legend_title_text="Algorithm")
fig.update_xaxes(title="Number of Numerical Features")
fig.update_yaxes(title="Maximum Memory Usage (MiB)")
fig

In [11]:
# Computation time versus number of numerical features, one line per algorithm.
# `num_df` (built in the previous cell) holds a [memory, time] list per cell;
# keep element 1 (time) and transpose so rows are feature counts.
num_time = num_df.apply(lambda col: col.str[1]).T

# `markers` is a boolean switch; `log_x=False` was the default and is omitted.
fig = px.line(data_frame=num_time, markers=True)
fig.update_layout(legend_title_text="Algorithm")
# Axis label worded the same way as on the matching memory plot.
fig.update_xaxes(title="Number of Numerical Features")
fig.update_yaxes(title="Computation Time (s)")
fig

In [12]:
# Benchmark results when varying the number of categorical features.
# Structure: {number of categorical features: {algorithm: [memory, time]}}.
# The plotting cells below read element 0 as "Maximum Memory Usage (MiB)" and
# element 1 as "Computation Time (s)".
# NOTE(review): presumably the other base-case settings are held fixed while
# this count varies - confirm against the benchmark script.
n_cat = {
    2: {
        "ClustMD": [163.74611900000002, 24.157700061798096],
        "DenseClus": [44.71486900000002, 19.677599906921387],
        "Phillip & Ottaway": [26.15623099999999, 1.2940001487731934],
        "Kamila": [97.74217499999997, 1.5409998893737793],
        "K-Prototypes": [22.48828800000001, 2.174999952316284],
        "MixtComp": [150.21093099999996, 2.613100051879883],
        "Modha-Spangler": [113.660188, 9.431099891662598],
        "Pretopo-FAMD": [57.89453099999997, 3.2744998931884766],
        "Pretopo-UMAP": [73.347644, 9.408699989318848],
        "Pretopo-PaCMAP": [57.46873099999999, 3.5339999198913574],
        "PretopoMD": [13.074180999999953, 13.948699951171875],
    },
    # 5 categorical features: identical to the `base_case` measurements.
    5: {
        "ClustMD": [184.296919, 28.09750008583069],
        "DenseClus": [42.257825000000025, 17.524900197982788],
        "Phillip & Ottaway": [30.007769000000053, 1.4196999073028564],
        "Kamila": [116.44922500000001, 1.6607000827789307],
        "K-Prototypes": [24.589850000000013, 2.361299991607666],
        "MixtComp": [153.73832499999997, 2.758500099182129],
        "Modha-Spangler": [117.761731, 13.666800022125244],
        "Pretopo-FAMD": [58.757799999999975, 3.3143999576568604],
        "Pretopo-UMAP": [77.55470599999995, 9.262700080871582],
        "Pretopo-PaCMAP": [59.472644, 3.909899950027466],
        "PretopoMD": [13.164044000000047, 19.644400119781494],
    },
    10: {
        "ClustMD": [206.04301199999998, 39.76929998397827],
        "DenseClus": [45.011750000000006, 15.17519998550415],
        "Phillip & Ottaway": [23.09376900000001, 1.4890000820159912],
        "Kamila": [99.77339999999998, 1.7149999141693115],
        "K-Prototypes": [23.660162000000014, 2.270400047302246],
        # NOTE(review): 48.4 MiB is far below MixtComp's memory at every other
        # setting in this dict (about 150-157 MiB) - possibly a transcription
        # error; confirm against the raw benchmark logs.
        "MixtComp": [48.41409400000003, 4.771500110626221],
        "Modha-Spangler": [111.90238799999997, 21.130199909210205],
        "Pretopo-FAMD": [56.75386200000003, 3.068700075149536],
        "Pretopo-UMAP": [76.40238099999999, 9.356499910354614],
        "Pretopo-PaCMAP": [60.71089999999998, 3.812000036239624],
        "PretopoMD": [14.542994000000022, 22.897000074386597],
    },
    20: {
        "ClustMD": [223.89065, 59.13629984855652],
        "DenseClus": [46.328169, 14.841700077056885],
        "Phillip & Ottaway": [23.19923799999998, 1.6340000629425049],
        # NOTE(review): 9.2 MiB is far below Kamila's memory at every other
        # setting in this dict (about 98-139 MiB) - possibly a transcription
        # error; confirm against the raw benchmark logs.
        "Kamila": [9.16402499999998, 1.817000150680542],
        "K-Prototypes": [22.957055999999966, 2.5440001487731934],
        "MixtComp": [149.69142499999998, 3.452000141143799],
        "Modha-Spangler": [113.71091200000001, 35.03909993171692],
        "Pretopo-FAMD": [58.41797500000001, 3.1110000610351562],
        "Pretopo-UMAP": [73.83593099999996, 9.755599975585938],
        "Pretopo-PaCMAP": [58.38285000000002, 4.109899997711182],
        "PretopoMD": [12.960894000000053, 13.557500123977661],
    },
    50: {
        "ClustMD": [435.83988100000005, 181.58169984817505],
        "DenseClus": [41.914012000000014, 15.102699995040894],
        "Phillip & Ottaway": [25.42972500000002, 2.3259999752044678],
        "Kamila": [101.41795000000002, 2.0460000038146973],
        "K-Prototypes": [29.062494000000015, 3.2049999237060547],
        "MixtComp": [153.152338, 4.736999988555908],
        "Modha-Spangler": [116.41013099999998, 79.72449994087219],
        "Pretopo-FAMD": [59.843725000000006, 3.2385001182556152],
        "Pretopo-UMAP": [74.02339999999998, 9.767699956893921],
        "Pretopo-PaCMAP": [63.12503099999998, 5.388700008392334],
        "PretopoMD": [13.746094000000028, 13.465200185775757],
    },
    100: {
        "ClustMD": [1211.167931, 495.93039989471436],
        "DenseClus": [47.62890599999997, 15.217099905014038],
        "Phillip & Ottaway": [27.890669000000003, 2.7990000247955322],
        "Kamila": [139.238269, 2.4720001220703125],
        "K-Prototypes": [29.08986900000002, 3.881999969482422],
        "MixtComp": [156.60938099999998, 6.975300073623657],
        "Modha-Spangler": [120.94136200000003, 157.2970998287201],
        "Pretopo-FAMD": [60.878855999999985, 3.5114998817443848],
        "Pretopo-UMAP": [77.22269400000005, 9.758800029754639],
        "Pretopo-PaCMAP": [62.31640599999997, 6.734800100326538],
        "PretopoMD": [13.351605999999947, 14.860399961471558],
    },
}

In [13]:
# Memory usage versus number of categorical features, one line per algorithm.
# Column order is taken from the dict's own keys, so this cell no longer
# depends on the `nn` list defined in the numerical-features cell.
cat_df = pd.DataFrame.from_dict(n_cat)[sorted(n_cat)]

# Each cell of `cat_df` holds a [memory, time] list; keep element 0 (memory)
# and transpose so rows are feature counts and columns are algorithms.
cat_mem = cat_df.apply(lambda col: col.str[0]).T

# A single figure is built here. The earlier draft also created a second,
# never-displayed figure titled "Time (s)" from this same memory data.
fig = px.line(data_frame=cat_mem, markers=True)
fig.update_layout(legend_title_text="Algorithm")
fig.update_xaxes(title="Number of Categorical Features")
fig.update_yaxes(title="Maximum Memory Usage (MiB)")
fig

In [15]:
# Computation time versus number of categorical features, one line per algorithm.
# `cat_df` (built in the previous cell) holds a [memory, time] list per cell;
# keep element 1 (time) and transpose so rows are feature counts.
cat_time = cat_df.apply(lambda col: col.str[1]).T

# `markers` is a boolean switch; `log_x=False` was the default and is omitted.
fig = px.line(data_frame=cat_time, markers=True)
fig.update_layout(legend_title_text="Algorithm")
# Axis label worded the same way as on the matching memory plot.
fig.update_xaxes(title="Number of Categorical Features")
fig.update_yaxes(title="Computation Time (s)")
fig