In [39]:
import os

import altair as alt
import numpy as np
import pandas as pd
import prince
import psycopg2
# Switch Altair to its "vegafusion" data transformer for every chart in this notebook.
# NOTE(review): presumably needed because the dataset (~20k rows) exceeds Altair's
# default row limit for inline data — confirm.
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

# Loading data

## Querying database

In [12]:
# Loading data
# Connection settings come from the standard libpq environment variables
# (PGDATABASE, PGUSER, PGHOST, PGPASSWORD, PGPORT) so that real credentials do
# not have to be written into the notebook. The fallbacks reproduce the
# original local development setup, so the cell behaves as before when the
# variables are unset.
conn = psycopg2.connect(database=os.environ.get("PGDATABASE", "full_STUPS"),
                        user=os.environ.get("PGUSER", "postgres"),
                        host=os.environ.get("PGHOST", "localhost"),
                        password=os.environ.get("PGPASSWORD", "postgres"),
                        port=int(os.environ.get("PGPORT", 5432)))
try:
    # The cursor context manager closes the cursor on exit. Rows and column
    # names are both captured while the cursor and connection are still open.
    with conn.cursor() as cur:
        cur.execute('select * from echantillon_propriete ')
        samples = cur.fetchall()
        colnames = [desc[0] for desc in cur.description]
finally:
    # Release the connection even if the query fails. No commit is needed:
    # the cell only runs a SELECT.
    conn.close()

## Preparing dataset

In [28]:
# Subsampling data
subsample = False
SUBSAMPLE_SIZE = 3750  # number of leading rows kept when `subsample` is True

# Columns treated as quantitative. Defined once so the selection, the
# conversion and the write-back below cannot drift apart.
QUANTI_COLS = [
    '_3_4_methylenedioxyethylamphetamine',
    '_3_4_methylene_dioxy_methylamphetamine',
    '_5f_adb',
    '_5f_mdmb_pica',
    'acide_4_hydroxybutanoique',
    'adb_butinaca',
    'amphetamine',
    'buprenorphine',
    'cafeine',
    'cannabidiol',
    'cannabinol',
    'clonazepam',
    'cocaine',
    'delta8_tetrahydrocannabinol',
    'delta9_tetrahydrocannabinol',
    'diametre',
    'epaisseur',
    'fub_amb',
    'gammabutyrolactone',
    'hauteur',
    'heroine',
    'ketamine',
    'largeur',
    'levamisole',
    'lidocaine',
    'longueur',
    'masse',
    'mdmb_4en_pinaca',
    'methamphetamine',
    'morphine',
    'noscapine',
    'o6_monoacetylmorphine',
    'paracetamol',
    'phenacetine',
    'procaine',
]

# Build the frame from the fetched rows, discard the sample number and index
# the rows by (id, type_drogue). Chained rather than mutated in place so the
# cell is a single expression from `samples` to `X`.
X = (
    pd.DataFrame(list(samples), columns=colnames)
    .drop(columns=['num_echantillon'])
    .set_index(['id', 'type_drogue'])
)

# Transforming None values to 0.0 for quantitative columns.
quanti_X = X[QUANTI_COLS].fillna(value=0.0).astype(float)
X[QUANTI_COLS] = quanti_X

if subsample:
    X = X.iloc[:SUBSAMPLE_SIZE, :]

print(f'Samples shape : {X.shape}')

Samples shape : (20073, 51)


In [30]:
# Show the prepared frame; the notebook renders a truncated preview of its rows and columns.
X

Unnamed: 0_level_0,Unnamed: 1_level_0,_3_4_methylenedioxyethylamphetamine,_3_4_methylene_dioxy_methylamphetamine,_5f_adb,_5f_mdmb_pica,abime,acide_4_hydroxybutanoique,adb_butinaca,amphetamine,buprenorphine,cafeine,...,noscapine,o6_monoacetylmorphine,ovule,paracetamol,phenacetine,presentation,procaine,secabilite_recto,secabilite_verso,visqueux
id,type_drogue,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,Divers,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,
2,Amphétamine et dérivés,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,
3,Cannabis,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,,0.0,0.0,Sommités,0.0,,,
4,Héroïne,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,
5,Héroïne,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23910,Héroïne,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,
23911,Héroïne,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,
23918,Cocaïne,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,
23919,Cocaïne,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0,0.0,0.0,,0.0,,,


In [33]:
# FAMD (factor analysis of mixed data) settings, gathered in one mapping so
# they can be inspected or adjusted in a single place.
famd_settings = {
    "n_components": 2,
    "n_iter": 3,
    "copy": True,
    "check_input": True,
    "random_state": 42,
    "engine": "sklearn",
    # same parameter as sklearn.preprocessing.OneHotEncoder
    "handle_unknown": "error",
}

# Build the estimator and fit it on the prepared frame; `famd` ends up bound
# to whatever `fit` returns, as before.
famd = prince.FAMD(**famd_settings).fit(X)

In [None]:
# Plot the rows on components 0 and 1, coloured by the `type_drogue` index
# level, with column labels hidden.
plot_options = dict(
    x_component=0,
    y_component=1,
    show_column_labels=False,
    color_rows_by='type_drogue',
)
famd.plot(X, **plot_options)

In [37]:
# Inspect the per-variable coordinates on the fitted components.
famd.column_coordinates_

component,0,1
variable,Unnamed: 1_level_1,Unnamed: 2_level_1
_3_4_methylenedioxyethylamphetamine,0.2693961,0.01224698
_3_4_methylene_dioxy_methylamphetamine,1.506775e-34,1.023012e-31
_5f_adb,6.365442e-34,4.963172e-32
_5f_mdmb_pica,1.094611e-34,9.877183e-34
acide_4_hydroxybutanoique,5.042044e-35,1.414576e-34
adb_butinaca,4.226766e-35,2.8733260000000003e-32
amphetamine,0.0002414294,0.0009753492
buprenorphine,3.569662e-36,2.8614569999999998e-34
cafeine,0.00025145,0.002080351
cannabidiol,9.205766e-06,6.632167e-06
