# `models`

In [1]:
pip show FastKmedoids

Name: FastKmedoids
Version: 0.0.36
Summary: FastKmedoids is a Python package to apply the clustering algorithms Fast Kmedoids and KFold Fast Kmedoids.
Home-page: https://github.com/FabioScielzoOrtiz/FastKmedoids-package
Author: Fabio Scielzo Ortiz
Author-email: fabioscielzo98@gmail.com
License: 
Location: c:\Users\fscielzo\Documents\Proyectos\FastKmedoids\FastKmedoids-demo\.venv\Lib\site-packages
Requires: numpy, polars
Required-by: 
Note: you may need to restart the kernel to use updated packages.


In [2]:
import polars as pl
import pandas as pd
import numpy as np

## Data

In [3]:
# Raw-GitHub location of the processed Madrid houses CSV (FastKmedoids-demo repository).
data_url = (
    "https://raw.githubusercontent.com/FabioScielzoOrtiz/FastKmedoids-demo/"
    "refs/heads/main/data/madrid_houses_processed.csv"
)

In [4]:
# Column names grouped by variable type.
quant_cols = [
    'sq_mt_built',
    'n_rooms',
    'n_bathrooms',
    'n_floors',
    'buy_price',
]
binary_cols = [
    'is_renewal_needed',
    'has_lift',
    'is_exterior',
    'has_parking',
]
multiclass_cols = [
    'energy_certificate',
    'house_type',
]

# Number of columns in each group (quantitative, binary, multiclass); these match
# the p1 / p2 / p3 values given to the models further down.
p1, p2, p3 = len(quant_cols), len(binary_cols), len(multiclass_cols)

In [5]:
# Load the same CSV in three container flavours: a pandas frame, a polars frame,
# and a NumPy array built from the pandas frame.
data_pd = pd.read_csv(data_url)
data_pl = pl.read_csv(data_url)
data_np = np.array(data_pd)

## `FastKmedoidsGGower`

In [6]:
from FastKmedoids.models import FastKmedoidsGGower

In [7]:
# Fit FastKmedoidsGGower with 3 clusters, using robust Mahalanobis / Jaccard / Hamming
# as the d1 / d2 / d3 distances.
# p1, p2, p3 reuse the column-group sizes computed earlier instead of retyping them
# as literals, so they cannot drift from quant_cols / binary_cols / multiclass_cols.
# NOTE(review): d1/d2/d3 presumably apply to the quantitative, binary and multiclass
# columns in that order, and X presumably needs its columns arranged the same way — confirm.
fast_kmedoids = FastKmedoidsGGower(
    n_clusters=3,
    method='pam',
    init='heuristic',
    max_iter=100,
    random_state=123,  # fixed seed for reproducible runs
    frac_sample_size=0.01,
    p1=p1,
    p2=p2,
    p3=p3,
    d1='robust_mahalanobis',
    d2='jaccard',
    d3='hamming',
    robust_method='trimmed',
    alpha=0.05,
    epsilon=0.05,
    n_iters=20,
)
fast_kmedoids.fit(X=data_pd)

In [8]:
# Show the `labels` attribute of the model fitted in the previous cell
# (presumably one cluster index per row of `data_pd` — confirm against the package docs).
fast_kmedoids.labels

array([2, 1, 1, ..., 0, 0, 0], dtype=int64)

In [None]:
# Same configuration as the first FastKmedoidsGGower fit, but with Euclidean / Sokal /
# Hamming as the d1 / d2 / d3 distances. Note that this rebinds `fast_kmedoids`.
# p1, p2, p3 reuse the column-group sizes computed earlier instead of retyping them
# as literals, so they cannot drift from quant_cols / binary_cols / multiclass_cols.
# NOTE(review): robust_method / alpha / epsilon / n_iters are presumably only relevant
# to the robust Mahalanobis distance and unused here — confirm.
fast_kmedoids = FastKmedoidsGGower(
    n_clusters=3,
    method='pam',
    init='heuristic',
    max_iter=100,
    random_state=123,  # fixed seed for reproducible runs
    frac_sample_size=0.01,
    p1=p1,
    p2=p2,
    p3=p3,
    d1='euclidean',
    d2='sokal',
    d3='hamming',
    robust_method='trimmed',
    alpha=0.05,
    epsilon=0.05,
    n_iters=20,
)
fast_kmedoids.fit(X=data_pd)

In [None]:
# Show the `labels` attribute of whichever model `fast_kmedoids` currently refers to
# (the name is rebound by each fit cell, so this depends on the last one executed).
fast_kmedoids.labels

In [None]:
# Same configuration as the first FastKmedoidsGGower fit, but with Canberra / Sokal /
# Hamming as the d1 / d2 / d3 distances. Note that this rebinds `fast_kmedoids`.
# p1, p2, p3 reuse the column-group sizes computed earlier instead of retyping them
# as literals, so they cannot drift from quant_cols / binary_cols / multiclass_cols.
# NOTE(review): robust_method / alpha / epsilon / n_iters are presumably only relevant
# to the robust Mahalanobis distance and unused here — confirm.
fast_kmedoids = FastKmedoidsGGower(
    n_clusters=3,
    method='pam',
    init='heuristic',
    max_iter=100,
    random_state=123,  # fixed seed for reproducible runs
    frac_sample_size=0.01,
    p1=p1,
    p2=p2,
    p3=p3,
    d1='canberra',
    d2='sokal',
    d3='hamming',
    robust_method='trimmed',
    alpha=0.05,
    epsilon=0.05,
    n_iters=20,
)
fast_kmedoids.fit(X=data_pd)

## `FoldFastKmedoidsGGower`

In [9]:
from FastKmedoids.models import FoldFastKmedoidsGGower

In [11]:
# Fit FoldFastKmedoidsGGower with 3 clusters over 10 shuffled splits, using robust
# Mahalanobis / Jaccard / Hamming as the d1 / d2 / d3 distances.
# p1, p2, p3 reuse the column-group sizes computed earlier instead of retyping them
# as literals, so they cannot drift from quant_cols / binary_cols / multiclass_cols.
# NOTE(review): d1/d2/d3 presumably apply to the quantitative, binary and multiclass
# columns in that order, and X presumably needs its columns arranged the same way — confirm.
kfold_fast_kmedoids = FoldFastKmedoidsGGower(
    n_clusters=3,
    method='pam',
    init='heuristic',
    max_iter=100,
    random_state=123,  # fixed seed for reproducible runs
    frac_sample_size=0.1,
    n_splits=10,
    shuffle=True,
    kfold_random_state=123,  # separate fixed seed for the fold split
    p1=p1,
    p2=p2,
    p3=p3,
    d1='robust_mahalanobis',
    d2='jaccard',
    d3='hamming',
    robust_method='trimmed',
    alpha=0.05,
    epsilon=0.05,
    n_iters=20,
    fast_VG=False,
    VG_sample_size=1000,
    VG_n_samples=5,
)
kfold_fast_kmedoids.fit(X=data_pd)

Clustering Folds: 100%|██████████| 10/10 [00:11<00:00,  1.13s/it]


In [12]:
# Show the `labels` attribute of the k-fold model fitted in the previous cell
# (presumably one cluster index per row of `data_pd` — confirm against the package docs).
kfold_fast_kmedoids.labels

array([0, 1, 1, ..., 1, 1, 1])

In [None]:
# Same configuration as the first FoldFastKmedoidsGGower fit, but with Euclidean /
# Sokal / Hamming as the d1 / d2 / d3 distances. Note that this rebinds
# `kfold_fast_kmedoids`.
# p1, p2, p3 reuse the column-group sizes computed earlier instead of retyping them
# as literals, so they cannot drift from quant_cols / binary_cols / multiclass_cols.
# NOTE(review): robust_method / alpha / epsilon / n_iters are presumably only relevant
# to the robust Mahalanobis distance and unused here — confirm.
kfold_fast_kmedoids = FoldFastKmedoidsGGower(
    n_clusters=3,
    method='pam',
    init='heuristic',
    max_iter=100,
    random_state=123,  # fixed seed for reproducible runs
    frac_sample_size=0.1,
    n_splits=10,
    shuffle=True,
    kfold_random_state=123,  # separate fixed seed for the fold split
    p1=p1,
    p2=p2,
    p3=p3,
    d1='euclidean',
    d2='sokal',
    d3='hamming',
    robust_method='trimmed',
    alpha=0.05,
    epsilon=0.05,
    n_iters=20,
    fast_VG=False,
    VG_sample_size=1000,
    VG_n_samples=5,
)
kfold_fast_kmedoids.fit(X=data_pd)

In [None]:
# Same configuration as the first FoldFastKmedoidsGGower fit, but with Canberra /
# Sokal / Hamming as the d1 / d2 / d3 distances. Note that this rebinds
# `kfold_fast_kmedoids`.
# p1, p2, p3 reuse the column-group sizes computed earlier instead of retyping them
# as literals, so they cannot drift from quant_cols / binary_cols / multiclass_cols.
# NOTE(review): robust_method / alpha / epsilon / n_iters are presumably only relevant
# to the robust Mahalanobis distance and unused here — confirm.
kfold_fast_kmedoids = FoldFastKmedoidsGGower(
    n_clusters=3,
    method='pam',
    init='heuristic',
    max_iter=100,
    random_state=123,  # fixed seed for reproducible runs
    frac_sample_size=0.1,
    n_splits=10,
    shuffle=True,
    kfold_random_state=123,  # separate fixed seed for the fold split
    p1=p1,
    p2=p2,
    p3=p3,
    d1='canberra',
    d2='sokal',
    d3='hamming',
    robust_method='trimmed',
    alpha=0.05,
    epsilon=0.05,
    n_iters=20,
    fast_VG=False,
    VG_sample_size=1000,
    VG_n_samples=5,
)
kfold_fast_kmedoids.fit(X=data_pd)