In [42]:
import numpy as np
import pandas as pd

from plotnine import *

from sklearn.cluster import DBSCAN, KMeans
from sklearn.mixture import GaussianMixture

from sklearn.metrics import silhouette_score

from sklearn.preprocessing import StandardScaler

In [43]:
df = pd.read_csv("auta.csv", index_col=0)
df

Unnamed: 0,Cena,Przebieg,Pojemnosc,KM,diesle
Audi A3,20900.0,164450.0,1896.0,110,66.9
Audi A4,25311.4,182000.0,1900.0,130,75.7
Audi A6,31500.0,185000.0,2496.0,170,82.5
Audi A8,109000.0,129380.0,4134.0,300,68.1
Audi Q7,179000.0,72000.0,3000.0,240,89.1
BMW 316,6150.0,186000.0,1600.0,105,1.7
BMW 318,14500.0,173980.0,1895.0,118,32.8
BMW 320,21699.0,178000.0,2000.0,150,81.4
BMW 520,55900.0,149350.0,1998.0,163,65.7
BMW 525,17110.0,207000.0,2499.0,163,87.7


In [44]:
auta_km = df.copy()
auta_gm = df.copy()
auta_db = df.copy()

In [45]:
sscaler = StandardScaler()

auta_km[auta_km.columns] = sscaler.fit_transform(df[df.columns])

# K-means

In [46]:
scores = []

for i in range(2,7):
  km = KMeans(n_clusters=i).fit(auta_km)
  auta_km['klastry_km'] = km.labels_
  df['klastry_km'] = km.labels_
  score = silhouette_score(df[['Cena']],df['klastry_km'])
  scores.append(score)


## Wartości Silhouette wdg ilości klastrów (KMeans) względem ceny aut:

In [47]:
for index,score in enumerate(scores, 2):
    print(f"{index} klustry: {score}")

2 klustry: 0.8373420255764807
3 klustry: 0.053724704066117734
4 klustry: -0.05451580226954292
5 klustry: -0.09621092663571996
6 klustry: -0.13356592828285066


# Gaussian Mixture  

In [48]:
auta_gm[auta_gm.columns] = sscaler.fit_transform(auta_gm[auta_gm.columns])

gm_scores = []

for i in range(2,7):
    print(i)
    gm = GaussianMixture(n_components=i).fit(auta_gm)
    gm_pred = gm.predict(auta_gm)
    auta_gm['klastry_gm'] = gm_pred
    df['klastry_gm'] = gm_pred
    score = silhouette_score(df[['Cena']],df['klastry_gm'])
    gm_scores.append(score)
    

2
3
4
5
6


## Wartości Silhouette wdg ilości klastrów (GM) względem ceny aut:

In [49]:
for index,score in enumerate(gm_scores, 2):
    print(f"{index} klustry: {score}")

2 klustry: 0.8888746785500887
3 klustry: 0.3274683314504112
4 klustry: 0.0758584855936178
5 klustry: 0.03302282673853458
6 klustry: -0.1493746258966626


# DBSCAN

In [None]:
db_scores = []

# 0.5, 5 / 1, 5 / 1, 10 / 1.5, 15 / 1.5, 10 / 1.5 , 5 / 2, 5/ 2, 10 / 2, 15 / 2.5 , 5 / 3, 5

In [88]:
auta_db[auta_db.columns] = sscaler.fit_transform(auta_db[auta_db.columns])
       

db = DBSCAN(eps=3,min_samples=5).fit(auta_db)
auta_db['klastry_db'] = db.labels_
df['klastry_db'] = db.labels_
score = silhouette_score(df[['Cena']],df['klastry_db'])
db_scores.append(score)


In [83]:
print(db_scores)

[-0.386932980138605, -0.386932980138605, -0.26911752155703195, -0.10796071846743982, 0.6420164933376616, 0.6420164933376616, 0.6420164933376616, 0.6420164933376616, 0.6420164933376616, 0.6420164933376616, 0.6420164933376616, 0.6884422800329756]


## Wartości Silhouette wdg ilości klastrów (DBSCAN) względem ceny aut:


In [89]:
print(f"EPS 0.5, MIN_SAMPLES 5: {db_scores[1]}")
print(f"EPS 1, MIN_SAMPLES 5: {db_scores[2]}")
print(f"EPS 1, MIN_SAMPLES 10: {db_scores[3]}")
print(f"EPS 1, MIN_SAMPLES 15: {db_scores[4]}")
print(f"EPS 1.5, MIN_SAMPLES 15: {db_scores[5]}")
print(f"EPS 1.5, MIN_SAMPLES 10: {db_scores[6]}")
print(f"EPS 1.5, MIN_SAMPLES 5: {db_scores[7]}")
print(f"EPS 2, MIN_SAMPLES 5: {db_scores[8]}")
print(f"EPS 2, MIN_SAMPLES 10: {db_scores[9]}")
print(f"EPS 2, MIN_SAMPLES 15: {db_scores[10]}")
print(f"EPS 2.5, MIN_SAMPLES 5: {db_scores[11]}")
print(f"EPS 3, MIN_SAMPLES 5: {db_scores[12]}")

EPS 0.5, MIN_SAMPLES 5: -0.386932980138605
EPS 1, MIN_SAMPLES 5: -0.26911752155703195
EPS 1, MIN_SAMPLES 10: -0.10796071846743982
EPS 1, MIN_SAMPLES 15: 0.6420164933376616
EPS 1.5, MIN_SAMPLES 15: 0.6420164933376616
EPS 1.5, MIN_SAMPLES 10: 0.6420164933376616
EPS 1.5, MIN_SAMPLES 5: 0.6420164933376616
EPS 2, MIN_SAMPLES 5: 0.6420164933376616
EPS 2, MIN_SAMPLES 10: 0.6420164933376616
EPS 2, MIN_SAMPLES 15: 0.6420164933376616
EPS 2.5, MIN_SAMPLES 5: 0.6884422800329756
EPS 3, MIN_SAMPLES 5: 0.6884422800329756
