# Using Scikit_learn

In [14]:
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score

In [4]:
iris = datasets.load_iris()
x = iris.data
y = iris.target

In [5]:
x, y= shuffle(x,y, random_state=42)

In [6]:
model = KMeans(n_clusters=3, random_state=42)

In [7]:
iris_kmeans = model.fit(x)

In [8]:
labels = iris_kmeans.labels_
print(labels)

[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 1 0 2 2 2 2 2 0 0 0 0 1 0 0 1 1
 0 0 0 1 1 1 0 0 1 1 2 1 2 1 2 1 0 2 1 0 0 0 1 1 0 0 0 1 0 1 2 0 1 1 0 1 1
 1 1 2 1 0 1 2 0 0 1 2 0 1 0 0 1 1 2 1 2 2 1 0 0 1 2 0 0 0 1 2 0 2 2 0 1 1
 1 1 2 0 2 1 2 1 1 1 0 1 1 0 1 2 2 0 1 2 2 0 2 0 2 2 2 1 2 1 1 1 1 0 1 1 0
 1 2]


In [15]:
accuracy_score(y,labels)

0.8933333333333333

------------


# Custom Implementation from scratch

In [20]:
import numpy as np

def kmeans(X, k, num_iterations=100):
    # Randomly initialize centroids
    centroids = X[np.random.choice(X.shape[0], k, replace=False)]
    
    for _ in range(num_iterations):
        # Assign clusters based on closest centroid
        distances = np.linalg.norm(X[:, np.newaxis] - centroids, axis=2)
        labels = np.argmin(distances, axis=1)
        
        # Recalculate centroids
        centroids = np.array([X[labels == i].mean(axis=0) for i in range(k)])
    
    return labels, centroids

In [21]:
data = np.random.rand(100, 2)  # Random dataset
labels, centroids = kmeans(data, k=3)
print(labels, centroids)

[0 0 0 1 1 0 0 0 2 2 1 1 1 2 2 0 1 0 2 0 2 2 1 0 2 1 1 2 1 0 2 2 0 0 0 2 1
 0 0 1 1 2 0 2 0 1 2 0 1 2 2 0 0 2 1 2 2 1 2 0 2 2 2 0 2 0 2 0 0 0 2 0 1 1
 1 2 1 1 2 1 2 1 0 0 0 2 1 2 0 1 2 1 2 0 0 0 2 2 2 2] [[0.76262214 0.37269741]
 [0.20049899 0.28851022]
 [0.50354367 0.80508451]]


# K means on real world data

In [22]:
import pandas as pd
from sklearn.cluster import KMeans

In [24]:
data = pd.read_csv('dataset/Melbourne/melb_data.csv')
data

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra,-37.79960,144.99840,Northern Metropolitan,4019.0
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.80790,144.99340,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.80930,144.99440,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.79690,144.99690,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.80720,144.99410,Northern Metropolitan,4019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,Wheelers Hill,12 Strada Cr,4,h,1245000.0,S,Barry,26/08/2017,16.7,3150.0,...,2.0,2.0,652.0,,1981.0,,-37.90562,145.16761,South-Eastern Metropolitan,7392.0
13576,Williamstown,77 Merrett Dr,3,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,2.0,2.0,333.0,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0
13577,Williamstown,83 Power St,3,h,1170000.0,S,Raine,26/08/2017,6.8,3016.0,...,2.0,4.0,436.0,,1997.0,,-37.85274,144.88738,Western Metropolitan,6380.0
13578,Williamstown,96 Verdon St,4,h,2500000.0,PI,Sweeney,26/08/2017,6.8,3016.0,...,1.0,5.0,866.0,157.0,1920.0,,-37.85908,144.89299,Western Metropolitan,6380.0


In [27]:
x = data[['Rooms','Propertycount','Distance', 'Postcode', 'Bathroom', 'Price']]


In [28]:
model = KMeans(n_clusters=5)
model.fit(x)

In [29]:
lables = model.labels_

In [30]:
data['Cluster'] = lables

In [33]:
data.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,Cluster
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,202.0,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019.0,3
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0,1
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0,3
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0,1
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0,3
