# Clustering example
This is a sample example using **K-means Clustering** model.
We will use Xavier's dataset. The main goal is to have a basic understanding of clustering in order classify the cyclones.

In [1]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
%matplotlib inline

In [2]:
data = pd.read_csv('./nyc-rolling-sales-clean-1.csv', sep=";")
data.head()

Unnamed: 0,Land_sqf,Gross_sqf,Year_built,NBH_level,SALE_PRICE,YEARLY_RENT
0,2400,1552,1930,2,220485,10900
1,2742,1207,1925,2,223372,8100
2,5610,1320,1910,5,362981,19000
3,1758,1537,1910,5,245135,9700
4,1317,1339,1920,4,216477,12800


In [4]:
# we will use all the columns as our features
X = data.values
X

array([[  2400,   1552,   1930,      2, 220485,  10900],
       [  2742,   1207,   1925,      2, 223372,   8100],
       [  5610,   1320,   1910,      5, 362981,  19000],
       ...,
       [  1200,    450,   1987,      6, 154983,   9500],
       [  3411,   1960,   1920,      5, 311596,  23400],
       [  4100,   2340,   1970,      7, 408346,  25900]], dtype=int64)

In [5]:
Kmean = KMeans(n_clusters=5) # we will classify the data into 5 different groups
Kmean.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [7]:
Kmean.cluster_centers_

array([[3.45993023e+03, 2.71502326e+03, 1.94374419e+03, 6.81395349e+00,
        4.29338140e+05, 2.83232558e+04],
       [1.75536207e+03, 1.09587931e+03, 1.94558621e+03, 2.89655172e+00,
        1.69473845e+05, 8.71034483e+03],
       [8.09240000e+03, 4.47320000e+03, 1.94500000e+03, 6.00000000e+00,
        7.16065800e+05, 4.90200000e+04],
       [2.25322619e+03, 1.29635714e+03, 1.93613095e+03, 4.70238095e+00,
        2.36149679e+05, 1.41309524e+04],
       [2.86703960e+03, 1.63967327e+03, 1.93349505e+03, 6.71287129e+00,
        3.15653733e+05, 2.16207921e+04]])

In [7]:
Kmean.labels_
# we have 5 groups, 0, 1, 2, 3 and 4
# each item in X corresponds to each item in the following array

array([1, 1, 3, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 3, 1, 1, 1, 0,
       0, 3, 0, 1, 1, 1, 3, 3, 1, 2, 2, 1, 0, 3, 3, 0, 4, 1, 2, 1, 3, 2,
       2, 2, 0, 3, 3, 3, 3, 2, 1, 3, 2, 2, 3, 3, 3, 3, 2, 1, 3, 2, 2, 2,
       3, 3, 3, 3, 2, 0, 3, 3, 3, 4, 3, 1, 1, 1, 2, 2, 1, 1, 3, 3, 0, 0,
       0, 3, 2, 1, 3, 2, 2, 2, 3, 1, 3, 2, 2, 3, 3, 0, 1, 0, 0, 2, 2, 1,
       1, 3, 0, 3, 2, 1, 3, 1, 2, 2, 3, 3, 3, 0, 4, 3, 0, 2, 3, 1, 3, 3,
       2, 3, 2, 1, 1, 1, 3, 0, 1, 2, 2, 3, 1, 1, 1, 2, 1, 1, 1, 0, 0, 0,
       0, 3, 3, 3, 1, 1, 2, 2, 2, 3, 3, 1, 0, 3, 3, 0, 0, 1, 3, 3, 3, 3,
       1, 3, 3, 1, 3, 3, 3, 1, 1, 3, 1, 2, 1, 0, 0, 2, 0, 3, 1, 1, 1, 3,
       3, 3, 0, 3, 3, 3, 1, 4, 1, 2, 2, 2, 1, 3, 3, 2, 3, 3, 1, 1, 0, 1,
       3, 1, 0, 3, 1, 1, 1, 1, 2, 3, 3, 1, 2, 0, 0, 0, 1, 2, 2, 2, 1, 3,
       3, 1, 3, 0, 3, 1, 0, 0, 2, 3, 1, 3, 1, 3, 3, 1, 1, 3, 1, 0, 2, 3,
       3, 1, 0, 4, 3, 3, 1, 2, 0, 1, 1, 3, 0, 1, 2, 3, 3, 0, 0, 3, 3, 3,
       3, 3, 2, 3, 0])