In [120]:
import math
import numpy as np
import pandas as pd
import sys
import random

In [121]:
def ReadData(filename):
    f = open(filename,'r')
    lines = f.read().splitlines()
    f.close()
    items=[]
    for i in range(1,len(lines)):
        line = lines[i].split(',')
        itemFeatures = []
        for j in range(0,len(line)-1):
            v = float(line[j])
            itemFeatures.append(v)
        items.append(itemFeatures)
    random.shuffle(items)
    return items

In [122]:
def FindColMinMax(items):
    n = len(items[0])
    minima = [sys.maxsize for i in range(n)]
    maxima = [-sys.maxsize-1 for i in range(n)]
    for item in items:
        for f in range(len(item)):
            if item[f]<minima[f]:
                minima[f] = item[f]
            if item[f]>maxima[f]:
                maxima[f] = item[f]
    return maxima,minima

In [123]:
def InitializeMeans(items,k,cMin,cMax):
    f = len(items[0])
    means =[[0 for i in range(f)]for i in range(k)]
    for mean in means:
        for i in range(len(mean)):
            # uniform returns a random floating number between the two specified points
            # +-1 added to avoid a wide placement of a mean
            mean[i] = random.uniform(cMin[i]+1,cMax[i]-1)
    return means

In [124]:
def EucledianDistance(x,y):
    S = 0
    x = np.asarray(x)
    y = np.asarray(y)
    for i in range(len(x)):
        S += (x[i]-y[i])**2
    return math.sqrt(S)

In [125]:
def UpdateMeans(n,mean,item):
    for i in range(len(mean)):
        m = mean[i]
        m = (m*(n-1)+item[i])/float(n)
        mean[i] = round(m,3)
    return mean

In [126]:
def Classify(means,item):
    minimum = sys.maxsize
    index = -1
    for i in range(len(means)):
        dis = EucledianDistance(item, means[i])
        if(dis < minimum):
            minimum = dis
            index = i
    return index

In [127]:
def CalculateMeans(k,items,maxIterations=100000):
    cMin,cMax = FindColMinMax(items)
    means = InitializeMeans(items, k, cMin, cMax)
    clusterSizes = [0 for i in range(len(means))]
    belongsTo = [0 for i in range(len(items))]
    for e in range(maxIterations):
        noChange = True
        for i in range(len(items)):
            item = items[i]
            index = Classify(means, item)
            clusterSizes[index]+=1
            cSize = clusterSizes[index]
            means[index] = UpdateMeans(cSize, means[index], item)
            if(index!=belongsTo[i]):
                noChange = False
            belongsTo[i] = index
        if noChange:
            break
    return means

In [128]:
def FindCluster(means,items):
    clusters = [[]for i in range(len(means))]
    for item in items:
        index = Classify(means, item)
        clusters[index].append(item)
    return clusters

In [129]:
if __name__ == '__main__':
    items = ReadData('C:\\Users\\DELL\\Desktop\\AI ML\\DATA SETS\\Unsupervised Data Set (Unlabeled)\\Dataset1UnsupervisedIris.txt')
    k = 3
    means = CalculateMeans(k,items)
    clusters = FindCluster(means,items)
    print(means)
    count = 1
    for i in clusters:
        print("Cluster No. ",count)
        print(i)
        count += 1
    #newItem=[5.4,3.7,1.5,0.2]
    #print(Classify(means,newItem))

[[6.299, 2.89, 4.958, 1.7], [3.510304641119683, 1.7156334334240588, 5.7469044264089515, 2.78528750816119], [5.007, 3.359, 1.565, 0.288]]
Cluster No.  1
[[7.7, 3.0, 6.1, 2.3], [5.9, 3.2, 4.8, 1.8], [6.0, 3.4, 4.5, 1.6], [6.7, 3.0, 5.0, 1.7], [6.4, 2.7, 5.3, 1.9], [6.3, 3.3, 6.0, 2.5], [7.1, 3.0, 5.9, 2.1], [6.8, 2.8, 4.8, 1.4], [5.8, 2.7, 5.1, 1.9], [7.4, 2.8, 6.1, 1.9], [6.7, 3.3, 5.7, 2.1], [6.9, 3.1, 4.9, 1.5], [6.7, 3.1, 4.4, 1.4], [6.5, 3.0, 5.2, 2.0], [6.1, 2.9, 4.7, 1.4], [5.6, 2.7, 4.2, 1.3], [5.6, 2.5, 3.9, 1.1], [6.3, 3.3, 4.7, 1.6], [5.7, 2.5, 5.0, 2.0], [5.5, 2.6, 4.4, 1.2], [7.2, 3.0, 5.8, 1.6], [6.3, 2.9, 5.6, 1.8], [6.4, 3.2, 4.5, 1.5], [5.5, 2.4, 3.7, 1.0], [5.5, 2.3, 4.0, 1.3], [6.8, 3.2, 5.9, 2.3], [5.4, 3.0, 4.5, 1.5], [6.0, 2.9, 4.5, 1.5], [5.9, 3.0, 4.2, 1.5], [5.9, 3.0, 5.1, 1.8], [5.7, 2.9, 4.2, 1.3], [6.2, 2.8, 4.8, 1.8], [6.0, 2.2, 4.0, 1.0], [6.3, 3.4, 5.6, 2.4], [7.3, 2.9, 6.3, 1.8], [6.4, 3.2, 5.3, 2.3], [6.3, 2.7, 4.9, 1.8], [6.4, 3.1, 5.5, 1.8], [7.7, 2.6, 