In [5]:
import numpy as np
import matplotlib.pyplot as plt
import math
import random
import copy

In [6]:
#Extras
def eucledian_distance(A,B,dimensions):
    """Return the Euclidean (L2) distance between two points.

    Only the coordinates at indices ``0 .. dimensions - 1`` of ``A`` and ``B``
    take part in the computation, so both arguments must be indexable over
    that range.
    """
    # Lazily produce one squared coordinate difference per axis.
    squaredTerms = ((A[axis] - B[axis]) ** 2 for axis in range(dimensions))
    total = 0
    # Accumulate left to right so the floating-point result is reproducible.
    for term in squaredTerms:
        total += term
    return math.sqrt(total)

def newMean(lastCentroid,newPoints,dimensions):
    """Return the coordinate-wise mean of the points assigned to a centroid.

    lastCentroid -- value handed back unchanged when ``newPoints`` is empty.
    newPoints    -- sized collection of objects exposing ``getArray()``.
    dimensions   -- number of leading coordinates to average.
    """
    coordinateSums = np.zeros([dimensions])
    memberCount = len(newPoints)
    if memberCount == 0:
        # An empty cluster keeps whatever centroid it had before.
        return lastCentroid
    for member in newPoints:
        values = member.getArray()
        for axis in range(dimensions):
            coordinateSums[axis] += values[axis]
    return coordinateSums / memberCount

In [7]:
class Number:
    """A labelled sample: a feature array plus a given and an inferred label.

    The three fields are private (name-mangled) and are reached through the
    getter/setter pairs below.
    """

    def __init__(self,label:int,array,inferedLabel=-1):
        """Store the label, the feature array and the inferred label (default -1)."""
        self.__label = label
        self.__array = array
        self.__inferedLabel = inferedLabel

    def setLabel(self,label): self.__label = label
    def setArray(self,array): self.__array = array
    def setInferedLabel(self,label): self.__inferedLabel = label
    def getArray(self): return self.__array
    def getLabel(self): return self.__label
    def getInferedLabel(self): return self.__inferedLabel

    def isEqual(self,o,tolerance=0.006):
        """Return True when every coordinate of ``o`` is within ``tolerance`` of ours.

        o         -- object exposing ``getArray()``; it is indexed at every
                     position of this instance's array, so a shorter array
                     raises IndexError.
        tolerance -- maximum allowed difference per coordinate (inclusive).
                     Defaults to 0.006, the value that used to be hard-coded.
        """
        otherArray = o.getArray()
        for i,ownValue in enumerate(self.__array):
            otherValue = otherArray[i]
            # Same comparison form as before so default results are unchanged.
            if(otherValue < ownValue - tolerance or otherValue > ownValue + tolerance):
                return False
        return True

    def __str__(self):
        return f"Label:{self.__label}, Array:{self.__array}"

class KMeansClassifier:
    """K-means clustering over rows of 64 comma-separated features plus a label.

    Typical use: construct with the two file paths and K, call
    ``loadTrainingData()``, then ``createModel()``.
    """

    # Each data row holds this many feature values followed by one integer label.
    _FEATURE_COUNT = 64

    #Constructor
    def __init__(self,dataSetTest,dataSetTrainig,K):
        """Remember the data-set paths and the cluster count; no file is read here."""
        self.__dataSetTest = dataSetTest
        self.__dataSetTraining = dataSetTrainig
        self.__K = K
        self.__NData = 0
        self.__trainingData = []
        self.__trainingDataCopy = []
        self.__testData = []
        self.__centroids = []
        self.__variance = 0  # never updated by any method of this class

    def __str__(self) -> str:
        return "K: {}, variance: {}".format(self.__K,self.__variance)

    def getVariance(self): return self.__variance
    def getK(self): return self.__K
    def getCentroids(self): return self.__centroids
    def getNData(self): return self.__NData
    def getTrainingData(self):return self.__trainingData
    def getTestData(self): return self.__testData

    def _readDataSet(self,path):
        """Parse every non-blank row of ``path`` into a list of Number objects.

        A row is ``_FEATURE_COUNT`` comma-separated values followed by an
        integer label. A row with too few fields raises IndexError; a
        non-numeric field raises ValueError.
        """
        samples = []
        # The context manager closes the file even when a row fails to parse.
        with open(path) as dataFile:
            for rawLine in dataFile:
                fields = rawLine.strip().split(",")
                if fields == [""]:
                    continue  # blank line, e.g. a trailing newline at end of file
                label = int(fields[self._FEATURE_COUNT])
                features = np.array(fields[:self._FEATURE_COUNT],dtype=float)
                samples.append(Number(label=label,array=features))
        return samples

    def loadTestData(self):
        """Read the test file given to the constructor and replace the test samples."""
        self.__testData = self._readDataSet(self.__dataSetTest)
        # Recompute instead of incrementing so a repeated load cannot inflate it.
        self.__NData = len(self.__testData) + len(self.__trainingData)

    def loadTrainingData(self):
        """Read the training file given to the constructor and replace the training samples."""
        self.__trainingData = self._readDataSet(self.__dataSetTraining)
        # Same list object as the training data (an alias, not an independent copy).
        self.__trainingDataCopy = self.__trainingData
        self.__NData = len(self.__testData) + len(self.__trainingData)

    def createModel(self):
        """Run K-means on the loaded training data and print a cluster-size score.

        Centroids start as random vectors, then assignment and mean-update
        steps repeat until no centroid moves by more than ``Number.isEqual``'s
        tolerance on any coordinate. Every training sample gets its inferred
        label set to the label of its nearest centroid, and the converged
        centroids become available through ``getCentroids()``.

        Raises ValueError when K is smaller than 1 or no training data is loaded.
        """
        if self.__K < 1:
            raise ValueError("K must be at least 1")
        if len(self.__trainingData) == 0:
            raise ValueError("no training data loaded; call loadTrainingData() first")

        featureCount = self._FEATURE_COUNT
        # Sampling weights for the values 0..16 used to draw initial centroids
        # (presumably the empirical frequency of each pixel intensity - TODO confirm).
        freq =[0.27, 0.053, 0.042, 0.039, 0.042, 0.036, 0.034, 0.035, 0.045, 0.032, 0.034, 0.035, 0.045, 0.044, 0.045, 0.054, 0.12]
        numbers = list(range(17))

        centroids = []
        for k in range(self.__K):
            array = np.array(random.choices(numbers,weights=freq,k=featureCount),dtype=float)
            # First and last features are forced to zero in every initial centroid.
            array[0] = 0
            array[featureCount - 1] = 0
            centroids.append(Number(label=k,array=array))
        centroids = np.array(centroids)

        previous = copy.deepcopy(centroids)
        while True:
            classesInCentroids = [[] for _ in range(self.__K)]
            for specificNumber in self.__trainingData:
                features = specificNumber.getArray()
                # min() keeps the first of equally distant centroids.
                nearestCentroid = min(
                    centroids,
                    key=lambda centroid: eucledian_distance(centroid.getArray(),features,featureCount),
                )
                specificNumber.setInferedLabel(nearestCentroid.getLabel())
                classesInCentroids[nearestCentroid.getLabel()].append(specificNumber)
            #Update the mean
            for index,members in enumerate(classesInCentroids):
                centroids[index].setArray(newMean(centroids[index].getArray(),members,featureCount))
            # Converged once every centroid matches its previous position.
            if all(current.isEqual(old) for current,old in zip(centroids,previous)):
                break
            previous = copy.deepcopy(centroids)
        # Publish the converged centroids, not the snapshot from one iteration earlier.
        self.__centroids = copy.deepcopy(centroids)

        # NOTE(review): this "accuracy" only measures how far each cluster's size
        # is from a fixed 380 members; it never looks at the true labels. The
        # value and output format are kept as they were - confirm the intended metric.
        expectedClusterSize = 380
        sizeError = sum(abs(expectedClusterSize - len(members)) for members in classesInCentroids)
        accuracy = (1 - sizeError / len(self.__trainingData)) * 100
        print(f"accuracy :{accuracy:.2f}%")



            
class PriorityQueue:
    """Minimum-first priority queue backed by an unsorted list.

    Entries are ``(data, priority)`` tuples kept in insertion order; the entry
    with the smallest priority is served first and ties go to the entry that
    was enqueued earliest.
    """

    def __init__(self):
        self.queue = []  # (data, priority) tuples in insertion order
        self.size = 0    # number of stored entries, updated by enqueue/dequeue

    def __str__(self):
        return ' '.join(str(entry) for entry in self.queue)

    def isEmpty(self):
        """Return True when no entries are stored."""
        return self.size == 0

    def enqueue(self, data, priority):
        """Append ``data`` with the given ``priority`` (smaller is served first)."""
        self.queue.append((data, priority))
        self.size += 1

    def _indexOfMin(self):
        """Return the position of the first entry with the smallest priority.

        Raises IndexError when the queue is empty.
        """
        if not self.queue:
            # Raise instead of calling exit(): terminating the interpreter (or
            # a notebook kernel) from a container method hides the real error.
            raise IndexError("PriorityQueue is empty")
        # min() keeps the first of equal keys, matching a strict '<' scan.
        return min(range(len(self.queue)), key=lambda i: self.queue[i][1])

    def dequeue(self):
        """Remove and return the data of the lowest-priority entry.

        Raises IndexError when the queue is empty.
        """
        data, _priority = self.queue.pop(self._indexOfMin())
        self.size -= 1
        return data

    def peek(self):
        """Return the data of the lowest-priority entry without removing it.

        Raises IndexError when the queue is empty.
        """
        return self.queue[self._indexOfMin()][0]

In [8]:
# createModel() draws its initial centroids with random.choices, so seed the
# stdlib generator first to make a fresh-kernel run reproducible.
SEED = 42
random.seed(SEED)

# Data-set locations and cluster count, named so they are easy to find and tune.
TEST_PATH = "optdigits.tes"
TRAINING_PATH = "optdigits.tra"
N_CLUSTERS = 10

KMeansModel = KMeansClassifier(TEST_PATH,TRAINING_PATH,N_CLUSTERS)
KMeansModel.loadTrainingData()
KMeansModel.createModel()