In [1]:
import os

import numpy as np
import matplotlib.pyplot as plt

In [2]:
def ReadGlass(path, delimiter=None):
    '''
    input: path of a text file holding one pattern per row, optional column delimiter
    output: (pattern matrix, classes vector)

    The first column of every row is dropped (presumably a row id - confirm against the
    data file), the last column is returned as the classes vector and every column in
    between forms the pattern matrix.

    delimiter is forwarded to np.loadtxt; the default None splits on whitespace, exactly
    as before. Pass ',' to read a comma separated copy of the file.
    '''
    #ndmin=2 keeps a file with a single row two-dimensional, so the column slicing below still works
    glass = np.loadtxt(path, delimiter=delimiter, ndmin=2) #read file
    return glass[:,1:-1], glass[:,-1] #all columns but the first and last as pattern matrix, last column as classes vector

In [3]:
#the location of the data file is machine specific: set the GLASS_DATA_PATH environment variable
#to point at your own copy of glass.data; when it is not set the path below is used as before
file_path = os.environ.get('GLASS_DATA_PATH', r'C:\Users\krist\Downloads\ece\8ο\Αναγνωριση προτυπων\Ergastiriakes Askisis\PatRec\DATA\glass.data')
X, y = ReadGlass(file_path) #X: pattern matrix, y: classes vector

In [4]:
class distanceClassifier:
    '''
    Minimum distance classifier for integer class labels 1..classesNo.

    Every class is represented by one reference point and a pattern is assigned to the
    class whose reference point has the smallest squared euclidean distance to it.
    Two kinds of reference points are kept:
      - virtual centers: the mean of the patterns of each class
      - centers: for each class, the pattern of that class with the smallest sum of
        squared distances to the patterns of the class
    A label in 1..classesNo without any pattern gets a reference point filled with inf,
    so it can never be the closest one.
    '''

    def __init__(self):
        self.classesNo = 0 #contains number of classes (the largest label seen by fit)
        self.patternsNo = 0 #contains dataset size
        self.classes = [] #contains a data array for each class
        self.centers = [] #contains centers (euclid distance square)
        self.virtual_centers = [] #contains virtual centers (euclid distance square)

    def fit(self,X,y):
        '''
        input: data X, labels y
        fills classifier parameters in accordance to the dataset

        Labels are truncated to integers. The number of classes is the largest label,
        so the labels do not have to be sorted.
        raises ValueError if the dataset is empty or X and y differ in length
        '''
        X = np.asarray(X, dtype=float)
        labels = np.asarray(y).astype(int)
        if len(X) == 0:
            raise ValueError("cannot fit on an empty dataset")
        if len(X) != len(labels):
            raise ValueError("X and y must contain the same number of patterns")

        self.classesNo = int(labels.max())
        self.patternsNo = len(X)
        #one (possibly empty) array of patterns per label 1..classesNo
        self.classes = [X[labels == j] for j in range(1,self.classesNo+1)]
        #calls center finder methods to fill center arrays
        self.virtual_centers = self.find_virtual_centers(X,y)
        self.centers = self.find_centers(X,y)

    def find_virtual_centers(self,X,y): #returns dataset's virtual centers
        '''
        input: data X (only used for the number of features), labels y (unused)
        output: array with one virtual center (class mean) per class

        Reads self.classes, so fit has to fill it first. The result is also stored in
        self.virtual_centers.
        '''
        unreachable = np.full(len(X[0]), np.inf) #reference point of a class that doesnt contain patterns
        self.virtual_centers = np.array([
            np.mean(cls, axis=0) if len(cls) else unreachable
            for cls in self.classes
        ])
        return self.virtual_centers

    def find_centers(self,X,y): #returns dataset's centers
        '''
        input: data X (only used for the number of features), labels y (unused)
        output: array with one center per class

        The center of a class is the pattern of the class with the minimum sum of
        squared distances to the patterns of the class; on a tie the first such pattern
        is kept. Reads self.classes, so fit has to fill it first. The result is also
        stored in self.centers.
        '''
        unreachable = np.full(len(X[0]), np.inf) #reference point of a class that doesnt contain patterns
        found = []
        for cls in self.classes:
            if len(cls) == 0:
                found.append(unreachable)
                continue
            #sum of squared distances from each pattern to every pattern of its class
            spread = [np.sum((cls - pattern)**2) for pattern in cls]
            found.append(cls[int(np.argmin(spread))]) #argmin returns the first minimum
        self.centers = np.array(found)
        return self.centers

    def evaluate(self,X,y,reference = 'virtual_centers'):#evaluates classification error using the specified centers, default = virtual centers
        '''
        input: data X, labels y, reference ('virtual_centers' or 'centers')
        output: fraction of the patterns of X that are not assigned to their label in y

        raises ValueError for an unknown reference, an empty X or X and y of different length
        raises RuntimeError if the classifier has not been fitted
        '''
        if reference == 'virtual_centers':
            centers = self.virtual_centers
        elif reference == 'centers':
            centers = self.centers
        else :
            #ValueError is a subclass of Exception, so code catching Exception keeps working
            raise ValueError("choose a reference between 'centers' and 'virtual_centers'")

        centers = np.asarray(centers, dtype=float)
        if centers.size == 0:
            raise RuntimeError("fit must be called before evaluate")

        X = np.asarray(X, dtype=float)
        labels = np.asarray(y).astype(int)
        if len(X) == 0:
            raise ValueError("cannot evaluate on an empty dataset")
        if len(X) != len(labels):
            raise ValueError("X and y must contain the same number of patterns")

        #index of the closest center for each pattern; +1 because labels start at 1
        predicted = np.array([np.argmin(np.sum((centers - pattern)**2, axis=1)) for pattern in X]) + 1
        correct_classifications = int(np.count_nonzero(predicted == labels))

        return 1 - correct_classifications/len(X) #error percentage

In [5]:
def LOOevaluate(X,y,reference = 'virtual_centers'):
    '''
    input: data X, labels y, reference ('virtual_centers' or 'centers')
    output: leave-one-out classification error

    Every pattern is held out once: a fresh distanceClassifier is fitted on the remaining
    patterns and evaluated on the held out one. The per-pattern errors are averaged.
    '''
    total = len(X)
    error_sum = 0

    for held_out in range(total): #for each pattern
        #training set = total dataset - the held out pattern
        train_X = np.delete(X, held_out, axis=0)
        train_y = np.delete(y, held_out, axis=0)

        model = distanceClassifier() #build a new classifier
        model.fit(train_X, train_y)

        #evaluate returns the error over the patterns it is given, here a single one
        error_sum += model.evaluate([X[held_out]], [y[held_out]], reference)

    return error_sum/total #error percentage

In [7]:
#train one classifier on the complete dataset; the cells below print its centers and evaluate it
classifier = distanceClassifier()
classifier.fit(X,y)

In [8]:
np.set_printoptions(precision=5, suppress=True)
#one line per class with its virtual center, classes are numbered from 1
for class_no, center in enumerate(classifier.virtual_centers, start=1):
    print(f'class {class_no}: {center}')

class 1: [ 1.51872 13.24229  3.55243  1.16386 72.61914  0.44743  8.79729  0.01271
  0.057  ]
class 2: [ 1.51862 13.11171  3.00211  1.40816 72.59803  0.52105  9.07368  0.05026
  0.07974]
class 3: [ 1.51796 13.43706  3.54353  1.20118 72.40471  0.40647  8.78294  0.00882
  0.05706]
class 4: [inf inf inf inf inf inf inf inf inf]
class 5: [ 1.51893 12.82769  0.77385  2.03385 72.36615  1.47    10.12385  0.18769
  0.06077]
class 6: [ 1.51746 14.64667  1.30556  1.36667 73.20667  0.       9.35667  0.
  0.     ]
class 7: [ 1.51712 14.44207  0.53828  2.12276 72.96586  0.32517  8.49138  1.04
  0.01345]


In [9]:
np.set_printoptions(precision=5, suppress=True)
#one line per class with its center, classes are numbered from 1
for class_no, center in enumerate(classifier.centers, start=1):
    print(f'class {class_no}: {center}')

class 1: [ 1.51869 13.19     3.37     1.18    72.72     0.57     8.83     0.
  0.16   ]
class 2: [ 1.51811 12.96     2.96     1.43    72.92     0.6      8.79     0.14
  0.     ]
class 3: [ 1.51655 13.41     3.39     1.28    72.64     0.52     8.65     0.
  0.     ]
class 4: [inf inf inf inf inf inf inf inf inf]
class 5: [ 1.52058 12.85     1.61     2.17    72.18     0.76     9.7      0.24
  0.51   ]
class 6: [ 1.51888 14.99     0.78     1.74    72.5      0.       9.95     0.
  0.     ]
class 7: [ 1.51719 14.75     0.       2.      73.02     0.       8.53     1.59
  0.08   ]


In [17]:
#error on the training patterns themselves, using the virtual centers
print('min error:', classifier.evaluate(X, y))
#leave-one-out error, using the virtual centers
print('max error:', LOOevaluate(X, y))

min error: 0.5186915887850467
max error: 0.5514018691588785


In [16]:
#error on the training patterns themselves, using the centers
print('min error:', classifier.evaluate(X, y, reference='centers'))
#leave-one-out error, using the centers
print('max error:', LOOevaluate(X, y, reference='centers'))

min error: 0.5747663551401869
max error: 0.5841121495327103
