In [17]:
import numpy as np
import pandas as pd
import os
import math
import random
from sklearn.neighbors import KNeighborsClassifier
import sklearn.metrics as met

from fuzzy import *

In [18]:
class FuzzyKNN:
    def __init__(self, sensitivity, exp_bound):
        # konstruktor
        self.sensitivity = sensitivity
        self.hyperboxes = None
        self.classes = np.array([])
        self.exp_bound = exp_bound
        
    def membership(self, pattern):
        # racuna pripadnost i vraca niz pripadnosti svakom hiperboksu
        min_pts = self.hyperboxes[:, 0, :]
        max_pts = self.hyperboxes[:, 1, :]

        a = np.maximum(0, (1 - np.maximum(0, (self.sensitivity * np.minimum(1, pattern - max_pts)))))
        b = np.maximum(0, (1 - np.maximum(0, (self.sensitivity * np.minimum(1, min_pts - pattern)))))
        
        return np.sum(a + b, axis=1) / (2 * len(pattern))
    
    def overlap_contract(self, index):
        #proveravamo da li se hiperboksevi preklapaju
        contracted = False
        for test_box in range(len(self.hyperboxes)):
            if self.classes[test_box] == self.classes[index]:
                # ignorisemo preklapanje hiperbokseva iste klase
                continue
            expanded_box = self.hyperboxes[index]
            box = self.hyperboxes[test_box]
            
            vj, wj = expanded_box #onaj za koji gledamo da li se preklapa sa nekim
            vk, wk = box

            # moguci slucajevi preklapanja
            # trazimo najmanje preklapanje
            delta_new = delta_old = 1
            min_overlap_index = -1
            for i in range(len(vj)):
                if vj[i] < vk[i] < wj[i] < wk[i]:
                    delta_new = min(delta_old, wj[i] - vk[i])
                elif vk[i] < vj[i] < wk[i] < wj[i]:
                    delta_new = min(delta_old, wk[i] - vj[i])
                
                elif vj[i] < vk[i] < wk[i] < wj[i]:
                    delta_new = min(delta_old, min(wj[i] - vk[i], wk[i] - vj[i]))

                elif vk[i] < vj[i] < wj[i] < wk[i]:
                    delta_new = min(delta_old, min(wj[i] - vk[i], wk[i] - vj[i]))

                if delta_old - delta_new > 0:
                    min_overlap_index = i
                    delta_old = delta_new

            # ako ima preklapanja, 
            # gledamo po kojoj strani smanjujemo hiperbokseve
            if min_overlap_index >= 0:
                i = min_overlap_index
                if vj[i] < vk[i] < wj[i] < wk[i]:
                    vk[i] = wj[i] = (vk[i] + wj[i])/2

                elif vk[i] < vj[i] < wk[i] < wj[i]:
                    vj[i] = wk[i] = (vj[i] + wk[i])/2

                elif vj[i] < vk[i] < wk[i] < wj[i]:
                    if (wj[i] - vk[i]) > (wk[i] - vj[i]):
                        vj[i] = wk[i]

                    else:
                        wj[i] = vk[i]

                elif vk[i] < vj[i] < wj[i] < wk[i]:
                    if (wk[i] - vj[i]) > (wj[i] - vk[i]):
                        vk[i] = wj[i]

                    else:
                        wk[i] = vj[i]

                self.hyperboxes[test_box] = np.array([vk, wk])
                self.hyperboxes[index] = np.array([vj, wj])
                contracted = True
                
        return contracted
    
    def train_pattern(self, X, Y):
        # funkcija koja trenira klasifikator
        target = Y
        
        # ako nemamo tu klasu u klasama
        if target not in self.classes:
            # pravimo hiperboks
            if self.hyperboxes is not None:
                self.hyperboxes = np.vstack((self.hyperboxes, np.array([[X, X]])))
                #print('dodao sam hiperboks: ', self.hyperboxes)
                self.classes = np.hstack((self.classes, np.array([target])))
                #print('dodao sam klasu: ', self.classes)

            else:
                self.hyperboxes = np.array([[X, X]])
                self.classes = np.array([target])
        else:

            # sortiramo pripadnosti svim hiperboksevima za trazenu klasu
            memberships = self.membership(X)
            #print('memberships1: ', memberships)
            memberships[np.where(self.classes != target)] = 0
            #print('memberships2: ', memberships)
            memberships = sorted(list(enumerate(memberships)), key=lambda x: x[1], reverse=True)
            #print('memberships3: ', memberships)
            
            # Sirimo hiperboks
            count = 0
            while True:
                index = memberships[count][0]
                min_new = np.minimum(self.hyperboxes[index, 0, :], X)
                max_new = np.maximum(self.hyperboxes[index, 1, :], X)
                
                if self.exp_bound * len(np.unique(self.classes)) >= np.sum(max_new - min_new):
                    self.hyperboxes[index, 0] = min_new
                    self.hyperboxes[index, 1] = max_new
                    break
                else:
                    count += 1

                if count == len(memberships):
                    self.hyperboxes = np.vstack((self.hyperboxes, np.array([[X, X]])))
                    self.classes = np.hstack((self.classes, np.array([target])))
                    index = len(self.hyperboxes) - 1
                    break
                    
            contracted = self.overlap_contract(index)
        
    def fit(self, X, Y):
        for x, y in zip(X, Y):
            self.train_pattern(x, y)

    
    # predvidjamo klasu
    def predict(self, X, k):
        
        #uzimamo tacke koje odredjuju hiperbokseve
        min_pts = self.hyperboxes[:, 0, :]
        max_pts = self.hyperboxes[:, 1, :]
      
        # broj klasa 
        # i niz u kome cemo brojati pojavljivanje svake klase
        n_classes = len(np.unique(self.classes))
        cl = np.zeros(n_classes)
        
        # racunamo udaljenost tacke X od svakog hiperboksa
        # tako sto racunamo udaljenost X od prave koja prolazi kroz tacke koje odredjuju hiperboks
        distance = []
        for i in range(len(min_pts)):
            x1 = min_pts[i][0]
            y1 = min_pts[i][1]
            x2 = max_pts[i][0]
            y2 = max_pts[i][1]
            
            if(x1 == x2 and y1 == y2):
                d = abs(math.sqrt((x1-X[0])**2 + (y1 - X[1])**2))
                distance.append(d)
            elif(x1 == x2):
                d = min(abs(math.sqrt((x1-X[0])**2 + (y1 - X[1])**2)), 
                       abs(math.sqrt((x2-X[0])**2 + (y2 - X[1])**2)))
                distance.append(d)
            elif(y1 == y2):
                d = min(abs(math.sqrt((x1-X[0])**2 + (y1 - X[1])**2)), 
                       abs(math.sqrt((x2-X[0])**2 + (y2 - X[1])**2)))
                distance.append(d)
            else:
               # print('(x1, y1): ', x1, " ", y1, end="\n")
               # print('(x2, y2): ', x2, " ", y2, end="\n")
                d = abs((y2-y1)*X[0] - (x2-x1)*X[1] + x2*y1 - y2*x1) / math.sqrt((y2-y1)**2 + (x2-x1)**2)
                distance.append(d)

        # sortiramo udaljenosti od najmanje ka najvecoj
        # i uzimamo prvih k najblizih hiperbokseva
        distance = sorted(list(enumerate(distance)), key=lambda x: x[1])
        #print('distance: ', distance, end='\n')
        distance = distance[:k]
        #print('distance_k: ', distance, end='\n')
        distance_index = []
        for i in range(len(distance)):
            distance_index.append(distance[i][0])
            
        # brojimo pojavljivanje svake klase na osnovu hiperboksa koji joj pripada
        for i in range(len(distance_index)):
            index = distance_index[i]
            _class = self.classes[index]
            cl[_class] += 1
            #print('cl: ', cl, end = "\n")
        
        # nalazimo najbrojniju klasu koja je konacna klasa X
        max = 0
        final_class = 0
        for i in range (len(cl)):
            if(cl[i] >= max):
                max = cl[i]
                final_class = i
        
        
        return final_class
       
    # funkcija koja racuna procenat uspesno klasifikovanih instanci
    def score(self, X, Y, k):
        count = 0
        for x, y in zip(X, Y):
            pred = self.predict(x, k)
            if y == pred:
                count += 1
        print(count)
        print(len(Y))
        
        return count / len(Y)

IRIS dataset

In [19]:
df = pd.read_csv('iris.data', header=None, names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'])
df = df[~(df['class']=='Iris-virginica')]
#df.head()

df.replace(to_replace='Iris-setosa', value=0, inplace=True)

df.replace(to_replace='Iris-versicolor', value=1, inplace=True)

print(df.sample(frac=1))

X_train = df[['sepal length', 'petal length']].values
Y_train = df['class'].values

_max = np.max(X_train, axis=0)
_min = np.min(X_train, axis=0)
X_train = (X_train - _min) / (_max - _min)
print(X_train)

    sepal length  sepal width  petal length  petal width  class
96           5.7          2.9           4.2          1.3      1
86           6.7          3.1           4.7          1.5      1
37           4.9          3.1           1.5          0.1      0
60           5.0          2.0           3.5          1.0      1
49           5.0          3.3           1.4          0.2      0
79           5.7          2.6           3.5          1.0      1
78           6.0          2.9           4.5          1.5      1
73           6.1          2.8           4.7          1.2      1
77           6.7          3.0           5.0          1.7      1
19           5.1          3.8           1.5          0.3      0
26           5.0          3.4           1.6          0.4      0
58           6.6          2.9           4.6          1.3      1
5            5.4          3.9           1.7          0.4      0
33           5.5          4.2           1.4          0.2      0
80           5.5          2.4           

In [20]:
X_test, Y_test = X_train[-20:], Y_train[-20:]
X_train, Y_train = X_train[:-20], Y_train[:-20]
(Y_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
      dtype=int64)

In [21]:
clf1 = FuzzyKNN(sensitivity=1, exp_bound=0.4)
clf1.fit(X_train, Y_train)
clf1.score(X_test, Y_test, 4)

16
20


0.8

Fuzzy Min-Max Classificator - IRIS

In [22]:
clf2 = FuzzyMMC(sensitivity=1, exp_bound=0.4)
clf2.fit(X_train, Y_train)
clf2.score(X_test, Y_test)

0.85

KNN algoritam

In [23]:
clf3 = KNeighborsClassifier(n_neighbors = 4)
clf3.fit(X_train, Y_train)
Y_pred = clf3.predict(X_test)
accuracy = met.accuracy_score(Y_test, Y_pred)
accuracy

1.0

WINE dataset

In [24]:
print("\n---------------------------------\nWINE DATASET: \n\n")
#df = pd.read_csv('wine.csv', header=None, names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'])
#df = df[~(df['class']=='Iris-virginica')]
df = pd.read_csv('wine.csv')
df.head()

df.replace(to_replace=1, value=0, inplace=True)
df.replace(to_replace=2, value=1, inplace=True)
df.replace(to_replace=3, value=2, inplace=True)

print(df.sample(frac=1))

X_train = df[['Alcohol', 'Malic.acid']].values
Y_train = df['Wine'].values
_max = np.max(X_train, axis=0)
_min = np.min(X_train, axis=0)
X_train = (X_train - _min) / (_max - _min)
print(X_train)
print(Y_train)


---------------------------------
WINE DATASET: 


     Wine  Alcohol  Malic.acid   Ash   Acl   Mg  Phenols  Flavanoids  \
88      1    11.64        2.06  2.46  21.6   84     1.95        1.69   
83      1    13.05        3.86  2.32  22.5   85     1.65        1.59   
68      1    13.34        0.94  2.36  17.0  110     2.53        1.30   
131     2    12.88        2.99  2.40  20.0  104     1.30        1.22   
98      1    12.37        1.07  2.10  18.5   88     3.52        3.75   
164     2    13.78        2.76  2.30  22.0   90     1.35        0.68   
157     2    12.45        3.03  2.64  27.0   97     1.90        0.58   
37      0    13.05        1.65  2.55  18.0   98     2.45        2.43   
168     2    13.58        2.58  2.69  24.5  105     1.55        0.84   
122     1    12.42        4.43  2.73  26.5  102     2.20        2.13   
6       0    14.39        1.87  2.45  14.6   96     2.50        2.52   
10      0    14.10        2.16  2.30  18.0  105     2.95        3.32   
96      1   

In [25]:
X_test, Y_test = X_train[-20:], Y_train[-20:]
X_train, Y_train = X_train[:-20], Y_train[:-20]
(Y_test)

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
      dtype=int64)

Fuzzy Min-Max Classificator - WINE

In [26]:
clf2 = FuzzyMMC(sensitivity=1, exp_bound=0.4)
clf2.fit(X_train, Y_train)
clf2.score(X_test, Y_test)

0.65

Algoritam koji koristi KNN

In [29]:
clf1 = FuzzyKNN(sensitivity=1, exp_bound=0.4)
clf1.fit(X_train, Y_train)
clf1.score(X_test, Y_test, 3)

3
20


0.15

KNN algoritam

In [28]:
clf3 = KNeighborsClassifier(n_neighbors = 4)
clf3.fit(X_train, Y_train)
Y_pred = clf3.predict(X_test)
accuracy = met.accuracy_score(Y_test, Y_pred)
accuracy

0.45