In [1]:
import numpy as np
import pandas as pd
import os
import math
import random

from fuzzy import *

In [2]:
class FuzzyKNN:
    def __init__(self, sensitivity, exp_bound):
        # konstruktor
        self.sensitivity = sensitivity
        self.hyperboxes = None
        self.classes = np.array([])
        self.exp_bound = exp_bound
        
    def membership(self, pattern):
        # racuna pripadnost i vraca niz pripadnosti svakom hiperbiksu
        min_pts = self.hyperboxes[:, 0, :]
        max_pts = self.hyperboxes[:, 1, :]

        a = np.maximum(0, (1 - np.maximum(0, (self.sensitivity * np.minimum(1, pattern - max_pts)))))
        b = np.maximum(0, (1 - np.maximum(0, (self.sensitivity * np.minimum(1, min_pts - pattern)))))
        
        return np.sum(a + b, axis=1) / (2 * len(pattern))
    
    def overlap_contract(self, index):
        #proveravamo da li se hiperboksevi preklapaju
        contracted = False
        for test_box in range(len(self.hyperboxes)):
            if self.classes[test_box] == self.classes[index]:
                # ignorisemo preklapanje hiperbokseva iste klase
                continue
            expanded_box = self.hyperboxes[index]
            box = self.hyperboxes[test_box]
            
            vj, wj = expanded_box #onaj za koji gledamo da li se preklapa sa nekim
            vk, wk = box

            # moguci slucajevi preklapanja
            # trazimo najmanje preklapanje
            delta_new = delta_old = 1
            min_overlap_index = -1
            for i in range(len(vj)):
                if vj[i] < vk[i] < wj[i] < wk[i]:
                    delta_new = min(delta_old, wj[i] - vk[i])
                elif vk[i] < vj[i] < wk[i] < wj[i]:
                    delta_new = min(delta_old, wk[i] - vj[i])
                
                elif vj[i] < vk[i] < wk[i] < wj[i]:
                    delta_new = min(delta_old, min(wj[i] - vk[i], wk[i] - vj[i]))

                elif vk[i] < vj[i] < wj[i] < wk[i]:
                    delta_new = min(delta_old, min(wj[i] - vk[i], wk[i] - vj[i]))

                if delta_old - delta_new > 0:
                    min_overlap_index = i
                    delta_old = delta_new

            # ako ima preklapanja, 
            # gledamo po kojoj strani smanjujemo hiperboks
            if min_overlap_index >= 0:
                i = min_overlap_index
                # We need to contract the expanded box
                if vj[i] < vk[i] < wj[i] < wk[i]:
                    vk[i] = wj[i] = (vk[i] + wj[i])/2

                elif vk[i] < vj[i] < wk[i] < wj[i]:
                    vj[i] = wk[i] = (vj[i] + wk[i])/2

                elif vj[i] < vk[i] < wk[i] < wj[i]:
                    if (wj[i] - vk[i]) > (wk[i] - vj[i]):
                        vj[i] = wk[i]

                    else:
                        wj[i] = vk[i]

                elif vk[i] < vj[i] < wj[i] < wk[i]:
                    if (wk[i] - vj[i]) > (wj[i] - vk[i]):
                        vk[i] = wj[i]

                    else:
                        wk[i] = vj[i]

                self.hyperboxes[test_box] = np.array([vk, wk])
                self.hyperboxes[index] = np.array([vj, wj])
                contracted = True
                
        return contracted
    
    def train_pattern(self, X, Y):
        # funkcija koja trenira klasifikator
        target = Y
        
        # ako nemamo tu klasu u klasama
        if target not in self.classes:
            # pravimo hiperboks
            if self.hyperboxes is not None:
                self.hyperboxes = np.vstack((self.hyperboxes, np.array([[X, X]])))
                #print('dodao sam hiperboks: ', self.hyperboxes)
                self.classes = np.hstack((self.classes, np.array([target])))
                #print('dodao sam klasu: ', self.classes)

            else:
                self.hyperboxes = np.array([[X, X]])
                self.classes = np.array([target])
        else:

            # sortiramo pripadnosti svim hiperboksevima za trazenu klasu
            memberships = self.membership(X)
            #print('memberships1: ', memberships)
            memberships[np.where(self.classes != target)] = 0
            #print('memberships2: ', memberships)
            memberships = sorted(list(enumerate(memberships)), key=lambda x: x[1], reverse=True)
            #print('memberships3: ', memberships)
            
            # Sirimo hiperboks
            count = 0
            while True:
                index = memberships[count][0]
                min_new = np.minimum(self.hyperboxes[index, 0, :], X)
                max_new = np.maximum(self.hyperboxes[index, 1, :], X)
                
                if self.exp_bound * len(np.unique(self.classes)) >= np.sum(max_new - min_new):
                    self.hyperboxes[index, 0] = min_new
                    self.hyperboxes[index, 1] = max_new
                    break
                else:
                    count += 1

                if count == len(memberships):
                    self.hyperboxes = np.vstack((self.hyperboxes, np.array([[X, X]])))
                    self.classes = np.hstack((self.classes, np.array([target])))
                    index = len(self.hyperboxes) - 1
                    break
                    
            contracted = self.overlap_contract(index)
        
    def fit(self, X, Y):
        for x, y in zip(X, Y):
            self.train_pattern(x, y)

    
    
    def predict(self, X, k):
        min_pts = self.hyperboxes[:, 0, :]
        max_pts = self.hyperboxes[:, 1, :]
      
        n_classes = len(np.unique(self.classes))
        cl = np.zeros(n_classes)
        
        distance = []
        for i in range(len(min_pts)):
            x1 = min_pts[i][0]
            y1 = min_pts[i][1]
            x2 = max_pts[i][0]
            y2 = max_pts[i][1]
            
            if(x1 == x2 and y1 == y2):
                d = abs(math.sqrt((x1-X[0])**2 + (y1 - X[1])**2))
                distance.append(d)
            elif(x1 == x2):
                d = min(abs(math.sqrt((x1-X[0])**2 + (y1 - X[1])**2)), 
                       abs(math.sqrt((x2-X[0])**2 + (y2 - X[1])**2)))
                distance.append(d)
            elif(y1 == y2):
                d = min(abs(math.sqrt((x1-X[0])**2 + (y1 - X[1])**2)), 
                       abs(math.sqrt((x2-X[0])**2 + (y2 - X[1])**2)))
                distance.append(d)
            else:
               # print('(x1, y1): ', x1, " ", y1, end="\n")
               # print('(x2, y2): ', x2, " ", y2, end="\n")
                d = abs((y2-y1)*X[0] - (x2-x1)*X[1] + x2*y1 - y2*x1) / math.sqrt((y2-y1)**2 + (x2-x1)**2)
                distance.append(d)

        distance = sorted(list(enumerate(distance)), key=lambda x: x[1])
        #print('distance: ', distance, end='\n')
        
        distance = distance[:k]
        #print('distance_k: ', distance, end='\n')
        
        distance_index = []
        for i in range(len(distance)):
            distance_index.append(distance[i][0])
    
        for i in range(len(distance_index)):
            index = distance_index[i]
            _class = self.classes[index]
            cl[_class] += 1
            #print('cl: ', cl, end = "\n")
        
        
        max = 0
        final_class = 0
        for i in range (len(cl)):
            if(cl[i] >= max):
                max = cl[i]
                final_class = i
        
        #final_class = np.argmax(cl)
        
        return final_class
        
    def score(self, X, Y, k):
        count = 0
        for x, y in zip(X, Y):
            pred = self.predict(x, k)
            if y == pred:
                count += 1
        print(count)
        print(len(Y))
        
        return count / len(Y)

IRIS dataset

In [3]:
df = pd.read_csv('iris.data', header=None, names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'])
df = df[~(df['class']=='Iris-virginica')]
#df.head()

df.replace(to_replace='Iris-setosa', value=0, inplace=True)

df.replace(to_replace='Iris-versicolor', value=1, inplace=True)

print(df.sample(frac=1))

X_train = df[['sepal length', 'petal length']].values
Y_train = df['class'].values

_max = np.max(X_train, axis=0)
_min = np.min(X_train, axis=0)
X_train = (X_train - _min) / (_max - _min)
print(X_train)

    sepal length  sepal width  petal length  petal width  class
69           5.6          2.5           3.9          1.1      1
68           6.2          2.2           4.5          1.5      1
29           4.7          3.2           1.6          0.2      0
83           6.0          2.7           5.1          1.6      1
95           5.7          3.0           4.2          1.2      1
..           ...          ...           ...          ...    ...
46           5.1          3.8           1.6          0.2      0
16           5.4          3.9           1.3          0.4      0
25           5.0          3.0           1.6          0.2      0
59           5.2          2.7           3.9          1.4      1
5            5.4          3.9           1.7          0.4      0

[100 rows x 5 columns]
[[0.2962963  0.09756098]
 [0.22222222 0.09756098]
 [0.14814815 0.07317073]
 [0.11111111 0.12195122]
 [0.25925926 0.09756098]
 [0.40740741 0.17073171]
 [0.11111111 0.09756098]
 [0.25925926 0.12195122]
 [0.0370

In [4]:
X_test, Y_test = X_train[-20:], Y_train[-20:]
X_train, Y_train = X_train[:-20], Y_train[:-20]
(Y_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [5]:
clf1 = FuzzyKNN(sensitivity=1, exp_bound=0.4)

In [6]:
clf1.fit(X_train, Y_train)

In [7]:
clf1.score(X_test, Y_test, 4)

16
20


0.8

Fuzzy Min-Max Classificator - IRIS

In [8]:
clf2 = FuzzyMMC(sensitivity=1, exp_bound=0.4)

In [9]:
clf2.fit(X_train, Y_train)

In [10]:
clf2.score(X_test, Y_test)

0.85

WINE dataset

In [11]:
print("\n---------------------------------\nWINE DATASET: \n\n")
#df = pd.read_csv('wine.csv', header=None, names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'])
#df = df[~(df['class']=='Iris-virginica')]
df = pd.read_csv('wine.csv')
df.head()

df.replace(to_replace=1, value=0, inplace=True)
df.replace(to_replace=2, value=1, inplace=True)
df.replace(to_replace=3, value=2, inplace=True)

print(df.sample(frac=1))

X_train = df[['Alcohol', 'Malic.acid']].values
Y_train = df['Wine'].values
_max = np.max(X_train, axis=0)
_min = np.min(X_train, axis=0)
X_train = (X_train - _min) / (_max - _min)
print(X_train)
print(Y_train)


---------------------------------
WINE DATASET: 


     Wine  Alcohol  Malic.acid   Ash   Acl   Mg  Phenols  Flavanoids  \
124     1    11.87        4.31  2.39  21.0   82     2.86        3.03   
41      0    13.41        3.84  2.12  18.8   90     2.45        2.68   
118     1    12.77        3.43  1.98  16.0   80     1.63        1.25   
77      1    11.84        2.89  2.23  18.0  112     1.72        1.32   
150     2    13.50        3.12  2.62  24.0  123     1.40        1.57   
..    ...      ...         ...   ...   ...  ...      ...         ...   
14      0    14.38        1.87  2.38  12.0  102     3.30        3.64   
166     2    13.45        3.70  2.60  23.0  111     1.70        0.92   
137     2    12.53        5.51  2.64  25.0   96     1.79        0.60   
88      1    11.64        2.06  2.46  21.6   84     1.95        1.69   
104     1    12.51        1.73  1.98  20.5   85     2.20        1.92   

     Nonflavanoid.phenols  Proanth  Color.int   Hue    OD  Proline  
124           

In [12]:
X_test, Y_test = X_train[-20:], Y_train[-20:]
X_train, Y_train = X_train[:-20], Y_train[:-20]
(Y_test)

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

Fuzzy Min-Max Classificator - WINE

In [13]:
clf2 = FuzzyMMC(sensitivity=1, exp_bound=0.4)

In [14]:
clf2.fit(X_train, Y_train)

In [15]:
clf2.score(X_test, Y_test)

0.65

Algoritam koji koristi KNN

In [16]:
clf1 = FuzzyKNN(sensitivity=1, exp_bound=0.2)

In [17]:
clf1.fit(X_train, Y_train)

In [18]:
clf1.score(X_test, Y_test, 3)

4
20


0.2