In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
class ProcessingData:
    """Static helpers that prepare a DataFrame for the soft-set classifier.

    None of the helpers modifies the frame it receives; each one returns new
    object(s), so a cell calling them can be re-run safely.
    """

    @staticmethod
    def splitSet(x: pd.DataFrame, k: float) -> tuple:
        """Split ``x`` by position into a leading and a trailing part.

        Args:
            x: Frame to split; the row order is kept as is.
            k: Fraction of the rows that goes to the first part
                (expected to lie in [0, 1]).

        Returns:
            ``(xTrain, xVal)`` where ``xTrain`` holds the first
            ``int(len(x) * k)`` rows and ``xVal`` the remaining ones.
        """
        n = int(len(x) * k)
        # .iloc makes the positional intent explicit regardless of index type.
        return x.iloc[:n], x.iloc[n:]

    @staticmethod
    def shuffle(x: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``x`` with its rows in random order.

        The rows are permuted as whole units in a single step. Swapping rows
        one pair at a time through ``x.iloc[i], x.iloc[j] = ...`` is unsafe:
        when ``iloc`` hands back views, the first write overwrites the saved
        row and rows end up duplicated.

        The index labels stay in their original positions (only the row
        contents move), and the column dtypes are preserved.
        Randomness comes from NumPy's global generator, so ``np.random.seed``
        makes the result reproducible.

        Args:
            x: Frame whose rows should be shuffled; it is not modified.

        Returns:
            A new frame with the same rows in random order.
        """
        order = np.random.permutation(len(x))
        shuffled = x.iloc[order].copy()
        # Keep the labels where they were so label-based access still lines up.
        shuffled.index = x.index
        return shuffled

    @staticmethod
    def normalize(x: pd.DataFrame) -> pd.DataFrame:
        """Min-max scale every column except ``'Outcome'`` to the range [0, 1].

        Each column is scaled as a whole, so the result does not depend on the
        index labels and integer columns are converted to float in one step.
        A constant column (max == min) is mapped to 0.0 instead of dividing
        by zero.

        Args:
            x: Frame with numeric feature columns and, optionally, an
                ``'Outcome'`` column that is passed through untouched. The
                frame is not modified.

        Returns:
            A new frame with the scaled feature columns.
        """
        normalized = x.copy()
        for column in x.columns[x.columns != 'Outcome']:
            values = x[column].astype(float)
            low = values.min()
            span = values.max() - low
            if span == 0:
                # Every value is identical; there is no spread to scale by.
                normalized[column] = 0.0
            else:
                normalized[column] = (values - low) / span
        return normalized

In [None]:
class soft_set:
    """Builds a two-class soft set from a labelled frame and scores vectors."""

    @staticmethod
    def build_soft_set(x: pd.DataFrame) -> dict:
        """Derive a 0/1 flag per feature for each of the classes 0 and 1.

        For a given class and feature, the rows of that class are compared
        with the feature's mean over the whole frame. The flag is 0 when
        strictly more of them lie below the mean than at or above it, and 1
        otherwise (ties included).

        Args:
            x: Frame with feature columns and an ``'Outcome'`` column.

        Returns:
            ``{0: {feature: flag, ...}, 1: {feature: flag, ...}}``.
        """
        features = x.drop(['Outcome'], axis=1)
        built = {0: {}, 1: {}}
        for label, flags in built.items():
            members = x.loc[x['Outcome'] == label].drop(['Outcome'], axis=1)
            for col in features.columns:
                threshold = features[col].mean()
                # One entry per row of this class: True when below the mean.
                is_below = [val < threshold for val in members[col]]
                below = sum(1 for hit in is_below if hit)
                at_or_above = len(is_below) - below
                flags[col] = 0 if below > at_or_above else 1
        return built

    @staticmethod
    def get_membership(soft_set: dict, vector: pd.Series) -> float:
        """Score ``vector`` against every class and return the highest score.

        The score of a class is 1 plus the sum of the vector's values for the
        features whose flag is 1 in that class.

        Args:
            soft_set: Mapping as produced by ``build_soft_set``.
            vector: One row of data, indexed by feature name.

        Returns:
            The largest class score as a float.
        """
        values = vector.to_dict()
        scores = {}
        for label, flags in soft_set.items():
            total = 1
            for name, flag in flags.items():
                total += flag * values[name]
            scores[label] = total
        return float(max(scores.values()))

In [None]:
class soft_KNN:
    """k-nearest-neighbour classifier working on soft-set membership scores."""

    @staticmethod
    def calc_euclidian_distance(val1: float, val2: float) -> float:
        """Return the absolute difference between two scalar values."""
        return np.abs(val1 - val2)

    @staticmethod
    def clustering(x: pd.DataFrame, sample: pd.Series, k: int) -> int:
        """Predict the class (0 or 1) of ``sample`` from its nearest rows in ``x``.

        A soft set is built from ``x``; every row of ``x`` and the sample are
        reduced to one membership score, and the rows are ranked by the
        absolute difference between their score and the sample's score. The
        class occurring most often among the ``k`` closest rows is returned
        (class 0 wins a tie).

        Args:
            x: Labelled frame with feature columns and an ``'Outcome'`` column
                holding 0 or 1.
            sample: Row to classify, indexed by the same feature names.
            k: Number of neighbours that vote. Values larger than ``len(x)``
                are reduced to ``len(x)``; values below 1 mean nobody votes.

        Returns:
            The predicted class label, 0 or 1.
        """
        soft_x = soft_set.build_soft_set(x)
        # The sample's score does not depend on the row, so compute it once.
        sample_membership = soft_set.get_membership(soft_x, sample)
        distances = [
            soft_KNN.calc_euclidian_distance(
                soft_set.get_membership(soft_x, x.iloc[i]), sample_membership
            )
            for i in range(len(x))
        ]
        # A stable sort keeps equally distant rows in their original order,
        # which makes the vote reproducible.
        ranked = x.assign(distance=distances).sort_values(by='distance', kind='mergesort')
        neighbours = max(0, min(k, len(ranked)))
        classes = {0: 0, 1: 0}
        for outcome in ranked['Outcome'].iloc[:neighbours]:
            classes[outcome] += 1
        return max(classes, key=classes.get)

In [None]:
# Result of building the soft set for the data from diabetes.csv.
# The path uses a forward slash so it resolves on Windows, Linux and macOS;
# the earlier raw string contained two literal backslashes.
example = pd.read_csv('diabetes/diabetes.csv')
example = ProcessingData.shuffle(example)
example = ProcessingData.normalize(example)
# 70 % of the rows form the training part, the rest the validation part.
exampleT, exampleV = ProcessingData.splitSet(example, 0.7)
soft_set_T = soft_set.build_soft_set(exampleT)
soft_set_V = soft_set.build_soft_set(exampleV)
print(soft_set_T)

In [None]:
# Repeat the shuffle / normalize / split / classify experiment ten times and
# collect the accuracy (in percent) of every run.
arr_of_results = []
# Read the file once; every run starts from its own copy of the raw data.
# The forward slash keeps the path usable on Windows, Linux and macOS.
diabetes_raw = pd.read_csv('diabetes/diabetes.csv')
for run in range(10):
    diabetes = ProcessingData.shuffle(diabetes_raw.copy())
    diabetes = ProcessingData.normalize(diabetes)
    diabetesT, diabetesV = ProcessingData.splitSet(diabetes, 0.7)

    # The neighbour count depends only on the validation-set size, so it is
    # computed once per run instead of once per classified row.
    k = int(np.sqrt(len(diabetesV)))
    acc = 0
    for i in range(len(diabetesV)):
        row = diabetesV.iloc[i]
        if row.Outcome == soft_KNN.clustering(diabetesT, row, k):
            acc += 1
    accuracy = acc / len(diabetesV) * 100
    print(f"Accuracy {run}:", accuracy)
    arr_of_results.append(accuracy)

In [None]:
# Accuracy values (in percent) appended by the experiment loop, one per run.
print(arr_of_results)