In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
import random

In [4]:
# Load the diabetes dataset from 'diabetes.csv' (path is relative to the working directory).
data = pd.read_csv('diabetes.csv')
# New file for the fuzzy-logic work, started from scratch

In [5]:
# Summary statistics of the unfiltered data. In the output, the minimum of Glucose, BloodPressure,
# SkinThickness, Insulin and BMI is 0 (presumably a placeholder for a missing measurement - TODO confirm).
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [6]:
# Keep only the rows in which none of the listed measurement columns equals 0,
# then show the summary statistics of what is left.
data = data[
    (data[['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']] != 0).all(axis=1)
]
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,392.0,392.0,392.0,392.0,392.0,392.0,392.0,392.0,392.0
mean,3.30102,122.627551,70.663265,29.145408,156.056122,33.086224,0.523046,30.864796,0.331633
std,3.211424,30.860781,12.496092,10.516424,118.84169,7.027659,0.345488,10.200777,0.471401
min,0.0,56.0,24.0,7.0,14.0,18.2,0.085,21.0,0.0
25%,1.0,99.0,62.0,21.0,76.75,28.4,0.26975,23.0,0.0
50%,2.0,119.0,70.0,29.0,125.5,33.2,0.4495,27.0,0.0
75%,5.0,143.0,78.0,37.0,190.0,37.1,0.687,36.0,1.0
max,17.0,198.0,110.0,63.0,846.0,67.1,2.42,81.0,1.0


In [7]:
# Static class containing methods useful for data processing
class ProcessingData:
    """
    Namespace of static helpers for preparing a pd.DataFrame: shuffling rows,
    normalizing feature columns and splitting into training / validation parts.
    None of the helpers modifies the DataFrame it receives.
    """

    # method shuffling the rows of a pd.DataFrame
    @staticmethod
    def shuffle(df: pd.DataFrame) -> pd.DataFrame:
        """
        Receives a DataFrame and returns a copy with its rows in random order and a fresh 0..n-1 index.
        The permutation is drawn with the standard-library ``random`` module, so calling
        ``random.seed(...)`` beforehand makes the result reproducible.
        :param df: pd.DataFrame
        :return df: pd.DataFrame
        """
        # Permute row positions instead of swapping rows inside df: the caller's frame stays
        # untouched and no row can be duplicated by a swap that reads from an aliased row.
        order = list(range(len(df)))
        random.shuffle(order)
        return df.iloc[order].reset_index(drop=True)

    # method normalizing the columns of a pd.DataFrame using standard-deviation (z-score) normalization
    @staticmethod
    def normalize_std(df: pd.DataFrame, label: str) -> pd.DataFrame:
        """
        Receives a DataFrame and a class label to skip during normalization, normalizes every other column
        to (value - column mean) / column standard deviation and returns the normalized version with the
        label column re-attached as the last column.
        A column whose standard deviation is 0 (constant column) becomes all zeros instead of NaN.
        :param label: str
        :param df: pd.DataFrame
        :return df: pd.DataFrame
        """
        features = df.drop(labels=label, axis=1)
        spread = features.std()
        # A zero spread would give 0/0 = NaN; dividing by 1 leaves the centred value 0.
        spread = spread.where(spread != 0, 1.0)
        normalized = (features - features.mean()) / spread
        return normalized.join(df[label])

    # method normalizing the columns of a pd.DataFrame using min-max normalization
    @staticmethod
    def normalize_minmax(df: pd.DataFrame, label: str) -> pd.DataFrame:
        """
        Receives a DataFrame and a class label to skip during normalization, rescales every other column
        to (value - column min) / (column max - column min) and returns the normalized version with the
        label column re-attached as the last column.
        A column whose max equals its min (constant column) becomes all zeros instead of NaN.
        :param label: str
        :param df: pd.DataFrame
        :return df: pd.DataFrame
        """
        features = df.drop(labels=label, axis=1)
        lowest = features.min()
        value_range = features.max() - lowest
        # A zero range would give 0/0 = NaN; dividing by 1 leaves the shifted value 0.
        value_range = value_range.where(value_range != 0, 1)
        normalized = (features - lowest) / value_range
        return normalized.join(df[label])

    # method splitting a pd.DataFrame into a training set and a validation set
    @staticmethod
    def split(df: pd.DataFrame, ratio: float) -> tuple[pd.DataFrame, pd.DataFrame]:
        """
        Receives a DataFrame and returns two DataFrames: the first int(len(df) * ratio) rows as the
        training part and the remaining rows as the validation part. Both parts get a fresh 0..n-1 index.
        Rows are taken in their current order, so shuffle first if a random split is wanted.
        :param df: pd.DataFrame
        :param ratio: float
        :return training_df, validation_df: tuple[pd.DataFrame, pd.DataFrame]
        """
        cutoff = int(len(df) * ratio)
        # .iloc makes the positional row slicing explicit.
        return df.iloc[:cutoff].reset_index(drop=True), df.iloc[cutoff:].reset_index(drop=True)

In [8]:
SEED = 42          # fixed so that Restart & Run All reproduces the same shuffle and split
TRAIN_RATIO = 0.7  # share of rows that goes into the training part

# ProcessingData.shuffle draws from the stdlib `random` module, so seed it right before shuffling.
random.seed(SEED)
shuffled_data = ProcessingData.shuffle(data)
# Keep the validation part under a real name; assigning it to `_` would discard it and
# also shadow IPython's `_` (last output) variable.
training_data, validation_data = ProcessingData.split(shuffled_data, TRAIN_RATIO)

In [16]:
class Fuzzy:
    """
    Work-in-progress container for a fuzzy system: stores the breakpoints of fuzzy sets for
    antecedents and the consequent, and evaluates triangular / trapezoidal membership functions.
    Rules and plotting are still TODO stubs.
    """

    def __init__(self):
        # dictionary of dictionaries of antecedents:
        # {parameter: {linguistic_value: (breakpoints, ...)}}
        self.antecedents = {}
        # TODO: decide what this is
        # Stays None until add_consequent() is first called; after that it has the same
        # {parameter: {linguistic_value: (breakpoints, ...)}} layout as antecedents.
        self.consequent = None
        # TODO: choose the implementation
        self.rules = []

    @staticmethod
    def triangular_function(n: float, a: float, b: float, c: float) -> float:
        """
        Triangular membership function with feet at a and c and peak at b.
        :param n: value to evaluate
        :param a: left foot (membership 0)
        :param b: peak (membership 1)
        :param c: right foot (membership 0)
        :return: 1 at n == b, 0 for n <= a or n >= c, linear in between
        """
        # print(n, a, b, c)
        # Test the peak first so that a degenerate triangle (a == b or b == c)
        # still evaluates to 1 at its peak instead of being caught by a foot test.
        if n == b:
            return 1
        if n <= a:
            return 0
        if a < n < b:
            return (n - a) / (b - a)
        if b < n < c:
            return (c - n) / (c - b)
        if n >= c:
            return 0

    @staticmethod
    def trapezoidal_function(n: float, a: float, b: float, c: float, d: float) -> float:
        """
        Trapezoidal membership function with feet at a and d and a plateau between b and c.
        When c == d the plateau extends to every n >= d (right shoulder); when a == b it
        extends to every n <= a (left shoulder).
        :param n: value to evaluate
        :param a: left foot
        :param b: start of the plateau
        :param c: end of the plateau
        :param d: right foot
        :return: 1 on the plateau / shoulders, 0 outside the feet, linear on the slopes
        """
        # print(n, a, b, c, d)
        # Shoulder cases must be tested before the generic foot tests below.
        if c == d and n >= d:
            return 1
        if a == b and n <= a:
            return 1
        if n <= a:
            return 0
        if a < n < b:
            return (n - a) / (b - a)
        if b <= n <= c:
            return 1
        if c < n < d:
            return (d - n) / (d - c)
        if n >= d:
            return 0

    def membership_function(self, n: float, *args) -> "float | None":
        """
        Evaluates n against a fuzzy set given by its breakpoints, prints the membership degree
        and also returns it so the value can be collected (e.g. for plotting).
        :param n: value to evaluate
        :param args: 3 breakpoints (triangular) or 4 breakpoints (trapezoidal)
        :return: the membership degree, or None when the number of breakpoints is neither 3 nor 4
                 (in that case a Polish "invalid number of arguments" message is printed)
        """
        # print(len(args))
        # print(*args)
        if len(args) == 3:
            degree = self.triangular_function(n, *args)
        elif len(args) == 4:
            degree = self.trapezoidal_function(n, *args)
        else:
            print("Nieprawidłowa liczba argumentów")
            return None
        print(degree)
        return degree

    def add_antecedent(self, parameter: str, linguistic_value: str, *args) -> None:
        """
        Registers (or overwrites) a fuzzy set of an antecedent.
        :param parameter: name of the antecedent
        :param linguistic_value: name of the fuzzy set
        :param args: breakpoints of the membership function, stored as a tuple
        """
        # TODO: change the implementation
        # antecedents is a dict of dicts, so index by parameter and then by linguistic value.
        self.antecedents.setdefault(parameter, {})[linguistic_value] = args

    def add_consequent(self, parameter: str, linguistic_value: str, *args) -> None:
        """
        Registers (or overwrites) a fuzzy set of the consequent. Stored in self.consequent,
        separately from the antecedents.
        :param parameter: name of the consequent
        :param linguistic_value: name of the fuzzy set
        :param args: breakpoints of the membership function, stored as a tuple
        """
        # TODO: potentially change the implementation
        if self.consequent is None:
            self.consequent = {}
        self.consequent.setdefault(parameter, {})[linguistic_value] = args

    def add_rule(self):
        """Placeholder: currently only prints the list of rules."""
        # TODO: implementation
        print(self.rules)

    # Regular instance method (it takes self), so fuzzy.view(parameter, linguistic_value) works.
    def view(self, parameter: str, linguistic_value: str) -> None:
        """
        Placeholder for plotting a fuzzy set; currently only prints an empty line.
        :param parameter: name of the antecedent
        :param linguistic_value: name of the fuzzy set
        """
        # TODO: plotting an antecedent
        print()
        # x = np.linspace(0., 7., num=101)
        # y = np.array([fuzzy.membership_function(i, 1, 1, 3, 7) for i in x])
        #
        # fig, ax = plt.subplots()
        # ax.plot(x, y)
        # loc = plticker.MultipleLocator(base=1.0)
        # ax.xaxis.set_major_locator(loc)
        #
        # plt.plot(x, y, 'r')
        # plt.show()

In [21]:
# Create the fuzzy system instance used by the cells below.
fuzzy = Fuzzy()

In [28]:
# Manual spot checks of fuzzy.membership_function, left commented out.
# The first argument is the value being evaluated; the remaining 3 (triangular) or 4 (trapezoidal)
# arguments are the breakpoints. The trailing label presumably names the linguistic value tested.
# fuzzy.membership_function(9, 5, 10, 17, 17) # high
# print()
# fuzzy.membership_function(145, 125, 155, 199, 199) # high
# print()
# fuzzy.membership_function(80, 90, 102, 122, 122) # high
# print()
# fuzzy.membership_function(46, 36, 43, 99, 99) # high
# print()
# fuzzy.membership_function(130, 76, 125, 190) # medium
# fuzzy.membership_function(130, 125, 190, 846, 846) # high
# print()
# fuzzy.membership_function(37.9, 28, 37, 67.1, 67.1) # high
# print()
# fuzzy.membership_function(0.637, 0.25, 0.5, 0.75) # medium
# fuzzy.membership_function(0.637, 0.5, 0.75, 2.42, 2.42) # high
# print()
# fuzzy.membership_function(40, 21, 21, 34, 45) # low
# fuzzy.membership_function(40, 34, 45, 56) # medium