In [1]:
import os
import sys

# `os.path.join('../')` only builds a string and throws it away, so it never
# affected import resolution. Presumably the intent was to make the parent
# directory importable for the project imports below - confirm.
# The membership guard keeps the cell idempotent when it is re-run.
if '../' not in sys.path:
    sys.path.append('../')
from collections import defaultdict, OrderedDict
from typing import Dict, List, Any, Callable

from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from doggos.fuzzy_sets import Type1FuzzySet
from doggos.fuzzy_sets.fuzzy_set import FuzzySet
from doggos.induction.information_system import InformationSystem
from doggos.inference import MamdaniInferenceSystem
from doggos.inference.defuzzification_algorithms import center_of_gravity, karnik_mendel
from doggos.inference.inference_system import InferenceSystem
from doggos.knowledge import Rule, Clause, fuzzify, LinguisticVariable, Domain
from doggos.knowledge.consequents import MamdaniConsequent
from doggos.knowledge.consequents.consequent import Consequent
from doggos.utils.grouping_functions import create_set_of_variables
from doggos.utils.membership_functions.membership_functions import generate_equal_gausses, sigmoid, gaussian

In [8]:
dataset_name = "Parkinson"
dataset_path = "../data/" + dataset_name + ".csv"
ds = pd.read_csv(dataset_path, sep=";")
print(ds.head())

# Remove only the leftover index column(s) that pandas auto-names
# "Unnamed: N" when a frame was saved with its index. Dropping the first
# column unconditionally would delete a real feature every time this cell is
# re-run, and the file on disk would be overwritten with the damaged frame.
index_like_columns = [col for col in ds.columns if str(col).startswith("Unnamed:")]
if index_like_columns:
    ds = ds.drop(columns=index_like_columns)
    # Persist the cleaned file so the stray index column does not come back.
    ds.to_csv(dataset_path, sep=";", index=False)

   Unnamed: 0       F0       F1       F2       F3       F4       F5       F6  \
0           0  119.992  157.302   74.997  0.00784  0.00007  0.00370  0.00554   
1           1  122.400  148.650  113.819  0.00968  0.00008  0.00465  0.00696   
2           2  116.682  131.111  111.555  0.01050  0.00009  0.00544  0.00781   
3           3  116.676  137.871  111.366  0.00997  0.00009  0.00502  0.00698   
4           4  116.014  141.781  110.655  0.01284  0.00011  0.00655  0.00908   

        F7       F8  ...      F13      F14     F15       F16       F17  \
0  0.01109  0.04374  ...  0.06545  0.02211  21.033  0.414783  0.815285   
1  0.01394  0.06134  ...  0.09403  0.01929  19.085  0.458359  0.819521   
2  0.01633  0.05233  ...  0.08270  0.01309  20.651  0.429895  0.825288   
3  0.01505  0.05492  ...  0.08771  0.01353  20.644  0.434969  0.819235   
4  0.01966  0.06425  ...  0.10470  0.01767  19.649  0.417356  0.823484   

        F18       F19       F20       F21  Decision  
0 -4.813031  0.26648

In [40]:
# Standardise every feature column (everything except "Decision") and fit a
# 4-component linear PCA on the result.
# Note: despite its name, `min_max_scaler` holds a StandardScaler.
min_max_scaler = StandardScaler()
values_no_decision = min_max_scaler.fit_transform(
    ds.drop(columns=["Decision"]).values
)
pca = PCA(n_components=4).fit(values_no_decision)

# Share of total variance captured by each of the four components.
print(pca.explained_variance_ratio_)

[0.23327578 0.21109452 0.13147107 0.12027503]


In [74]:
# Cosine-kernel PCA, fitted for comparison with the linear PCA.
# It is bound to its own name on purpose: a later cell calls
# `pca.transform(...)` and saves the result as "... StdPCA.csv", so rebinding
# `pca` here would make a top-to-bottom run export kernel components under
# that file name.
kernel_pca = KernelPCA(n_components=4, kernel='cosine')
values_no_decision = ds.drop(labels=["Decision"], axis=1)
# Note: despite its name, `min_max_scaler` holds a StandardScaler.
min_max_scaler = StandardScaler()
values_no_decision = min_max_scaler.fit_transform(values_no_decision.values)
kernel_pca.fit(values_no_decision)
# Each eigenvalue as a fraction of the sum of the fitted eigenvalues
# (a relative share among them, not a share of total variance).
print(kernel_pca.eigenvalues_ / np.sum(kernel_pca.eigenvalues_))

[0.33121241 0.32015514 0.19676206 0.15187038]


In [41]:
# Project the standardised features onto the fitted components and wrap the
# result in a DataFrame with columns F0..F{k-1} plus the original label.
principal_components = pca.transform(values_no_decision)
cols = [f'F{idx}' for idx in range(principal_components.shape[1])]
pca_pd_ds = pd.DataFrame(principal_components, columns=cols).assign(
    Decision=ds['Decision']
)
pca_pd_ds.head()

Unnamed: 0,F0,F1,F2,F3,Decision
0,-1.563533,-0.675045,-0.446649,0.330954,1
1,1.418342,0.170636,0.040022,0.429542,0
2,-0.152949,-1.766084,1.227255,-1.360971,1
3,1.555166,0.763236,0.288356,-0.016735,0
4,0.017222,1.580277,0.070721,-0.750512,1


In [42]:
# Write the projected dataset next to the source file, ';'-separated and
# without the DataFrame index.
std_pca_csv_path = "../data/" + dataset_name + " StdPCA.csv"
pca_pd_ds.to_csv(std_pca_csv_path, sep=";", index=False)
