In [2]:
import os
# NOTE(review): os.path.join() only builds and returns a path string; the
# result is discarded here, so this line has no effect. Presumably the intent
# was to make the parent directory importable (e.g. sys.path.append('../'))
# so the `doggos` package resolves - TODO confirm and fix or remove.
os.path.join('../')
from collections import defaultdict, OrderedDict
from typing import Dict, List, Any, Callable

from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from doggos.fuzzy_sets import Type1FuzzySet
from doggos.fuzzy_sets.fuzzy_set import FuzzySet
from doggos.induction.information_system import InformationSystem
from doggos.inference import MamdaniInferenceSystem
from doggos.inference.defuzzification_algorithms import center_of_gravity, karnik_mendel
from doggos.inference.inference_system import InferenceSystem
from doggos.knowledge import Rule, Clause, fuzzify, LinguisticVariable, Domain
from doggos.knowledge.consequents import MamdaniConsequent
from doggos.knowledge.consequents.consequent import Consequent
from doggos.utils.grouping_functions import create_set_of_variables
from doggos.utils.membership_functions.membership_functions import generate_equal_gausses, sigmoid, gaussian

In [7]:
# Load the semicolon-separated dataset; `dataset_name` is reused further down
# to build the output file name.
dataset_name = "wdbc"
ds = pd.read_csv(
    filepath_or_buffer="../data/" + dataset_name + ".csv",
    sep=";",
)
# Preview the first rows as the cell's rich output.
ds.head()

Unnamed: 0,F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F21,F22,F23,F24,F25,F26,F27,F28,F29,Decision
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,1
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,1
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,1
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,1
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,1


In [8]:
# Linear PCA on standardized features (everything except the "Decision" column).
# NOTE: despite its name, `min_max_scaler` holds a StandardScaler, not a
# MinMaxScaler; the name is kept so other cells that reference it still work.
min_max_scaler = StandardScaler()
values_no_decision = min_max_scaler.fit_transform(
    ds.drop(columns=["Decision"]).to_numpy()
)

# fit() returns the estimator itself, so construction and fitting can be chained.
pca = PCA(n_components=4).fit(values_no_decision)

# Fraction of the total variance captured by each of the 4 components.
print(pca.explained_variance_ratio_)

[0.44272026 0.18971182 0.09393163 0.06602135]


In [74]:
# Cosine-kernel PCA on the same standardized features, kept as a side-by-side
# comparison with the linear PCA fitted earlier in the notebook.
#
# The model and its inputs are bound to their own names (`kernel_pca`,
# `kernel_pca_features`) instead of rebinding `pca` / `values_no_decision`.
# Rebinding `pca` here meant that a top-to-bottom "Restart & Run All" made the
# later transform/export cells use this KernelPCA model, while the exported
# file is named "StdPCA" and the recorded projection came from the linear PCA.
kernel_pca_features = StandardScaler().fit_transform(
    ds.drop(labels=["Decision"], axis=1).values
)
kernel_pca = KernelPCA(n_components=4, kernel='cosine')
kernel_pca.fit(kernel_pca_features)

# Each eigenvalue as a share of the sum of the 4 *retained* eigenvalues
# (the printed values therefore always sum to 1; this is not a share of the
# total variance of the data).
print(kernel_pca.eigenvalues_ / np.sum(kernel_pca.eigenvalues_))

[0.33121241 0.32015514 0.19676206 0.15187038]


In [9]:
# Project the standardized features onto the fitted components.
principal_components = pca.transform(values_no_decision)

# One column per component, named F0, F1, ... in component order.
cols = [f'F{idx}' for idx in range(principal_components.shape[1])]

# Wrap the projection in a DataFrame and carry over the class column from `ds`
# (aligned on the row index, as with a plain column assignment).
pca_pd_ds = pd.DataFrame(principal_components, columns=cols).assign(
    Decision=ds['Decision']
)
pca_pd_ds.head()

Unnamed: 0,F0,F1,F2,F3,Decision
0,9.192837,1.948583,-1.123166,3.633731,1
1,2.387802,-3.768172,-0.529293,1.118264,1
2,5.733896,-1.075174,-0.551748,0.912083,1
3,7.122953,10.275589,-3.23279,0.152547,1
4,3.935302,-1.948072,1.389768,2.940639,1


In [10]:
# Persist the projected dataset next to the input data, semicolon-separated
# and without the row index (same layout as the file that was read).
pca_pd_ds.to_csv(
    path_or_buf="../data/" + dataset_name + " StdPCA.csv",
    index=False,
    sep=";",
)
