In [16]:
from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
import pickle

In [28]:
def process_dataset(data, filename, n_components=0.9):
    """Compare PCA on raw vs. standardized features and pickle the scaled result.

    PCA is fitted twice on ``data.data``: once on the features as they are and
    once after standardizing them with ``StandardScaler``. The explained
    variance ratios of the PCA fitted on the standardized features are written
    to ``filename`` as a pickled list, and a short summary is printed.

    Parameters
    ----------
    data : object
        Anything exposing the feature matrix as ``data.data`` (the notebook
        passes the objects returned by scikit-learn's dataset loaders).
    filename : str
        Path of the pickle file to write; an existing file is overwritten.
    n_components : int or float, default=0.9
        Forwarded unchanged to both ``PCA`` instances, so the two runs always
        use the same setting. The default of 0.9 is the threshold that was
        previously hard-coded.

    Returns
    -------
    None
    """
    X = data.data

    # Without scaling. Only the fitted attributes are read afterwards, so
    # fit() is enough; the projected data was previously computed and dropped.
    pca_unscaled = PCA(n_components=n_components).fit(X)
    explained_variance_unscaled = pca_unscaled.explained_variance_ratio_

    # With scaling
    X_scaled = StandardScaler().fit_transform(X)
    pca_scaled = PCA(n_components=n_components).fit(X_scaled)
    explained_variance_scaled = pca_scaled.explained_variance_ratio_

    with open(filename, 'wb') as f:
        pickle.dump(list(explained_variance_scaled), f)

    # Summary: number of retained dimensions without / with scaling.
    print(f'{filename}:')
    print(f'[Bez skalowania]: Zachowano {len(explained_variance_unscaled)} wymiarów')
    print(f'[Ze skalowaniem]: Zachowano {len(explained_variance_scaled)} wymiarów')

    # Read the file back so the printed list is what was actually stored.
    with open(filename, 'rb') as f:
        print(pickle.load(f), '\n')


In [29]:
# Use the loader imported in the imports cell; the `datasets` module itself is
# never imported, so `datasets.load_breast_cancer()` fails on a fresh kernel.
data_breast_cancer = load_breast_cancer()
process_dataset(data_breast_cancer, 'pca_bc.pkl')

data_iris = load_iris()
process_dataset(data_iris, 'pca_ir.pkl')

pca_bc.pkl:
[Bez skalowania]: Zachowano 1 wymiarów
[Ze skalowaniem]: Zachowano 7 wymiarów
[0.44272025607526366, 0.1897118204403308, 0.09393163257431389, 0.06602134915470144, 0.05495768492346266, 0.04024522039883349, 0.02250733712982509] 

pca_ir.pkl:
[Bez skalowania]: Zachowano 1 wymiarów
[Ze skalowaniem]: Zachowano 2 wymiarów
[0.7296244541329991, 0.22850761786701745] 



In [45]:
def feature_influence_ranking(data, filename, n_components=0.9):
    """Rank the original features by their weighted PCA loadings and pickle the ranking.

    The features in ``data.data`` are standardized and a PCA is fitted on
    them. Every entry of ``components_`` is multiplied by the explained
    variance ratio of its component and taken in absolute value. Each feature
    is then scored by its largest weighted entry across the components, and
    the feature indices are ordered from highest to lowest score.

    Parameters
    ----------
    data : object
        Anything exposing the feature matrix as ``data.data`` (the notebook
        passes the objects returned by scikit-learn's dataset loaders).
    filename : str
        Path of the pickle file to write; an existing file is overwritten.
    n_components : int or float, default=0.9
        Forwarded unchanged to ``PCA``. The default of 0.9 is the threshold
        that was previously hard-coded.

    Returns
    -------
    None
        The ranking (a NumPy integer array of feature indices) is pickled to
        ``filename`` and printed, not returned.
    """
    X_scaled = StandardScaler().fit_transform(data.data)

    # Only the fitted attributes are needed, so fit() is enough; the projected
    # data was previously computed and dropped.
    pca = PCA(n_components=n_components).fit(X_scaled)

    # The column vector of ratios broadcasts along the rows of components_,
    # so each component's loadings are weighted by that component's ratio.
    weighted = np.abs(pca.components_ * pca.explained_variance_ratio_[:, np.newaxis])

    # Score per feature = max over components (axis 0); reversing the
    # ascending argsort gives indices from most to least influential.
    sorted_indices = np.argsort(np.max(weighted, axis=0))[::-1]

    with open(filename, 'wb') as f:
        pickle.dump(sorted_indices, f)

    # Read the file back so the printed ranking is what was actually stored.
    with open(filename, 'rb') as f:
        print(pickle.load(f), '\n')

In [46]:
# Write one feature-ranking pickle per dataset loaded earlier in the notebook.
feature_influence_ranking(data=data_breast_cancer, filename='idx_bc.pkl')
feature_influence_ranking(data=data_iris, filename='idx_ir.pkl')

[ 7  6 27  5 22 26 20  2 23  3  0 12 25 10 13 17 15  9 16  4  8 29 24 28
 19 21  1 14 11 18] 

[2 3 0 1] 

