In [None]:
#Boyut Azaltma

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#sklearn iris veri seti
from sklearn.datasets import load_iris

# Load the Iris dataset that ships with scikit-learn.
iris = load_iris()

# Keep the feature matrix, the column names and the class labels under short names.
data, feature_names, y = iris.data, iris.feature_names, iris.target

# Build a DataFrame of the features plus a "Class" column holding the labels,
# then preview the first rows as the cell output.
df = pd.DataFrame(data, columns=feature_names).assign(Class=y)
df.head()

In [None]:
#Temel Bileşen Analizi (PCA) ile boyut azaltma 
from sklearn.decomposition import PCA
# Project the iris features onto the two leading principal components.
# whiten=True rescales the projected components to unit variance; it does not
# normalize the input features themselves.
pca = PCA(n_components=2, whiten=True)

x_pca = pca.fit_transform(data)

# Share of the total variance captured by each kept component, and their total.
print("Variance Ratio: ", pca.explained_variance_ratio_)
print("Sum: ", pca.explained_variance_ratio_.sum())

In [None]:
# Visualize the two principal components, colored by iris class.

# Store each projected coordinate as its own DataFrame column.
df["p1"] = x_pca[:, 0]  # first principal component
df["p2"] = x_pca[:, 1]  # second principal component

color = ["red", "green", "blue"]

# One scatter call per class so each class gets its own color and legend entry.
for class_id, class_color in enumerate(color):
    in_class = df.Class == class_id
    plt.scatter(
        df.p1[in_class],
        df.p2[in_class],
        color=class_color,
        label=iris.target_names[class_id],
    )

plt.xlabel("p1")
plt.ylabel("p2")
plt.legend()
plt.show()

In [None]:
#eğer data çok karışık ise datayı kernel pca ile boyut azaltma yapabiliriz
#kernel pca

from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split

# Generate 1000 noisy points arranged as a small circle inside a larger one.
X, y = make_circles(
    n_samples=1_000,
    factor=0.3,
    noise=0.05,
    random_state=0,
)
# Stratified train/test split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

import matplotlib.pyplot as plt

# Side-by-side scatter plots of the two splits; both axes are shared so the
# panels are directly comparable.
_, (train_ax, test_ax) = plt.subplots(
    ncols=2, sharex=True, sharey=True, figsize=(8, 4)
)

# Left panel: training samples colored by class.
train_ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train)
train_ax.set(xlabel='X1', ylabel='X2', title='Train Set')

# Right panel: test samples colored by class (no y label is set on this panel).
test_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test)
test_ax.set_xlabel('X1')
# Assign to `_` so the returned Text object is not echoed as cell output.
_ = test_ax.set_title('Test Set')

In [None]:
from sklearn.decomposition import KernelPCA

# Linear PCA versus RBF-kernel PCA on the same data.
pca = PCA(n_components=2)
kpca = KernelPCA(
    n_components=None,
    kernel='rbf',
    gamma=10,
    fit_inverse_transform=True,
    alpha=0.1,
)

# Learn each projection from the training split only, then apply it to the
# held-out test split. fit_transform(X_train, X_test) would treat X_test as the
# ignored `y` argument and return the projection of X_train instead.
X_test_pca = pca.fit(X_train).transform(X_test)
X_test_kpca = kpca.fit(X_train).transform(X_test)



In [None]:
# Compare the held-out test split in its original space with its linear-PCA
# and kernel-PCA projections. Every panel is colored by y_test, so each
# projected array must contain exactly one row per test sample.
assert len(X_test_pca) == len(y_test), "X_test_pca must be the projection of X_test"
assert len(X_test_kpca) == len(y_test), "X_test_kpca must be the projection of X_test"

fig, (orig_ax, pca_ax, kpca_ax) = plt.subplots(ncols=3, figsize=(14, 4))

# Original feature space: first feature on the x-axis, second on the y-axis.
orig_ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, s=50, cmap='viridis', alpha=0.5)
orig_ax.set_xlabel('öznitelik 1')
orig_ax.set_ylabel('öznitelik 2')
orig_ax.set_title('Original Data')

# Linear PCA projection of the test split.
pca_ax.scatter(X_test_pca[:, 0], X_test_pca[:, 1], c=y_test, s=50)
pca_ax.set_xlabel('1. bileşen')
pca_ax.set_ylabel('2. bileşen')
pca_ax.set_title('PCA')

# Kernel PCA projection of the test split (first two components).
kpca_ax.scatter(X_test_kpca[:, 0], X_test_kpca[:, 1], c=y_test, s=50)
kpca_ax.set_xlabel('1. bileşen')
kpca_ax.set_ylabel('2. bileşen')
# Assign to `_` so the returned Text object is not echoed as cell output.
_ = kpca_ax.set_title('Kernel PCA')




In [11]:
#varyans temelli öznitelik seçimi
from sklearn.feature_selection import VarianceThreshold

# Six samples with three 0/1 features each.
X = [
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 0],
    [0, 1, 1],
    [0, 1, 0],
    [0, 1, 1],
]

# Keep only the features whose variance exceeds 0.8 * (1 - 0.8) = 0.16,
# i.e. the variance of a 0/1 feature that takes the same value in 80% of samples.
sel = VarianceThreshold(threshold=0.8 * (1 - 0.8))
sel.fit_transform(X)


array([[0, 1],
       [1, 0],
       [0, 0],
       [1, 1],
       [1, 0],
       [1, 1]])

In [12]:
 #istatiksel model seçimi
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Reload Iris as a (features, labels) pair for univariate feature selection.
X, y = load_iris(return_X_y=True)
X.shape  # shown as the cell output: (n_samples, n_features)


(150, 4)

In [13]:
# Keep the two features with the highest chi-squared score against the labels.
X_new = SelectKBest(score_func=chi2, k=2).fit_transform(X, y)
X_new.shape

(150, 2)

In [14]:
 #model tabanlı öznitelik seçimi
from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel

# Reload Iris as a (features, labels) pair for model-based feature selection.
X, y = load_iris(return_X_y=True)
X.shape  # shown as the cell output: (n_samples, n_features)




(150, 4)

In [15]:
# L1-based feature selection: fit a linear SVM with an L1 penalty, then let
# SelectFromModel choose features from the fitted coefficients.
lsvc = LinearSVC(penalty="l1", C=0.01, dual=False)
lsvc.fit(X, y)
# prefit=True: reuse the already-fitted estimator instead of fitting it again.
model = SelectFromModel(estimator=lsvc, prefit=True)
X_new = model.transform(X)
X_new.shape



(150, 3)

In [18]:
# L2-based feature selection: fit a linear SVM with an L2 penalty and select
# features from that model.
# The selector must wrap the L2 model fitted here (`lsvs`); passing `lsvc`
# would silently reuse the L1-penalized model from the previous cell.
lsvs = LinearSVC(C=0.01, penalty="l2", dual=False).fit(X, y)
model = SelectFromModel(lsvs, prefit=True)
X_new = model.transform(X)
X_new.shape

(150, 3)

In [19]:
#ağaç tabanlı öznitelik seçimi
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel
 
# Reload Iris as a (features, labels) pair for tree-based feature selection.
X, y = load_iris(return_X_y=True)
X.shape  # shown as the cell output: (n_samples, n_features)


(150, 4)

In [21]:
# Fit an ensemble of 50 extremely randomized trees and inspect how much each
# feature contributes. The fixed random_state makes the importances (and any
# selection derived from them) reproducible across runs.
clf = ExtraTreesClassifier(n_estimators=50, random_state=0)
clf = clf.fit(X, y)
clf.feature_importances_

array([0.09279821, 0.07315838, 0.39366687, 0.44037654])

In [22]:
# Select features using the importances of the already-fitted tree ensemble;
# prefit=True tells SelectFromModel not to fit the estimator again.
model = SelectFromModel(estimator=clf, prefit=True)
X_new = model.transform(X)
X_new.shape

(150, 2)