# Завдання
Застосувати методи відбору ознак та зменшення розмірності до датасету `iris`.

## Рішення

In [27]:
from sklearn.datasets import load_iris
from sklearn.feature_selection import VarianceThreshold, SelectKBest, SelectFromModel, f_classif
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [28]:
# Load the Iris dataset and hold out 20% of the samples as a test split.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2023,  # fixed seed so the split is the same on every run
)

### VarianceThreshold

In [29]:
# Feature selection with VarianceThreshold: keep only the features whose
# variance on the training split is above 0.3.
# The selector is fitted on the training data only and then applied to both
# splits, so the test data never influences which columns are kept.
selector = VarianceThreshold(threshold=0.3).fit(X_train)
X_train_sel = selector.transform(X_train)
X_test_sel = selector.transform(X_test)
X_test_sel

array([[6.4, 5.6, 2.1],
       [6.4, 4.3, 1.3],
       [5.7, 4.1, 1.3],
       [7.3, 6.3, 1.8],
       [6.8, 4.8, 1.4],
       [5.7, 5. , 2. ],
       [5.4, 4.5, 1.5],
       [5.7, 4.2, 1.3],
       [5. , 1.4, 0.2],
       [6. , 4. , 1. ],
       [4.8, 1.6, 0.2],
       [6.7, 4.7, 1.5],
       [4.5, 1.3, 0.3],
       [6.4, 5.5, 1.8],
       [5.1, 1.4, 0.3],
       [6.9, 5.7, 2.3],
       [5.2, 1.5, 0.1],
       [4.9, 3.3, 1. ],
       [5. , 1.2, 0.2],
       [5.4, 1.3, 0.4],
       [5.6, 4.2, 1.3],
       [4.7, 1.3, 0.2],
       [7.7, 6.7, 2. ],
       [7. , 4.7, 1.4],
       [5.1, 1.7, 0.5],
       [5.8, 1.2, 0.2],
       [5.1, 1.5, 0.4],
       [7.7, 6.1, 2.3],
       [5.5, 3.7, 1. ],
       [5.3, 1.5, 0.2]])

### SelectKBest

In [30]:
# Feature selection with SelectKBest: score every feature against the class
# labels with the ANOVA F-test (f_classif) and keep the 3 best-scoring ones.
# Fitted on the training split only, then applied to both splits.
selector = SelectKBest(score_func=f_classif, k=3).fit(X_train, y_train)
X_train_sel = selector.transform(X_train)
X_test_sel = selector.transform(X_test)
X_test_sel

array([[6.4, 5.6, 2.1],
       [6.4, 4.3, 1.3],
       [5.7, 4.1, 1.3],
       [7.3, 6.3, 1.8],
       [6.8, 4.8, 1.4],
       [5.7, 5. , 2. ],
       [5.4, 4.5, 1.5],
       [5.7, 4.2, 1.3],
       [5. , 1.4, 0.2],
       [6. , 4. , 1. ],
       [4.8, 1.6, 0.2],
       [6.7, 4.7, 1.5],
       [4.5, 1.3, 0.3],
       [6.4, 5.5, 1.8],
       [5.1, 1.4, 0.3],
       [6.9, 5.7, 2.3],
       [5.2, 1.5, 0.1],
       [4.9, 3.3, 1. ],
       [5. , 1.2, 0.2],
       [5.4, 1.3, 0.4],
       [5.6, 4.2, 1.3],
       [4.7, 1.3, 0.2],
       [7.7, 6.7, 2. ],
       [7. , 4.7, 1.4],
       [5.1, 1.7, 0.5],
       [5.8, 1.2, 0.2],
       [5.1, 1.5, 0.4],
       [7.7, 6.1, 2.3],
       [5.5, 3.7, 1. ],
       [5.3, 1.5, 0.2]])

### SelectFromModel

In [31]:
# Feature selection with SelectFromModel: fit a random forest on the training
# split and keep the features whose importance is at least the median
# importance across all features.
clf = RandomForestClassifier(n_estimators=100, random_state=2023)
selector = SelectFromModel(estimator=clf, threshold="median").fit(X_train, y_train)
X_train_sel = selector.transform(X_train)
X_test_sel = selector.transform(X_test)
X_test_sel

array([[5.6, 2.1],
       [4.3, 1.3],
       [4.1, 1.3],
       [6.3, 1.8],
       [4.8, 1.4],
       [5. , 2. ],
       [4.5, 1.5],
       [4.2, 1.3],
       [1.4, 0.2],
       [4. , 1. ],
       [1.6, 0.2],
       [4.7, 1.5],
       [1.3, 0.3],
       [5.5, 1.8],
       [1.4, 0.3],
       [5.7, 2.3],
       [1.5, 0.1],
       [3.3, 1. ],
       [1.2, 0.2],
       [1.3, 0.4],
       [4.2, 1.3],
       [1.3, 0.2],
       [6.7, 2. ],
       [4.7, 1.4],
       [1.7, 0.5],
       [1.2, 0.2],
       [1.5, 0.4],
       [6.1, 2.3],
       [3.7, 1. ],
       [1.5, 0.2]])

### PCA

In [32]:
# Dimensionality reduction with PCA: project the samples onto 2 components.
# Unlike the selectors above, this builds new features rather than keeping a
# subset of the original columns.
pca = PCA(n_components=2)
# Learn the projection from the training split only ...
X_train_sel = pca.fit_transform(X_train)
# ... and reuse that same projection for the held-out test split.
X_test_sel = pca.transform(X_test)
# Bare expression so the notebook displays the transformed test set.
X_test_sel

array([[ 2.04822331, -0.20184399],
       [ 0.63751654,  0.15196658],
       [ 0.22140826, -0.33426981],
       [ 2.85430616,  0.34464607],
       [ 1.25350068,  0.23860189],
       [ 1.26917119, -0.7613683 ],
       [ 0.51248441, -0.45884507],
       [ 0.2991693 , -0.27692098],
       [-2.8019275 ,  0.36003687],
       [ 0.18094431, -0.55435257],
       [-2.66481629, -0.17607664],
       [ 1.14535908,  0.40993042],
       [-2.93150667, -0.91977067],
       [ 1.83014741,  0.05961558],
       [-2.72159218,  0.34244288],
       [ 2.35657777,  0.38537078],
       [-2.71919484,  0.85051986],
       [-0.83001678, -0.98430415],
       [-2.94206541,  0.09582547],
       [-2.69291494,  0.84355435],
       [ 0.27842573, -0.49019125],
       [-2.96499548, -0.11285213],
       [ 3.42135268,  0.4420958 ],
       [ 1.20953468,  0.68255048],
       [-2.37681762,  0.12751772],
       [-2.71458024,  1.20393261],
       [-2.61500771,  0.46795495],
       [ 3.00279628,  0.67627696],
       [-0.27143789,

### SVD

In [33]:
# Dimensionality reduction with truncated SVD: project the samples onto
# 2 components.
# TruncatedSVD uses a randomized solver by default, so the seed is pinned
# (same value as the other seeded steps in this notebook) to make the
# projection reproducible from run to run.
svd = TruncatedSVD(n_components=2, random_state=2023)
# Learn the projection from the training split only ...
X_train_sel = svd.fit_transform(X_train)
# ... and reuse that same projection for the held-out test split.
X_test_sel = svd.transform(X_test)
# Bare expression so the notebook displays the transformed test set.
X_test_sel

array([[ 9.10668138e+00, -1.27527439e+00],
       [ 8.33310736e+00, -2.62967623e-02],
       [ 7.66833689e+00, -1.44791229e-01],
       [ 1.01286444e+01, -1.34418167e+00],
       [ 8.87071589e+00, -3.50432611e-01],
       [ 8.14175778e+00, -1.18674255e+00],
       [ 7.76140330e+00, -4.74578418e-01],
       [ 7.75798190e+00, -1.60481402e-01],
       [ 5.85876162e+00,  2.37060784e+00],
       [ 7.56241400e+00, -2.08846994e-01],
       [ 5.62392804e+00,  1.89889912e+00],
       [ 8.87470162e+00, -1.80854297e-01],
       [ 4.95889605e+00,  1.55040984e+00],
       [ 9.11671298e+00, -9.35906931e-01],
       [ 5.91289141e+00,  2.31010695e+00],
       [ 9.71786012e+00, -1.05155261e+00],
       [ 6.23194691e+00,  2.66671274e+00],
       [ 6.45281629e+00,  7.07679038e-02],
       [ 5.60386751e+00,  2.29286862e+00],
       [ 6.25369775e+00,  2.65106481e+00],
       [ 7.60762317e+00, -2.98805912e-01],
       [ 5.43144655e+00,  2.13500383e+00],
       [ 1.06315871e+01, -1.63321361e+00],
       [ 9.

### LDA

In [34]:
# Dimensionality reduction with LDA: a supervised projection onto
# 2 discriminant components, fitted using the class labels.
# Fitted on the training split only, then applied to both splits.
lda = LinearDiscriminantAnalysis(n_components=2).fit(X_train, y_train)
X_train_sel = lda.transform(X_train)
X_test_sel = lda.transform(X_test)
X_test_sel

array([[-6.25640358e+00, -1.83186908e-01],
       [-1.05367026e+00,  6.28140267e-01],
       [-1.32198797e+00,  6.12163944e-01],
       [-6.03323738e+00,  9.68828823e-01],
       [-2.28498003e+00,  1.03833441e+00],
       [-5.76725507e+00,  2.32497767e-01],
       [-2.58188874e+00, -1.13832073e-01],
       [-1.34772501e+00,  4.48266081e-01],
       [ 8.46077539e+00, -6.02192204e-01],
       [-1.14074751e+00,  2.74697949e+00],
       [ 6.97738731e+00,  6.91579137e-01],
       [-1.88175262e+00, -5.52195292e-03],
       [ 5.63218206e+00,  2.06918413e+00],
       [-4.63510506e+00, -2.18981827e-01],
       [ 8.06222342e+00, -5.98922072e-01],
       [-5.94335354e+00, -1.47462404e+00],
       [ 9.60148862e+00, -1.43134116e+00],
       [-9.62880633e-02,  1.60054233e+00],
       [ 8.14425674e+00,  1.97702092e-01],
       [ 8.93732614e+00, -1.81727186e+00],
       [-1.78768747e+00,  9.04975686e-01],
       [ 7.71861414e+00,  2.23686616e-01],
       [-7.34133909e+00,  1.05870343e+00],
       [-1.