In [1]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, MinMaxScaler, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.decomposition import PCA
import pandas as pd

### Import the Data

In [2]:
# Load the Iris dataset as pandas objects and build a single frame holding
# the four measurement columns plus the integer class label.
iris = load_iris(as_frame=True)
# `assign` returns a new DataFrame, so `iris.data` stays features-only instead
# of silently gaining a 'target' column through a shared reference.
data = iris.data.assign(target=iris.target)

In [4]:
# Show the combined frame (measurement columns plus 'target') as the cell output.
data

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


### 1. Data Preparation

1.1 Scaling the Data using `StandardScaler()`

In [5]:
# Standardise only the two sepal measurements; the fitted scaler is kept
# in `scaler` and the transformed values in `data_scaled`.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(
    data.loc[:, ['sepal length (cm)', 'sepal width (cm)']]
)

1.2 Handling Missing Values using `SimpleImputer()`

In [6]:
# Work on a copy so the missing value injected below never reaches `data`.
data_with_nan = data.copy()
# Blank out the very first cell (row 0, column 0) to give the imputer work to do.
data_with_nan.iat[0, 0] = float('nan')
# Fill each missing entry with the mean of its column.
imputer = SimpleImputer(strategy='mean')
data_with_nan_imputed = imputer.fit_transform(data_with_nan)

1.3 Separating Features and Labels using `pandas`

In [7]:
# Feature matrix: every column except the label.
X = data.drop('target', axis=1)
# Label vector: the 'target' column on its own.
y = data.loc[:, 'target']

1.4 Feature Selection using `SelectKBest()`

In [8]:
# Keep the two features with the highest chi-squared score against the labels.
selector = SelectKBest(score_func=chi2, k=2)
X_selected = selector.fit(X, y).transform(X)

1.5 Dimensionality Reduction using `PCA()`

In [9]:
# Project the feature matrix onto two principal components; the fitted
# estimator stays available in `pca` and the projected rows in `X_pca`.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

Print the Data

In [10]:

# Preview the first five rows of each intermediate result, in pipeline order.
for _heading, _rows in (
    ("Scaled Data:\n", data_scaled),
    ("Imputed Data:\n", data_with_nan_imputed),
    ("Selected Features:\n", X_selected),
    ("PCA Reduced Data:\n", X_pca),
):
    print(_heading, _rows[:5])

Scaled Data:
 [[-0.90068117  1.01900435]
 [-1.14301691 -0.13197948]
 [-1.38535265  0.32841405]
 [-1.50652052  0.09821729]
 [-1.02184904  1.24920112]]
Imputed Data:
 [[5.84832215 3.5        1.4        0.2        0.        ]
 [4.9        3.         1.4        0.2        0.        ]
 [4.7        3.2        1.3        0.2        0.        ]
 [4.6        3.1        1.5        0.2        0.        ]
 [5.         3.6        1.4        0.2        0.        ]]
Selected Features:
 [[1.4 0.2]
 [1.4 0.2]
 [1.3 0.2]
 [1.5 0.2]
 [1.4 0.2]]
PCA Reduced Data:
 [[-2.68412563  0.31939725]
 [-2.71414169 -0.17700123]
 [-2.88899057 -0.14494943]
 [-2.74534286 -0.31829898]
 [-2.72871654  0.32675451]]
