In [5]:
# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

# Step 2: Load Dataset
from sklearn.datasets import load_iris
iris = load_iris()

# Convert to pandas DataFrame: one column per feature plus the class label.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target

# Step 3: View Dataset Info
print("First 5 Rows of Data:")
print(df.head())

# Step 4: Check for Missing Values
# (Runs before the artificial NaNs below are injected, so it reports the
# state of the raw dataset.)
print("\nMissing Values:")
print(df.isnull().sum())

# (Optional) Introduce some missing values for demonstration.
# .loc slicing is label-based and inclusive: rows 5 through 10 become NaN.
df.loc[5:10, 'sepal length (cm)'] = np.nan

# Step 5: Determine which rows belong to the training set BEFORE fitting any
# preprocessing. Fitting the imputer/scaler on all rows would leak test-set
# statistics (mean, std) into the training features.
# The same test_size/random_state are reused in Step 8; because the shuffle
# depends only on the number of samples and the seed, both calls yield the
# same row partition.
train_rows, _ = train_test_split(
    df.index.to_numpy(), test_size=0.2, random_state=42
)

# Step 6: Impute Missing Values
# The mean is learned from training rows only, then applied to every row.
imputer = SimpleImputer(strategy='mean')
imputer.fit(df.loc[train_rows, ['sepal length (cm)']])
df[['sepal length (cm)']] = imputer.transform(df[['sepal length (cm)']])

# Step 7: Encode Target Labels (if not already encoded)
# (In this case, already encoded as 0, 1, 2)

# Step 8a: Feature Scaling (Standardization)
# Mean/std are learned from training rows only, then applied to every row.
scaler = StandardScaler()
scaler.fit(df.loc[train_rows].drop('target', axis=1))
scaled_features = scaler.transform(df.drop('target', axis=1))
df_scaled = pd.DataFrame(scaled_features, columns=iris.feature_names)
df_scaled['target'] = df['target']

# Step 8b: Split Dataset (80% train, 20% test)
# Same seed and sample count as in Step 5, so X_train contains exactly the
# rows the imputer and scaler were fitted on.
X = df_scaled.drop('target', axis=1)
y = df_scaled['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Final Output
print("\nTraining Features Shape:", X_train.shape)
print("Test Features Shape:", X_test.shape)


First 5 Rows of Data:
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  

Missing Values:
sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
target               0
dtype: int64

Training Features Shape: (120, 4)
Test Features Shape: (30, 4)
