In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler


# Toy dataset: two numeric columns, each with one gap (None), plus one
# categorical column. The gaps are filled in by the imputation step later on.
data = dict(
    Age=[25, 30, 28, None, 35],  # 4th entry is missing
    Gender=['Female', 'Male', 'Male', 'Female', 'Male'],
    Income=[50000, 60000, 45000, 70000, None],  # last entry is missing
)

# Column order follows the insertion order of the dict keys.
df = pd.DataFrame(data=data)

# Handling Missing Data
# Fill each gap in the numeric columns with that column's mean, computed
# from the non-missing entries.
columns_to_impute = ['Age', 'Income']
imputer = SimpleImputer(strategy='mean')
imputer.fit(df[columns_to_impute])
df[columns_to_impute] = imputer.transform(df[columns_to_impute])

print("Data after handling missing values:")
print(df)

# Encoding Categorical Variables
# One-hot encode 'Gender', dropping the first category so that a single
# indicator column ('Gender_Male') remains instead of two redundant ones.
#
# The flag requesting a dense array was renamed from `sparse` to
# `sparse_output` in scikit-learn 1.2, and the old name was removed in 1.4.
# Try the current name first and fall back for older releases so the cell
# runs on either side of the rename.
try:
    encoder = OneHotEncoder(drop='first', sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(drop='first', sparse=False)
encoded_data = encoder.fit_transform(df[['Gender']])

# Convert encoded data to DataFrame. Reuse df's index so the column-wise
# concat below lines up row-for-row even if df is not indexed 0..n-1
# (pd.concat with axis=1 aligns on index labels, not on position).
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(['Gender']),
    index=df.index,
)

# Merging with Original Data: swap the raw text column for its encoding.
df = df.drop('Gender', axis=1)
df = pd.concat([df, encoded_df], axis=1)

print("\nData after categorical encoding:")
print(df)

# Feature Scaling
# Standardize the numeric columns (zero mean, unit variance per column).
columns_to_scale = ['Age', 'Income']
scaler = StandardScaler()
scaler.fit(df[columns_to_scale])
scaled_data = scaler.transform(df[columns_to_scale])

# Wrap the resulting array in a DataFrame with descriptive column labels.
# The scaled values are kept separate from df rather than written back.
scaled_df = pd.DataFrame(
    data=scaled_data,
    columns=['Scaled Age', 'Scaled Income'],
)

print("\nData after feature scaling:")
print(scaled_df)

Data after handling missing values:
    Age  Gender   Income
0  25.0  Female  50000.0
1  30.0    Male  60000.0
2  28.0    Male  45000.0
3  29.5  Female  70000.0
4  35.0    Male  56250.0

Data after categorical encoding:
    Age   Income  Gender_Male
0  25.0  50000.0          0.0
1  30.0  60000.0          1.0
2  28.0  45000.0          1.0
3  29.5  70000.0          0.0
4  35.0  56250.0          1.0

Data after feature scaling:
   Scaled Age  Scaled Income
0   -1.382164      -0.727778
1    0.153574       0.436667
2   -0.460721      -1.310001
3    0.000000       1.601112
4    1.689312       0.000000


