In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Toy table: three numeric columns, each with exactly one missing entry (None).
data = {
    'Age': [25, 30, None, 22, 28],
    'Salary': [50000, 60000, 52000, None, 58000],
    'Experience': [1, 3, 2, 4, None],
}

df = pd.DataFrame.from_dict(data)

print("Original Data:", df, sep="\n")

# --- Imputation -------------------------------------------------------------
# Compute the per-column mean once (restricted to numeric columns), then use
# it as the replacement value for every missing cell in the matching column.
column_means = df.mean(numeric_only=True)
df_filled = df.fillna(value=column_means)

print("\nAfter Handling Missing Values (Filled with Mean):", df_filled, sep="\n")

# --- Scaling ----------------------------------------------------------------
# Learn each column's min/max from the imputed table, then rescale that same
# table with the fitted scaler (default MinMaxScaler settings).
scaler = MinMaxScaler().fit(df_filled)
normalized_data = scaler.transform(df_filled)

# The scaler hands back a plain array; wrap it in a DataFrame again so the
# printed result carries the original column labels.
df_normalized = pd.DataFrame(data=normalized_data, columns=df.columns)

print("\nAfter Normalization (Min-Max Scaling):", df_normalized, sep="\n")


Original Data:
    Age   Salary  Experience
0  25.0  50000.0         1.0
1  30.0  60000.0         3.0
2   NaN  52000.0         2.0
3  22.0      NaN         4.0
4  28.0  58000.0         NaN

After Handling Missing Values (Filled with Mean):
     Age   Salary  Experience
0  25.00  50000.0         1.0
1  30.00  60000.0         3.0
2  26.25  52000.0         2.0
3  22.00  55000.0         4.0
4  28.00  58000.0         2.5

After Normalization (Min-Max Scaling):
       Age  Salary  Experience
0  0.37500     0.0    0.000000
1  1.00000     1.0    0.666667
2  0.53125     0.2    0.333333
3  0.00000     0.5    1.000000
4  0.75000     0.8    0.500000
