In [8]:
import pandas as pd  
import numpy as np  
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler  

In [9]:
# Toy dataset: five records, with exactly one missing entry (np.nan) in each of
# the Age, Salary and Department columns; Purchased is complete.
data = dict(
    Age=[25, 30, np.nan, 40, 35],
    Salary=[50000, 60000, 55000, np.nan, 65000],
    Department=['HR', 'IT', 'Finance', 'IT', np.nan],
    Purchased=['Yes', 'No', 'Yes', 'No', 'Yes'],
)

In [10]:
# Build the working frame from the raw dict and show its starting state
# (missing entries are still present at this point).
df = pd.DataFrame(data)
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
    Age   Salary Department Purchased
0  25.0  50000.0         HR       Yes
1  30.0  60000.0         IT        No
2   NaN  55000.0    Finance       Yes
3  40.0      NaN         IT        No
4  35.0  65000.0        NaN       Yes


In [11]:
# Handle Missing Values
# Numeric columns: replace NaN with the mean of the column's non-missing values.
for numeric_col in ('Age', 'Salary'):
    if numeric_col in df.columns:
        df[numeric_col] = df[numeric_col].fillna(df[numeric_col].mean())

# Categorical column: replace NaN with the most frequent value.
if 'Department' in df.columns:
    department_modes = df['Department'].mode()
    # mode() comes back empty when every entry is NaN; skip the fill in that
    # case instead of failing on a lookup into an empty Series.
    if not department_modes.empty:
        df['Department'] = df['Department'].fillna(department_modes.iloc[0])

print("\nAfter Handling Missing Values:")
print(df)


After Handling Missing Values:
    Age   Salary Department Purchased
0  25.0  50000.0         HR       Yes
1  30.0  60000.0         IT        No
2  32.5  55000.0    Finance       Yes
3  40.0  57500.0         IT        No
4  35.0  65000.0         IT       Yes


In [12]:
# Encoding
# Binary column: LabelEncoder assigns integer codes in sorted class order.
labelencoder = LabelEncoder()
df['Purchased'] = labelencoder.fit_transform(df['Purchased'])  # Yes=1, No=0

# One-hot encode Department; drop_first=True omits the first category's
# indicator column. get_dummies removes the 'Department' column itself, so the
# presence guard (same style as the missing-value step) lets this cell be
# re-run without raising a KeyError.
if 'Department' in df.columns:
    df = pd.get_dummies(df, columns=['Department'], drop_first=True)

print("\nAfter Encoding:")
print(df)


After Encoding:
    Age   Salary  Purchased  Department_HR  Department_IT
0  25.0  50000.0          1           True          False
1  30.0  60000.0          0          False           True
2  32.5  55000.0          1          False          False
3  40.0  57500.0          0          False           True
4  35.0  65000.0          1          False           True


In [13]:
# Normalization
# Rescale the numeric columns with min-max scaling; the fitted scaler is kept
# in `scaler`.
numeric_cols = ['Age', 'Salary']
scaler = MinMaxScaler()
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

print("\nAfter Normalization:", df, sep="\n")



After Normalization:
        Age    Salary  Purchased  Department_HR  Department_IT
0  0.000000  0.000000          1           True          False
1  0.333333  0.666667          0          False           True
2  0.500000  0.333333          1          False          False
3  1.000000  0.500000          0          False           True
4  0.666667  1.000000          1          False           True
