### Task 1: Handling Missing Values - Simple Imputation
**Description**: Given a dataset with missing values, impute the missing values using the mean for numerical features and the mode for categorical features.

In [3]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

# Toy dataset: two numeric features and one categorical feature, each with gaps
data = {
    "Age": [25, 30, None, 22, None, 40],
    "Income": [50000, None, 60000, 58000, 52000, None],
    "Gender": ["Male", "Female", None, "Female", "Male", None],
}

df = pd.DataFrame(data)

# Column groups, so each imputation strategy is applied to a named set of features
numeric_cols = ["Age", "Income"]
categorical_cols = ["Gender"]

# Make sure the numeric features really are numeric; anything unparsable becomes NaN
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Normalise the missing marker in the categorical feature: None -> np.nan
df["Gender"] = df["Gender"].replace({None: np.nan})

# Numeric features: fill gaps with the column mean
num_imputer = SimpleImputer(strategy="mean")
df[numeric_cols] = num_imputer.fit_transform(df[numeric_cols])

# Categorical feature: fill gaps with the most frequent value (the mode).
# "Male" and "Female" each appear twice here; the printed result shows the
# tie resolved to "Female".
cat_imputer = SimpleImputer(strategy="most_frequent")
df[categorical_cols] = cat_imputer.fit_transform(df[categorical_cols])

print(df)

     Age   Income  Gender
0  25.00  50000.0    Male
1  30.00  55000.0  Female
2  29.25  60000.0  Female
3  22.00  58000.0  Female
4  29.25  52000.0    Male
5  40.00  55000.0  Female


### Task 2: Feature Scaling - Min-Max Normalization
**Description**: Normalize a numerical feature using Min-Max scaling to a range [0, 1].

In [4]:
# Min-Max normalization of the "Age" feature with scikit-learn
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Single numeric feature to rescale
data = {"Age": [25, 30, 22, 40, 28, 35]}
df = pd.DataFrame(data)

# Min-Max scaling maps the smallest value to 0 and the largest to 1:
#   x_scaled = (x - min) / (max - min)
scaler = MinMaxScaler(feature_range=(0, 1))

# The scaler expects a 2-D input, hence the double brackets around "Age"
age_scaled = scaler.fit_transform(df[["Age"]])
df["Age_normalized"] = age_scaled

print(df)


   Age  Age_normalized
0   25        0.166667
1   30        0.444444
2   22        0.000000
3   40        1.000000
4   28        0.333333
5   35        0.722222


### Task 3: Handling Missing Values - Drop Missing Values
**Description**: Remove rows with missing values from a dataset.

In [5]:
import pandas as pd
import numpy as np

# Small frame where "Age" and "Gender" each contain missing entries
data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eva"],
    "Age": [25, np.nan, 30, 22, np.nan],
    "Gender": ["F", "M", None, "M", "F"],
}
df = pd.DataFrame(data)

# Keep only fully populated rows: a row is removed as soon as any of its
# columns is missing. The original frame `df` is left untouched and the
# surviving rows keep their original index labels.
df_cleaned = df.dropna(axis="index", how="any")

print(df_cleaned)

    Name   Age Gender
0  Alice  25.0      F
3  David  22.0      M


### Task 4: Feature Scaling - Standardization
**Description**: Standardize a numerical feature to have zero mean and unit variance.

In [6]:
# Standardization (z-score scaling) of the "Age" feature with scikit-learn

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Single numeric feature to standardize
data = {"Age": [25, 30, 22, 35, 28]}
df = pd.DataFrame(data)

# Standardization rescales the feature to zero mean and unit variance:
#   z = (x - mean) / std
# The printed values correspond to the population standard deviation
# (divide by n, not n - 1): e.g. (25 - 28) / sqrt(19.6) = -0.677631.
scaler = StandardScaler()

# The scaler expects a 2-D input, hence the double brackets around "Age"
age_standardized = scaler.fit_transform(df[["Age"]])
df["Age_standardized"] = age_standardized

print(df)

   Age  Age_standardized
0   25         -0.677631
1   30          0.451754
2   22         -1.355262
3   35          1.581139
4   28          0.000000
