### Task 1: Handling Missing Values - Simple Imputation
**Description**: Given a dataset with missing values, impute the missing values using the mean for numerical features and the mode for categorical features.

In [1]:
import pandas as pd

# Sample dataset with missing values
data = {
    "Age": [25, 30, None, 22, 28, None],
    "Income": [50000, None, 62000, 58000, None, 54000],
    "Gender": ["Male", "Female", None, "Female", "Male", None],
    "City": ["NY", None, "LA", "LA", "NY", "NY"]
}

df = pd.DataFrame(data)

print("Original Data:")
print(df)

# Separate numerical and categorical columns.
# Text columns can be stored as object, string, or category dtype depending on
# the pandas version and how the data was built, so all three are selected.
numerical_cols = df.select_dtypes(include=["number"]).columns
categorical_cols = df.select_dtypes(include=["object", "string", "category"]).columns

# Impute numerical columns with mean
for col in numerical_cols:
    # Assign the filled column back to the frame. Calling
    # fillna(..., inplace=True) on df[col] operates on a temporary selection:
    # it warns on recent pandas and leaves df unchanged under Copy-on-Write.
    df[col] = df[col].fillna(df[col].mean())

# Impute categorical columns with mode
for col in categorical_cols:
    modes = df[col].mode()  # mode() returns a sorted Series (several values on ties)
    if modes.empty:
        # Every value in the column is missing, so there is no mode to fill with.
        continue
    df[col] = df[col].fillna(modes.iloc[0])

print("\nData after Imputation:")
print(df)

Original Data:
    Age   Income  Gender  City
0  25.0  50000.0    Male    NY
1  30.0      NaN  Female  None
2   NaN  62000.0    None    LA
3  22.0  58000.0  Female    LA
4  28.0      NaN    Male    NY
5   NaN  54000.0    None    NY

Data after Imputation:
     Age   Income  Gender City
0  25.00  50000.0    Male   NY
1  30.00  56000.0  Female   NY
2  26.25  62000.0  Female   LA
3  22.00  58000.0  Female   LA
4  28.00  56000.0    Male   NY
5  26.25  54000.0  Female   NY


### Task 2: Feature Scaling - Min-Max Normalization
**Description**: Normalize a numerical feature to the range [0, 1] using Min-Max scaling.

In [2]:
import pandas as pd

# Sample data: a single numerical feature
data = {"Age": [25, 30, 22, 28, 35, 40]}
df = pd.DataFrame(data)

print("Original Data:")
print(df)

# Min-Max scaling: shift by the minimum, then divide by the value range,
# so the smallest age maps to 0 and the largest to 1.
age_min = df["Age"].min()
age_max = df["Age"].max()
df["Age_normalized"] = (df["Age"] - age_min) / (age_max - age_min)

print("\nData after Min-Max Normalization:")
print(df)

Original Data:
   Age
0   25
1   30
2   22
3   28
4   35
5   40

Data after Min-Max Normalization:
   Age  Age_normalized
0   25        0.166667
1   30        0.444444
2   22        0.000000
3   28        0.333333
4   35        0.722222
5   40        1.000000


### Task 3: Handling Missing Values - Drop Missing Values
**Description**: Remove rows with missing values from a dataset.

In [3]:
import pandas as pd

# Sample data with missing values scattered across every column
data = {
    "Age": [25, 30, None, 22, 28, None],
    "Income": [50000, None, 62000, 58000, None, 54000],
    "Gender": ["Male", "Female", None, "Female", "Male", None],
    "City": ["NY", None, "LA", "LA", "NY", "NY"],
}
df = pd.DataFrame(data)

print("Original Data:")
print(df)

# Keep only the rows in which every column has a value.
# dropna returns a new frame; the original df is left untouched and the
# surviving rows keep their original index labels.
df_cleaned = df.dropna(axis=0, how="any")

print("\nData after dropping rows with missing values:")
print(df_cleaned)

Original Data:
    Age   Income  Gender  City
0  25.0  50000.0    Male    NY
1  30.0      NaN  Female  None
2   NaN  62000.0    None    LA
3  22.0  58000.0  Female    LA
4  28.0      NaN    Male    NY
5   NaN  54000.0    None    NY

Data after dropping rows with missing values:
    Age   Income  Gender City
0  25.0  50000.0    Male   NY
3  22.0  58000.0  Female   LA


### Task 4: Feature Scaling - Standardization
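**Description**: Standardize a numerical feature to have zero mean and unit variance. Note that pandas' `Series.std()` computes the sample standard deviation (`ddof=1`) by default, so the results differ slightly from scalers that divide by the population standard deviation (`ddof=0`).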

In [4]:
import pandas as pd

# Sample data: a single numerical feature
data = {"Age": [25, 30, 22, 28, 35, 40]}
df = pd.DataFrame(data)

print("Original Data:")
print(df)

# Z-score standardization: centre on the mean, then scale by the standard
# deviation. Series.std() uses the sample standard deviation (ddof=1).
age_mean = df["Age"].mean()
age_std = df["Age"].std()
df["Age_standardized"] = df["Age"].sub(age_mean).div(age_std)

print("\nData after Standardization:")
print(df)

Original Data:
   Age
0   25
1   30
2   22
3   28
4   35
5   40

Data after Standardization:
   Age  Age_standardized
0   25         -0.757228
1   30          0.000000
2   22         -1.211565
3   28         -0.302891
4   35          0.757228
5   40          1.514456
