### Task 1: Handling Missing Values - Simple Imputation
**Description**: Given a dataset with missing values, impute the missing values using the mean for numerical features and the mode for categorical features.

In [13]:
# write your code from here
import pandas as pd
import numpy as np

def handle_missing_values_simple_imputation(df):
    """
    Imputes missing values in a DataFrame without modifying the input.

    Numerical columns are filled with the column mean. Categorical, object
    and pandas string-dtype columns are filled with the column mode (the
    first one when several values tie). Columns of any other dtype, and
    columns that contain no missing values, are passed through unchanged.

    Args:
        df (pd.DataFrame): The input DataFrame with missing values.

    Returns:
        pd.DataFrame: A new DataFrame with missing values imputed.
                      A column whose values are all missing has no mean or
                      mode to impute with and is therefore left as-is.
    """
    # Work on a copy so the caller's frame is never mutated, as the
    # "Returns: a new DataFrame" contract promises.
    imputed = df.copy()

    for col in imputed.columns:
        column = imputed[col]

        # Nothing to fill: skip early and keep the column's dtype untouched.
        if not column.isna().any():
            continue

        if pd.api.types.is_numeric_dtype(column):
            # The mean of an all-missing column is NaN, so this is a no-op
            # in that case rather than an error.
            imputed[col] = column.fillna(column.mean())
        elif (
            # isinstance check replaces the deprecated
            # pd.api.types.is_categorical_dtype helper.
            isinstance(column.dtype, pd.CategoricalDtype)
            or isinstance(column.dtype, pd.StringDtype)
            or pd.api.types.is_object_dtype(column)
        ):
            modes = column.mode()
            # mode() is empty when every value is missing; indexing it
            # would raise, so leave such a column unchanged.
            if modes.empty:
                continue
            imputed[col] = column.fillna(modes.iloc[0])

    return imputed

if __name__ == "__main__":
    # Demo frame: two numeric and two text columns, each with exactly one
    # missing entry so both imputation strategies are exercised.
    data = {
        "numerical_1": [1, 2, np.nan, 4, 5],
        "numerical_2": [10, 20, 30, np.nan, 50],
        "categorical_1": ["A", "B", np.nan, "A", "C"],
        "categorical_2": ["X", "Y", "Z", "X", np.nan],
    }
    df = pd.DataFrame(data)

    print("Original DataFrame:", df, sep="\n")

    # A copy is passed in so `df` keeps its missing values for comparison.
    df_imputed = handle_missing_values_simple_imputation(df.copy())
    print("\nDataFrame with Imputed Values:", df_imputed, sep="\n")


Original DataFrame:
   numerical_1  numerical_2 categorical_1 categorical_2
0          1.0         10.0             A             X
1          2.0         20.0             B             Y
2          NaN         30.0           NaN             Z
3          4.0          NaN             A             X
4          5.0         50.0             C           NaN

DataFrame with Imputed Values:
   numerical_1  numerical_2 categorical_1 categorical_2
0          1.0         10.0             A             X
1          2.0         20.0             B             Y
2          3.0         30.0             A             Z
3          4.0         27.5             A             X
4          5.0         50.0             C             X


  elif pd.api.types.is_categorical_dtype(df[col]) or pd.api.types.is_object_dtype(df[col]):


### Task 2: Feature Scaling - Min-Max Normalization
**Description**: Normalize a numerical feature using Min-Max scaling to a range [0, 1].

In [14]:
# write your code from here
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def feature_scaling_min_max_normalization(series):
    """
    Normalizes a numerical pandas Series using Min-Max scaling to the range [0, 1].

    Args:
        series (pd.Series): The input numerical pandas Series to be normalized.

    Returns:
        pd.Series: A new float pandas Series with values scaled to the range
                   [0, 1]. The index and the name of the input Series are
                   preserved. An empty input yields an empty float Series.
    """
    # scikit-learn scalers reject zero-sample input, so short-circuit here
    # and hand back an empty float Series with the same index and name.
    if series.empty:
        return series.astype(float)

    scaler = MinMaxScaler()
    # The scaler expects a 2-D (n_samples, n_features) array, hence the
    # reshape into a single feature column.
    column = series.to_numpy().reshape(-1, 1)
    scaled = scaler.fit_transform(column)

    # Carry the name over as well as the index so the result can be matched
    # back to its source column.
    return pd.Series(scaled.ravel(), index=series.index, name=series.name)

if __name__ == "__main__":
    # Sample feature that includes a negative value, so the minimum is not
    # the first element.
    data = {"numerical_feature": [10, 20, 30, 40, 50, -10]}
    df = pd.DataFrame(data)
    numerical_series = df["numerical_feature"]

    print("Original Series:", numerical_series, sep="\n")

    normalized_series = feature_scaling_min_max_normalization(numerical_series)
    print("\nNormalized Series (Min-Max Scaling):", normalized_series, sep="\n")


Original Series:
0    10
1    20
2    30
3    40
4    50
5   -10
Name: numerical_feature, dtype: int64

Normalized Series (Min-Max Scaling):
0    0.333333
1    0.500000
2    0.666667
3    0.833333
4    1.000000
5    0.000000
dtype: float64


### Task 3: Handling Missing Values - Drop Missing Values
**Description**: Remove rows with missing values from a dataset.

In [15]:
# write your code from here
import pandas as pd
import numpy as np

def handle_missing_values_drop(df):
    """
    Drops every row that contains at least one missing value.

    The input DataFrame is not modified; surviving rows keep their
    original index labels.

    Args:
        df (pd.DataFrame): The input DataFrame.

    Returns:
        pd.DataFrame: A new DataFrame holding only the fully populated rows.
    """
    # axis=0 / how="any" are the pandas defaults, spelled out here so the
    # "drop a row if any cell is missing" rule is explicit.
    complete_rows = df.dropna(axis=0, how="any")
    return complete_rows

if __name__ == "__main__":
    # Rows 1, 2 and 3 each miss a value in a different column; only rows
    # 0 and 4 are complete.
    data = {
        "col1": [1, 2, np.nan, 4, 5],
        "col2": [10, np.nan, 30, 40, 50],
        "col3": ["A", "B", "C", np.nan, "E"],
    }
    df = pd.DataFrame(data)

    print("Original DataFrame:", df, sep="\n")

    df_dropped = handle_missing_values_drop(df.copy())
    print("\nDataFrame after dropping rows with missing values:", df_dropped, sep="\n")


Original DataFrame:
   col1  col2 col3
0   1.0  10.0    A
1   2.0   NaN    B
2   NaN  30.0    C
3   4.0  40.0  NaN
4   5.0  50.0    E

DataFrame after dropping rows with missing values:
   col1  col2 col3
0   1.0  10.0    A
4   5.0  50.0    E


### Task 4: Feature Scaling - Standardization
**Description**: Standardize a numerical feature to have zero mean and unit variance.

In [16]:
# write your code from here
import pandas as pd
from sklearn.preprocessing import StandardScaler

def feature_scaling_standardization(series):
    """
    Standardizes a numerical pandas Series to have zero mean and unit variance.

    Scaling is delegated to scikit-learn's StandardScaler, which divides by
    the population standard deviation (ddof=0) rather than the sample
    standard deviation that ``Series.std()`` uses by default.

    Args:
        series (pd.Series): The input numerical pandas Series to be standardized.

    Returns:
        pd.Series: A new float pandas Series with standardized values. The
                   index and the name of the input Series are preserved. An
                   empty input yields an empty float Series.
    """
    # scikit-learn scalers reject zero-sample input, so short-circuit here
    # and hand back an empty float Series with the same index and name.
    if series.empty:
        return series.astype(float)

    scaler = StandardScaler()
    # The scaler expects a 2-D (n_samples, n_features) array, hence the
    # reshape into a single feature column.
    column = series.to_numpy().reshape(-1, 1)
    standardized = scaler.fit_transform(column)

    # Carry the name over as well as the index so the result can be matched
    # back to its source column.
    return pd.Series(standardized.ravel(), index=series.index, name=series.name)

if __name__ == "__main__":
    # Same sample feature as the Min-Max demo, so the two scalings can be
    # compared side by side.
    data = {"numerical_feature": [10, 20, 30, 40, 50, -10]}
    df = pd.DataFrame(data)
    numerical_series = df["numerical_feature"]

    print("Original Series:", numerical_series, sep="\n")

    standardized_series = feature_scaling_standardization(numerical_series)
    print("\nStandardized Series (Zero Mean, Unit Variance):", standardized_series, sep="\n")


Original Series:
0    10
1    20
2    30
3    40
4    50
5   -10
Name: numerical_feature, dtype: int64

Standardized Series (Zero Mean, Unit Variance):
0   -0.676123
1   -0.169031
2    0.338062
3    0.845154
4    1.352247
5   -1.690309
dtype: float64
