### Task 1: Handling Missing Values - Simple Imputation
**Description**: Given a dataset with missing values, impute the missing values using the mean for numerical features and the mode for categorical features.

In [1]:
import pandas as pd
from sklearn.impute import SimpleImputer
data = {
    'age': [25, 30, None, 22, 28],
    'gender': ['male', 'female', 'female', None, 'male'],
    'income': [50000, 60000, 55000, None, 58000]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
num_imputer = SimpleImputer(strategy='mean')
df['age'] = num_imputer.fit_transform(df[['age']])
df['income'] = num_imputer.fit_transform(df[['income']])
cat_imputer = SimpleImputer(strategy='most_frequent')
df['gender'] = cat_imputer.fit_transform(df[['gender']]).ravel()  # Use ravel() to convert to 1D
print("\nDataFrame after Simple Imputation:")
print(df)


Original DataFrame:
    age  gender   income
0  25.0    male  50000.0
1  30.0  female  60000.0
2   NaN  female  55000.0
3  22.0    None      NaN
4  28.0    male  58000.0

DataFrame after Simple Imputation:
     age  gender   income
0  25.00    male  50000.0
1  30.00  female  60000.0
2  26.25  female  55000.0
3  22.00    None  55750.0
4  28.00    male  58000.0


### Task 2: Feature Scaling - Min-Max Normalization
**Description**: Normalize a numerical feature using Min-Max scaling to a range [0, 1].

In [2]:

import numpy as np
import pandas as pd
def min_max_normalize(data):
  min_val = np.min(data)
  max_val = np.max(data)
  if min_val == max_val:
    print("Warning: Minimum and maximum values are the same. Returning original data.")
    return data
  else:
    normalized_data = (data - min_val) / (max_val - min_val)
    return normalized_data
example_series = pd.Series([10, 20, 30, 40, 50])
normalized_series = min_max_normalize(example_series)
print("Original Series:")
print(example_series)
print("\nNormalized Series (Min-Max):")
print(normalized_series)
example_array = np.array([5, 10, 15, 20])
normalized_array = min_max_normalize(example_array)
print("\nOriginal Array:")
print(example_array)
print("\nNormalized Array (Min-Max):")
print(normalized_array)

Original Series:
0    10
1    20
2    30
3    40
4    50
dtype: int64

Normalized Series (Min-Max):
0    0.00
1    0.25
2    0.50
3    0.75
4    1.00
dtype: float64

Original Array:
[ 5 10 15 20]

Normalized Array (Min-Max):
[0.         0.33333333 0.66666667 1.        ]


### Task 3: Handling Missing Values - Drop Missing Values
**Description**: Remove rows with missing values from a dataset.

In [3]:

import pandas as pd
def drop_missing_rows(df):
  df_cleaned = df.dropna()
  return df_cleaned
data = {'col1': [1, 2, np.nan, 4, 5],
        'col2': ['a', np.nan, 'c', 'd', 'e'],
        'col3': [True, False, True, np.nan, False]}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
df_without_missing = drop_missing_rows(df)
print("\nDataFrame after dropping rows with missing values:")
print(df_without_missing)

Original DataFrame:
   col1 col2   col3
0   1.0    a   True
1   2.0  NaN  False
2   NaN    c   True
3   4.0    d    NaN
4   5.0    e  False

DataFrame after dropping rows with missing values:
   col1 col2   col3
0   1.0    a   True
4   5.0    e  False


### Task 4: Feature Scaling - Standardization
**Description**: Standardize a numerical feature to have zero mean and unit variance.

In [4]:

import pandas as pd
from sklearn.preprocessing import StandardScaler
data = {
    'feature': [10, 20, 30, 40, 50]
}
df = pd.DataFrame(data)
print("Original DataFrame:")
print(df)
scaler = StandardScaler()
df['standardized_feature'] = scaler.fit_transform(df[['feature']])
print("\nDataFrame after Standardization:")
print(df)


Original DataFrame:
   feature
0       10
1       20
2       30
3       40
4       50

DataFrame after Standardization:
   feature  standardized_feature
0       10             -1.414214
1       20             -0.707107
2       30              0.000000
3       40              0.707107
4       50              1.414214
