In [2]:
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.linear_model import LinearRegression

# Sample dataset
data = {
    'Age': [25, 27, np.nan, 29, 31, np.nan, 35],
    'Gender': ['Male', 'Female', 'Female', np.nan, 'Male', 'Female', np.nan],
    'Income': [50000, 54000, 58000, 62000, np.nan, 70000, 73000]
}

df = pd.DataFrame(data)

# 1. Check for missing values
print("Missing Values:\n", df.isnull().sum())

# 2. Drop rows with missing values
df_dropped_rows = df.dropna()

# 3. Drop columns with missing values
df_dropped_columns = df.dropna(axis=1)

# 4. Mean Imputation
df_mean = df.copy()
df_mean['Age'] = df_mean['Age'].fillna(df_mean['Age'].mean())

# 5. Mode Imputation
df_mode = df.copy()
mode_gender = df_mode['Gender'].mode()[0]
df_mode['Gender'] = df_mode['Gender'].fillna(mode_gender)

# 6. Median Imputation
df_median = df.copy()
df_median['Income'] = df_median['Income'].fillna(df_median['Income'].median())

# 7. KNN Imputation (convert Gender to numeric before imputation)
df_knn = df.copy()
df_knn['Gender'] = df_knn['Gender'].map({'Male': 1, 'Female': 0})
imputer = KNNImputer(n_neighbors=2)
df_knn_imputed = pd.DataFrame(imputer.fit_transform(df_knn), columns=df_knn.columns)
df_knn_imputed['Gender'] = df_knn_imputed['Gender'].round().map({1: 'Male', 0: 'Female'})

# 8. Handle missing categorical data with next frequent category
df_freq = df.copy()
top_two = df_freq['Gender'].value_counts().index[:2]
if len(top_two) > 1:
    second_mode = top_two[1]
    df_freq['Gender'] = df_freq['Gender'].fillna(second_mode)

# 9. Predictive Modeling Imputation (for Income)
df_model = df.copy()
# Encode gender
df_model['Gender'] = df_model['Gender'].map({'Male': 1, 'Female': 0})
# Drop rows with missing Age or Gender in the training data
train_data = df_model.dropna(subset=['Income', 'Age', 'Gender'])
test_data = df_model[df_model['Income'].isnull()]
X_train = train_data[['Age', 'Gender']]
y_train = train_data['Income']
X_test = test_data[['Age', 'Gender']].dropna()
# Align indices
valid_index = X_test.index
reg = LinearRegression()
reg.fit(X_train, y_train)
preds = reg.predict(X_test)
df_model.loc[valid_index, 'Income'] = preds
# Decode gender
df_model['Gender'] = df_model['Gender'].map({1: 'Male', 0: 'Female'})

# 10. Forward and Backward Fill for Time Series
ts_df = pd.DataFrame({
    'Date': pd.date_range(start='2023-01-01', periods=7),
    'Value': [1, np.nan, 3, np.nan, 5, np.nan, 7]
})
ts_df.sort_values('Date', inplace=True)
ts_df['Forward_Fill'] = ts_df['Value'].fillna(method='ffill')
ts_df['Backward_Fill'] = ts_df['Value'].fillna(method='bfill')

Missing Values:
 Age       2
Gender    2
Income    1
dtype: int64


  ts_df['Forward_Fill'] = ts_df['Value'].fillna(method='ffill')
  ts_df['Backward_Fill'] = ts_df['Value'].fillna(method='bfill')
