In [4]:
import pandas as pd
import numpy as np

# Toy housing dataset; np.nan marks the entries that will be imputed below.
data = {
    "Square_Feet_Area": [8500, 9600, np.nan, 11250, np.nan, 9550, 14260, np.nan, 1830, 11500],
    "Year_Built": [2003, 2001, np.nan, 1990, 2000, 2006, 1978, 1950, np.nan, 2020],
    "OverAll_Condition": [5, 8, 6, 7, np.nan, 7, 8, 6, np.nan, 9],
    "Ready_to_move": ["Yes", "No", "Yes", np.nan, "No", np.nan, "Yes", "Yes", "No", "Yes"],
    "Sale_Price": [200000, 180000, 215000, 210000, 190000, 250000, 225000, 220000, 240000, 230000],
}
df = pd.DataFrame(data)

# Show the frame before any imputation so the gaps are visible.
print("Original DataFrame")
print(df)

# Numeric columns: fill every gap with the mean of that same column.
# Each replacement Series is built from the un-imputed frame, then all of
# them are swapped in at once via assign (which returns a new frame).
numeric_columns = ['Square_Feet_Area', 'Year_Built', 'OverAll_Condition']
mean_filled = {
    column: df[column].fillna(df[column].mean())
    for column in numeric_columns
}
df = df.assign(**mean_filled)

# Categorical column: fill gaps with the most frequent value.
# mode() returns a Series (there may be ties); the first entry is used.
most_frequent_answer = df['Ready_to_move'].mode().iloc[0]
df['Ready_to_move'] = df['Ready_to_move'].fillna(most_frequent_answer)

# Show the frame again once every gap has been filled.
print("\nDataFrame after replacing missing values with mean (for numeric) and mode (for categorical)")
print(df)

Original DataFrame
   Square_Feet_Area  Year_Built  OverAll_Condition Ready_to_move  Sale_Price
0            8500.0      2003.0                5.0           Yes      200000
1            9600.0      2001.0                8.0            No      180000
2               NaN         NaN                6.0           Yes      215000
3           11250.0      1990.0                7.0           NaN      210000
4               NaN      2000.0                NaN            No      190000
5            9550.0      2006.0                7.0           NaN      250000
6           14260.0      1978.0                8.0           Yes      225000
7               NaN      1950.0                6.0           Yes      220000
8            1830.0         NaN                NaN            No      240000
9           11500.0      2020.0                9.0           Yes      230000

DataFrame after replacing missing values with mean (for numeric) and mode (for categorical)
   Square_Feet_Area  Year_Built  OverAll_

In [6]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

# Toy housing dataset; np.nan marks the entries that will be imputed below.
data = {
    "Square_Feet_Area": [8500, 9600, np.nan, 11250, np.nan, 9550, 14260, np.nan, 1830, 11500],
    "Year_Built": [2003, 2001, np.nan, 1990, 2000, 2006, 1978, 1950, np.nan, 2020],
    "OverAll_Condition": [5, 8, 6, 7, np.nan, 7, 8, 6, np.nan, 9],
    "Ready_to_move": ["Yes", "No", "Yes", np.nan, "No", np.nan, "Yes", "Yes", "No", "Yes"],
    "Sale_Price": [200000, 180000, 215000, 210000, 190000, 250000, 225000, 220000, 240000, 230000],
}
df = pd.DataFrame(data)

# Show the frame before any imputation so the gaps are visible.
print("Original DataFrame")
print(df)

# Numeric columns: delegate mean imputation to scikit-learn's SimpleImputer.
# The column list is named once so the selection and the write-back below
# are guaranteed to target the same columns in the same order.
numeric_columns = ['Square_Feet_Area', 'Year_Built', 'OverAll_Condition']
numeric_imputer = SimpleImputer(strategy='mean')
imputed_numeric = numeric_imputer.fit_transform(df[numeric_columns])
df[numeric_columns] = imputed_numeric

# Categorical column: fill gaps with the most frequent value.
# mode() returns a Series (there may be ties); the first entry is used.
most_frequent_answer = df['Ready_to_move'].mode().iloc[0]
df['Ready_to_move'] = df['Ready_to_move'].fillna(most_frequent_answer)

# Show the frame again once every gap has been filled.
print("\nDataFrame after replacing missing values with mean (for numeric) and mode (for categorical)")
print(df)

Original DataFrame
   Square_Feet_Area  Year_Built  OverAll_Condition Ready_to_move  Sale_Price
0            8500.0      2003.0                5.0           Yes      200000
1            9600.0      2001.0                8.0            No      180000
2               NaN         NaN                6.0           Yes      215000
3           11250.0      1990.0                7.0           NaN      210000
4               NaN      2000.0                NaN            No      190000
5            9550.0      2006.0                7.0           NaN      250000
6           14260.0      1978.0                8.0           Yes      225000
7               NaN      1950.0                6.0           Yes      220000
8            1830.0         NaN                NaN            No      240000
9           11500.0      2020.0                9.0           Yes      230000

DataFrame after replacing missing values with mean (for numeric) and mode (for categorical)
   Square_Feet_Area  Year_Built  OverAll_