In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

# Toy loan-application table used by the preprocessing steps below.
# 'Age' and 'Income' each contain one missing entry (None), which pandas
# stores as NaN in a float column.
data = pd.DataFrame(data={
    'Age': [25, 30, None, 35, 28],
    'Income': [50000, 60000, 75000, None, 55000],
    'Gender': ['M', 'F', 'M', 'F', 'M'],
    'Loan_Status': ['Approved', 'Rejected', 'Approved', 'Approved', 'Rejected'],
})

# Show the raw table before any cleaning is applied.
print("Original Data:", data, sep="\n")

# Fill the gaps in the numeric columns: every NaN is replaced by the mean of
# the observed values in its own column.
numeric_columns = ['Age', 'Income']
imputer = SimpleImputer(strategy='mean')
data[numeric_columns] = imputer.fit_transform(data[numeric_columns])

# Show the table after imputation.
print("\nMissing Data replaced with mean:", data, sep="\n")

# Encoding categorical variables (Gender and Loan_Status) as integer codes.
#
# Each column gets its OWN LabelEncoder. Refitting a single encoder on the
# second column would overwrite the classes learned from the first one, so the
# Gender codes could no longer be mapped back with inverse_transform().
gender_encoder = LabelEncoder()
data['Gender'] = gender_encoder.fit_transform(data['Gender'])

# `le` stays bound to the Loan_Status encoder, exactly as it was after the
# original single-encoder code ran, so any later use of `le` is unaffected.
le = LabelEncoder()
data['Loan_Status'] = le.fit_transform(data['Loan_Status'])

# NOTE(review): the banner below says "1-hot", but this step performs integer
# label encoding (one integer code per category), not one-hot encoding. The
# text is kept unchanged so it still matches previously recorded output.
# NOTE(review): scikit-learn documents LabelEncoder as intended for target
# labels; for input features such as Gender, OrdinalEncoder/OneHotEncoder is
# the usual choice - consider switching if this feeds a real model.
print("\n1-hot encoding categorical data:")
print(data)

# Standardise the numeric features (Age and Income) with StandardScaler:
# learn the per-column statistics first, then apply them to the same columns.
scaler = StandardScaler()
scaler.fit(data[['Age', 'Income']])
data[['Age', 'Income']] = scaler.transform(data[['Age', 'Income']])

# Show the fully preprocessed table.
print("\nScaling numerical features using StandardScaler:", data, sep="\n")

Original Data:
    Age   Income Gender Loan_Status
0  25.0  50000.0      M    Approved
1  30.0  60000.0      F    Rejected
2   NaN  75000.0      M    Approved
3  35.0      NaN      F    Approved
4  28.0  55000.0      M    Rejected

Missing Data replaced with mean:
    Age   Income Gender Loan_Status
0  25.0  50000.0      M    Approved
1  30.0  60000.0      F    Rejected
2  29.5  75000.0      M    Approved
3  35.0  60000.0      F    Approved
4  28.0  55000.0      M    Rejected

1-hot encoding categorical data:
    Age   Income  Gender  Loan_Status
0  25.0  50000.0       1            0
1  30.0  60000.0       0            1
2  29.5  75000.0       1            0
3  35.0  60000.0       0            0
4  28.0  55000.0       1            1

Scaling numerical features using StandardScaler:
        Age    Income  Gender  Loan_Status
0 -1.382164 -1.195229       1            0
1  0.153574  0.000000       0            1
2  0.000000  1.792843       1            0
3  1.689312  0.000000       0      