In [None]:
class PreprocessData(BaseEstimator, TransformerMixin):
    """Clean and numerically encode a loan-application DataFrame.

    ``fit`` learns nothing; all work happens in ``transform``, which reads the
    columns ``Gender``, ``Married``, ``Dependents``, ``Education``,
    ``Self_Employed``, ``Property_Area``, ``LoanAmount``, ``Loan_Amount_Term``,
    ``Credit_History``, ``ApplicantIncome`` and ``CoapplicantIncome``.

    NOTE(review): the ordinal encoders and the KNN imputer are fitted inside
    ``transform`` on the batch being transformed, so the codes and imputed
    values depend on which rows are in that batch rather than on the data seen
    by ``fit``. This is kept as-is to preserve current outputs; consider
    moving the fitting into ``fit``.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Return ``self`` unchanged; present only for the estimator API."""
        return self

    def transform(self, X, y=None):
        """Return a cleaned, fully numeric copy of ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing the columns listed in the class docstring.
            It is not modified.
        y : ignored
            Accepted for API compatibility.

        Returns
        -------
        pandas.DataFrame
            ``X`` with ``Property_Area`` replaced by ``property_area``, the
            three KNN-imputed columns moved to the end, and a trailing
            ``loan_term_category`` column.
        """
        # Work on a copy so the caller's frame is never modified.
        X = X.copy()

        # Impute missing categorical values. Results are assigned back rather
        # than using chained ``inplace=True``, which pandas deprecates and
        # which does not update the frame under Copy-on-Write.
        X['Gender'] = X['Gender'].fillna('Unknown')
        X['Married'] = X['Married'].fillna('No')
        X['Self_Employed'] = X['Self_Employed'].fillna('No')
        X['Dependents'] = X['Dependents'].fillna(0).replace({'3+': 3})

        # Convert various columns to numeric values
        X['Gender'] = X['Gender'].replace({'Male': 0, 'Female': 1, 'Unknown': 2})
        X['Married'] = X['Married'].replace({'No': 0, 'Yes': 1})
        X['Education'] = X['Education'].replace({'Not Graduate': 0, 'Graduate': 1})
        X['Self_Employed'] = X['Self_Employed'].replace({'No': 0, 'Yes': 1})
        X['Dependents'] = X['Dependents'].astype(int)

        # Ordinal-encode 'Property_Area' into 'property_area', then drop the
        # original column.
        area_encoder = OrdinalEncoder()
        X['property_area'] = area_encoder.fit_transform(X[['Property_Area']])
        X = X.drop('Property_Area', axis=1)

        # KNN-impute the remaining numeric gaps. Selecting with a list always
        # yields a 2-D frame, so a single-row input needs no special casing.
        columns_to_impute = ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']
        imputer = KNNImputer(n_neighbors=5)
        imputed_df = pd.DataFrame(
            imputer.fit_transform(X[columns_to_impute]),
            columns=columns_to_impute,
            index=X.index,
        )

        # Swap the raw columns for the imputed ones; this places the imputed
        # columns after all others.
        X = pd.concat([X.drop(columns=columns_to_impute), imputed_df], axis=1)

        # Round 'LoanAmount' and 'Loan_Amount_Term' to the nearest whole number
        X['LoanAmount'] = X['LoanAmount'].round().astype(int)
        X['Loan_Amount_Term'] = X['Loan_Amount_Term'].round().astype(int)

        # Rescale: incomes are multiplied by 12 and LoanAmount by 1000
        # (presumably monthly -> annual income and thousands -> units;
        # confirm against the dataset description).
        X['ApplicantIncome'] *= 12
        X['CoapplicantIncome'] *= 12
        X['LoanAmount'] *= 1000

        # Bucket Loan_Amount_Term into (0, 180], (180, 360] and (360, inf)
        X['Loan_Term_Category'] = pd.cut(
            X['Loan_Amount_Term'],
            bins=[0, 180, 360, np.inf],
            labels=['Short-term', 'Medium-term', 'Long-term'],
        )

        # Ordinal-encode the bucket into 'loan_term_category', then drop the
        # intermediate label column.
        term_encoder = OrdinalEncoder()
        X['loan_term_category'] = term_encoder.fit_transform(X[['Loan_Term_Category']])
        X = X.drop('Loan_Term_Category', axis=1)

        return X

# Column groups consumed by ``full_pipeline`` below.
num_attribs = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']
cat_attribs = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area']

# Numeric columns: median imputation followed by standardisation.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="median")),
    ('std_scaler', StandardScaler()),
])

# Categorical columns: most-frequent imputation followed by one-hot encoding.
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="most_frequent")),
    ('encoder', OneHotEncoder()),
])

# NOTE(review): the 'preprocessor' entry hands PreprocessData only three
# columns, but PreprocessData.transform indexes 'Gender', 'Married' and other
# columns, so fitting this transformer looks like it will raise KeyError.
# 'Loan_Status' also appears to be the target label -- confirm before use.
# ``full_pipeline`` is not part of ``final_pipeline``.
full_pipeline = ColumnTransformer([
    ('preprocessor', PreprocessData(), ['LoanAmount', 'Loan_Amount_Term', 'Loan_Status']),
    ('num', num_pipeline, num_attribs),
    ('cat', cat_pipeline, cat_attribs),
])

# Model pipeline: custom preprocessing followed by a decision tree.
# (An earlier assignment of ``final_pipeline`` built on ``full_pipeline`` was
# overwritten immediately by this one and has been removed as dead code.)
final_pipeline = Pipeline(steps=[
    ('preprocess', PreprocessData()),
    ('classifier', DecisionTreeClassifier(random_state=42))
])
