In [None]:
# Columns holding numeric measurements (areas, counts, years, ...).
numerical_features = [
    "LotFrontage", "LotArea", "YearBuilt", "YearRemodAdd", "MasVnrArea",
    "BsmtFinSF1", "BsmtFinSF2", "BsmtUnfSF", "TotalBsmtSF", "1stFlrSF",
    "2ndFlrSF", "LowQualFinSF", "GrLivArea", "BsmtFullBath", "BsmtHalfBath",
    "FullBath", "HalfBath", "BedroomAbvGr", "KitchenAbvGr", "TotRmsAbvGrd",
    "Fireplaces", "GarageYrBlt", "GarageCars", "GarageArea", "WoodDeckSF",
    "OpenPorchSF", "EnclosedPorch", "3SsnPorch", "ScreenPorch", "PoolArea",
    "MiscVal", "MoSold", "YrSold",
]

# Unordered (nominal) categorical columns.
# NOTE(review): "CentralAir" is listed here and again in the ordinal mapping
# below; encode_categories processes the ordinal mapping last, so the ordered
# dtype is the one that ends up on that column.
nominative_categorical_features = [
    "MSSubClass", "MSZoning", "Street", "Alley", "LandContour", "LotConfig",
    "Neighborhood", "Condition1", "Condition2", "BldgType", "HouseStyle",
    "RoofStyle", "RoofMatl", "Exterior1st", "Exterior2nd", "MasVnrType",
    "Foundation", "Heating", "CentralAir", "GarageType", "MiscFeature",
    "SaleType", "SaleCondition",
]

# Ordered categorical columns: each maps to its levels, lowest to highest.
# Despite its name, `poor_to_fair` covers the whole Po -> Ex quality scale.
poor_to_fair = ["Po", "Fa", "TA", "Gd", "Ex"]
# Ratings 1..10 inclusive. `range(10)` would give 0..9, leaving the top
# rating (10) outside the declared levels so that it is cast to NaN.
one_to_ten = list(range(1, 11))

ordinal_categorical_features = {
    "OverallQual": one_to_ten,
    "OverallCond": one_to_ten,
    "ExterQual": poor_to_fair,
    "ExterCond": poor_to_fair,
    "BsmtQual": poor_to_fair,
    "BsmtCond": poor_to_fair,
    "HeatingQC": poor_to_fair,
    "KitchenQual": poor_to_fair,
    "FireplaceQu": poor_to_fair,
    "GarageQual": poor_to_fair,
    "GarageCond": poor_to_fair,
    "PoolQC": poor_to_fair,
    "LotShape": ["Reg", "IR1", "IR2", "IR3"],
    "LandSlope": ["Sev", "Mod", "Gtl"],
    "BsmtExposure": ["No", "Mn", "Av", "Gd"],
    "BsmtFinType1": ["Unf", "LwQ", "Rec", "BLQ", "ALQ", "GLQ"],
    "BsmtFinType2": ["Unf", "LwQ", "Rec", "BLQ", "ALQ", "GLQ"],
    "Functional": ["Sal", "Sev", "Maj1", "Maj2", "Mod", "Min2", "Min1", "Typ"],
    "GarageFinish": ["Unf", "RFn", "Fin"],
    "PavedDrive": ["N", "P", "Y"],
    "Utilities": ["NoSeWa", "NoSewr", "AllPub"],
    "CentralAir": ["N", "Y"],
    "Electrical": ["Mix", "FuseP", "FuseF", "FuseA", "SBrkr"],
    "Fence": ["MnWw", "GdWo", "MnPrv", "GdPrv"],
}

# Prepend a "None" level (ranked below every real level) for missing values.
# `["None"] + value` builds a fresh list, so the shared scale lists above are
# not mutated.
ordinal_categorical_features = {
    key: ["None"] + value for key, value in ordinal_categorical_features.items()
}


def encode_categories(df):
    """Cast the categorical columns of ``df`` to pandas categorical dtypes.

    Columns named in ``nominative_categorical_features`` become unordered
    categoricals with a guaranteed ``"None"`` category. Columns named in
    ``ordinal_categorical_features`` become ordered categoricals using the
    level lists from that mapping; values not among the listed levels are
    turned into missing values by the cast.

    Args:
        df: DataFrame containing every column named in the two feature
            collections. It is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    for feature in nominative_categorical_features:
        column = df[feature].astype("category")
        # Make sure a "None" category exists so missing values can later be
        # filled with it.
        if "None" not in column.cat.categories:
            # add_categories returns a new Series; its `inplace` keyword was
            # deprecated in pandas 1.3 and removed in 2.0, so the result has
            # to be assigned back explicitly.
            column = column.cat.add_categories("None")
        df[feature] = column

    # Ordinal columns get an explicit, ordered level list.
    for feature, levels in ordinal_categorical_features.items():
        df[feature] = df[feature].astype(CategoricalDtype(levels, ordered=True))

    return df


def impute_missing_values(df):
    """Fill missing values in the numeric and categorical columns of ``df``.

    Numeric columns have missing entries replaced by ``0``. Categorical
    columns have missing entries replaced by the category ``"None"``, which
    is added to the column's categories first when it is not already there.
    Columns of any other dtype are left untouched.

    Args:
        df: DataFrame to impute. It is modified in place.

    Returns:
        The same ``df`` object, for call chaining.
    """
    for name in df.select_dtypes("number").columns:
        df[name] = df[name].fillna(0)

    for name in df.select_dtypes("category").columns:
        column = df[name]
        # Filling a categorical with a value outside its categories raises,
        # so register "None" before using it as the fill value.
        if "None" not in column.cat.categories:
            column = column.cat.add_categories("None")
        df[name] = column.fillna("None")

    return df

# Prepare both splits the same way: first cast the categorical columns to
# their categorical dtypes, then fill in the missing values.
X_train, X_test = encode_categories(X_train), encode_categories(X_test)
X_train, X_test = impute_missing_values(X_train), impute_missing_values(X_test)