In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.impute import KNNImputer

In [2]:
#‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
data = pd.read_csv("train_cleaned.csv")

In [7]:
#‡∏Å‡∏≥‡∏´‡∏ô‡∏î features
features = [
    "OverallQual",
    "TotalBsmtSF",
    "LotArea",
    "GarageCars",
    "Fireplaces",
    "BedroomAbvGr",
    "GrLivArea",
    "FullBath",
    "Neighborhood"
  ]
target = 'SalePrice'

#‡∏ñ‡πâ‡∏≤‡∏ö‡∏≤‡∏á‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡πÄ‡∏õ‡πá‡∏ô category ‡πÅ‡∏õ‡∏•‡∏á‡πÄ‡∏õ‡πá‡∏ô‡∏ï‡∏±‡∏ß‡πÄ‡∏•‡∏Ç
for col in ['Neighborhood']:
    if data[col].dtype == 'object':
        data[col] = data[col].astype('category').cat.codes

X = data[features]
y = data[target]

# ‡∏™‡∏£‡πâ‡∏≤‡∏á‡πÇ‡∏°‡πÄ‡∏î‡∏•‡∏ó‡∏≥‡∏ô‡∏≤‡∏¢‡∏£‡∏≤‡∏Ñ‡∏≤‡∏ö‡πâ‡∏≤‡∏ô
print("üéØ Training Price Model...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
price_model = RandomForestRegressor(n_estimators=200, random_state=42)
price_model.fit(X_train, y_train)

#‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡πÇ‡∏°‡πÄ‡∏î‡∏•
joblib.dump(price_model, "ml_model/price_model.joblib")
print("‚úÖ Saved: price_model.joblib")

# ‡∏™‡∏£‡πâ‡∏≤‡∏á Reverse Models (‡πÄ‡∏î‡∏≤ feature ‡∏à‡∏≤‡∏Å‡∏£‡∏≤‡∏Ñ‡∏≤)
#print("üéØ Training Reverse Models...")
#for f in features:
#    rev_X = y.values.reshape(-1, 1)  # ‡πÉ‡∏ä‡πâ SalePrice ‡πÄ‡∏õ‡πá‡∏ô input
#    rev_y = X[f].values
#    rev_model = LinearRegression()
#    rev_model.fit(rev_X, rev_y)
#    filename = f"ml_model/reverse_{f}.joblib"
#    joblib.dump(rev_model, filename)
#    print(f"‚úÖ Saved: {filename}")

#‡∏™‡∏£‡πâ‡∏≤‡∏á Imputer Model (‡πÄ‡∏ï‡∏¥‡∏°‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏Ç‡∏≤‡∏î)
print("Training Imputer Model...")
imputer = KNNImputer(n_neighbors=5)
imputer.fit(X)
joblib.dump(imputer, "ml_model/imputer_model.joblib")
print("Saved: imputer_model.joblib")

print("üéâ All models trained and saved successfully!")

üéØ Training Price Model...
‚úÖ Saved: price_model.joblib
Training Imputer Model...
Saved: imputer_model.joblib
üéâ All models trained and saved successfully!


In [9]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score

# ===========================================================
# (Price Model)
# ===========================================================
print("\n Testing Price Model...")
price_model = joblib.load("ml_model/price_model.joblib")

y_pred = price_model.predict(X)

mae = mean_absolute_error(y, y_pred)
r2 = r2_score(y, y_pred)

print(f" Mean Absolute Error (MAE): {mae:,.2f}")
print(f" R¬≤ Score: {r2:.3f}")

# ‡∏ï‡∏±‡∏ß‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏ó‡∏≥‡∏ô‡∏≤‡∏¢ 1 ‡∏ö‡πâ‡∏≤‡∏ô
sample = X.iloc[0:1]
pred_price = price_model.predict(sample)[0]
print("\n Example Prediction:")
print("Input features:", sample.to_dict(orient='records')[0])
print(f"Predicted SalePrice: {pred_price:,.2f}")
print(f"Actual SalePrice: {y.iloc[0]:,.2f}")

# ===========================================================
#  Reverse Models (‡∏ó‡∏≥‡∏ô‡∏≤‡∏¢ features ‡∏à‡∏≤‡∏Å‡∏£‡∏≤‡∏Ñ‡∏≤)
# ===========================================================
#print("\n Testing Reverse Models...")
#for f in features:
#    reverse_model = joblib.load(f"ml_model/reverse_{f}.joblib")
#    sample_price = [[200000]]  # ‡∏ó‡∏î‡∏™‡∏≠‡∏ö‡∏î‡πâ‡∏ß‡∏¢‡∏£‡∏≤‡∏Ñ‡∏≤‡∏ö‡πâ‡∏≤‡∏ô 200,000
#    predicted_value = reverse_model.predict(sample_price)[0]
#   print(f"{f}: {predicted_value:.2f}")

# ===========================================================
#  Imputer
# ===========================================================
print("\n Testing Imputer Model...")
imputer = joblib.load("ml_model/imputer_model.joblib")

# ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏°‡∏µ missing value
sample_with_missing = X.iloc[0:1].copy()
sample_with_missing.iloc[0, [2, 5]] = None  # ‡∏•‡∏ö GrLivArea ‡πÅ‡∏•‡∏∞ FullBath

print("Before imputing:")
print(sample_with_missing)

filled = imputer.transform(sample_with_missing)
filled_df = pd.DataFrame(filled, columns=X.columns)

print("\nAfter imputing:")
print(filled_df)



 Testing Price Model...
 Mean Absolute Error (MAE): 9,703.72
 R¬≤ Score: 0.956

 Example Prediction:
Input features: {'OverallQual': 7, 'TotalBsmtSF': 856, 'LotArea': 8450, 'GarageCars': 2, 'Fireplaces': 0, 'BedroomAbvGr': 3, 'GrLivArea': 1710, 'FullBath': 2, 'Neighborhood': 5}
Predicted SalePrice: 201,029.75
Actual SalePrice: 208,500.00

 Testing Imputer Model...
Before imputing:
   OverallQual  TotalBsmtSF  LotArea  GarageCars  Fireplaces  BedroomAbvGr  \
0            7          856      NaN           2           0           NaN   

   GrLivArea  FullBath  Neighborhood  
0       1710         2             5  

After imputing:
   OverallQual  TotalBsmtSF  LotArea  GarageCars  Fireplaces  BedroomAbvGr  \
0          7.0        856.0   9379.8         2.0         0.0           3.2   

   GrLivArea  FullBath  Neighborhood  
0     1710.0       2.0           5.0  


# Testing

In [15]:
import pandas as pd
import numpy as np
import joblib

# -----------------------------
# 1 ‡πÇ‡∏´‡∏•‡∏î‡πÇ‡∏°‡πÄ‡∏î‡∏•
# -----------------------------
#price_model = joblib.load("ml_model/price_model.joblib")
imputer_model = joblib.load("ml_model/imputer_model.joblib")

features = [
    "OverallQual",
    "TotalBsmtSF",
    "LotArea",
    "GarageCars",
    "Fireplaces",
    "BedroomAbvGr",
    "GrLivArea",
    "FullBath",
    "Neighborhood"
]

# ‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡∏ó‡∏µ‡πà‡πÄ‡∏õ‡πá‡∏ô integer ‡∏ï‡πâ‡∏≠‡∏á‡∏õ‡∏±‡∏î
int_features = [
    "OverallQual", "TotalBsmtSF", "LotArea", "GarageCars",
    "Fireplaces", "BedroomAbvGr", "GrLivArea", "FullBath"
]

#reverse_models = {f: joblib.load(f"ml_model/reverse_{f}.joblib") for f in features}

# -----------------------------
# 2 Mapping Neighborhood
# -----------------------------
neighborhood_mapping = {
    0: "Unknown",
    1: "Bloomington Heights",
    2: "Bluestem",
    3: "Briardale",
    4: "Brookside",
    5: "Clear Creek",
    6: "College Creek",
    7: "Crawford",
    8: "Edwards",
    9: "Gilbert",
    10: "Iowa DOT and Rail Road",
    11: "Meadow Village",
    12: "Mitchell",
    13: "North Ames",
    14: "Northridge",
    15: "Northpark Villa",
    16: "Northridge Heights",
    17: "Northwest Ames",
    18: "Old Town",
    19: "South & West of Iowa State University",
    20: "Sawyer",
    21: "Sawyer West",
    22: "Somerset",
    23: "Stone Brook",
    24: "Timberland",
    25: "Veenker"
}

# -----------------------------
# 3 ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏à‡∏≤‡∏Å‡∏ú‡∏π‡πâ‡πÉ‡∏ä‡πâ (‡∏Å‡∏£‡∏≠‡∏Å‡πÑ‡∏°‡πà‡∏Ñ‡∏£‡∏ö)
# -----------------------------
user_input = {
    "OverallQual": 7,
    "GrLivArea": 1500,
    "GarageCars": None,
    "TotalBsmtSF": None,
    "FullBath": 2,
    "Neighborhood": None,
    "LotArea": 8000,
    "Fireplaces": None,
    "BedroomAbvGr": None,
    "SalePrice": None
}

input_df = pd.DataFrame([user_input])

# -----------------------------
# 4 Imputer Prediction
# -----------------------------
filled_imputer = input_df.drop(columns=["SalePrice"])
filled_imputer = filled_imputer[features]  # ‡∏à‡∏±‡∏î‡∏•‡∏≥‡∏î‡∏±‡∏ö‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå

# ‡πÄ‡∏ï‡∏¥‡∏°‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏Ç‡∏≤‡∏î‡∏î‡πâ‡∏ß‡∏¢ Imputer
filled_imputer_array = imputer_model.transform(filled_imputer)
filled_imputer = pd.DataFrame(filled_imputer_array, columns=features)

# ‡∏õ‡∏±‡∏î integer ‡∏ü‡∏µ‡πÄ‡∏à‡∏≠‡∏£‡πå
for col in int_features:
    filled_imputer[col] = filled_imputer[col].round().astype('Int64')

# ‡πÄ‡∏ï‡∏¥‡∏°‡∏Ñ‡πà‡∏≤ missing ‡∏Ç‡∏≠‡∏á Neighborhood ‡πÄ‡∏õ‡πá‡∏ô 0
filled_imputer["Neighborhood"] = filled_imputer["Neighborhood"].fillna(0).round().astype(int)

# ‡∏ó‡∏≥‡∏ô‡∏≤‡∏¢‡∏£‡∏≤‡∏Ñ‡∏≤‡∏î‡πâ‡∏ß‡∏¢ Price Model
pred_price_imputer = price_model.predict(filled_imputer)[0]

# ‡πÅ‡∏õ‡∏•‡∏á Neighborhood ‡πÄ‡∏õ‡πá‡∏ô‡∏ä‡∏∑‡πà‡∏≠‡∏à‡∏£‡∏¥‡∏á‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÅ‡∏™‡∏î‡∏á‡∏ú‡∏•
filled_imputer["Neighborhood"] = filled_imputer["Neighborhood"].map(lambda x: neighborhood_mapping.get(x, "Unknown"))

# -----------------------------
# 5 Reverse Prediction (‡∏ó‡∏≥‡∏ô‡∏≤‡∏¢ features ‡∏à‡∏≤‡∏Å SalePrice)
# -----------------------------
#if user_input["SalePrice"] is None:
#    guessed_features = {}
#    for f in features:
#        guessed_value = reverse_models[f].predict([[pred_price_imputer]])[0]
#        if f in int_features:
#            guessed_value = int(round(guessed_value))
#        if f == "Neighborhood":
#            guessed_value = int(round(guessed_value))
#        guessed_features[f] = guessed_value

    # ‡∏™‡∏£‡πâ‡∏≤‡∏á DataFrame ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö price_model
#    filled_reverse = pd.DataFrame([guessed_features])
#    pred_price_reverse = price_model.predict(filled_reverse)[0]

    # ‡πÅ‡∏õ‡∏•‡∏á Neighborhood ‡πÄ‡∏õ‡πá‡∏ô‡∏ä‡∏∑‡πà‡∏≠‡∏à‡∏£‡∏¥‡∏á
#    filled_reverse["Neighborhood"] = filled_reverse["Neighborhood"].map(lambda x: neighborhood_mapping.get(x, "Unknown"))
#else:
#    filled_reverse = None
#    pred_price_reverse = None

# -----------------------------
#  ‡πÅ‡∏™‡∏î‡∏á‡∏ú‡∏•
# -----------------------------
print("===  RESULT COMPARISON (Single Prediction with Neighborhood names) ===")
print(f"1 Imputer Prediction:    Price = {pred_price_imputer:,.0f}, Features =\n{filled_imputer.iloc[0].to_dict()}")
#print(f"2 Reverse Prediction:    Price = {pred_price_reverse:,.0f}, Features =\n{filled_reverse.iloc[0].to_dict()}")


===  RESULT COMPARISON (Single Prediction with Neighborhood names) ===
1 Imputer Prediction:    Price = 184,703, Features =
{'OverallQual': 7, 'TotalBsmtSF': 1125, 'LotArea': 8000, 'GarageCars': 2, 'Fireplaces': 0, 'BedroomAbvGr': 3, 'GrLivArea': 1500, 'FullBath': 2, 'Neighborhood': 'North Ames'}


In [21]:
import pandas as pd
import numpy as np
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error

# -----------------------------
# ‡πÇ‡∏´‡∏•‡∏î‡πÇ‡∏°‡πÄ‡∏î‡∏• Imputer
# -----------------------------
try:
    imputer_model = joblib.load("ml_model/imputer_model.joblib")
    print("Imputer Model loaded successfully.\n")
except FileNotFoundError:
    print("ERROR: imputer_model.joblib not found.")
    exit()
neighborhood_list = [
    "Blmngtn", "Blueste", "Briardl", "Brooksd", "ClearCr",
    "CollgCr", "Crawfor", "Edwards", "Gilbert", "IDOTRR",
    "MeadowV", "Mitchel", "NWAmes", "NoRidge", "NPkVill",
    "NridgHt", "NwAmes", "OldTown", "SWISU", "Sawyer",
    "SawyerW", "Somerst", "StoneBr", "Timber", "Veenker"
]

neighborhood_to_num = {name: i for i, name in enumerate(neighborhood_list)}
# -----------------------------
# ‡πÇ‡∏´‡∏•‡∏î "‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏à‡∏£‡∏¥‡∏á‡∏ó‡∏µ‡πà‡∏™‡∏°‡∏ö‡∏π‡∏£‡∏ì‡πå"
# -----------------------------
try:
    full_data = pd.read_csv("train_cleaned.csv")
except FileNotFoundError:
    print("ERROR: ‡πÑ‡∏°‡πà‡∏û‡∏ö‡πÑ‡∏ü‡∏•‡πå‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏î‡∏¥‡∏ö‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÉ‡∏ä‡πâ‡πÄ‡∏õ‡πá‡∏ô '‡πÄ‡∏â‡∏•‡∏¢'")
    print("‡πÇ‡∏õ‡∏£‡∏î‡∏£‡∏∞‡∏ö‡∏∏‡∏ï‡∏≥‡πÅ‡∏´‡∏ô‡πà‡∏á‡πÑ‡∏ü‡∏•‡πå‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏ñ‡∏π‡∏Å‡∏ï‡πâ‡∏≠‡∏á")
    exit()

# ‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡∏ó‡∏µ‡πà‡πÇ‡∏°‡πÄ‡∏î‡∏• Imputer ‡∏ñ‡∏π‡∏Å‡πÄ‡∏ó‡∏£‡∏ô‡∏°‡∏≤
features = [
    "OverallQual", "TotalBsmtSF", "LotArea", "GarageCars",
    "Fireplaces", "BedroomAbvGr", "GrLivArea", "FullBath", "Neighborhood"
]

# ‡∏Ñ‡∏±‡∏î‡∏•‡∏≠‡∏Å‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏â‡∏û‡∏≤‡∏∞‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡∏ó‡∏µ‡πà‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Ç‡πâ‡∏≠‡∏á
data_truth = full_data[features].copy()

print("Converting 'Neighborhood' strings to numbers...")
data_truth['Neighborhood'] = data_truth['Neighborhood'].map(neighborhood_to_num)

data_truth['Neighborhood'] = data_truth['Neighborhood'].fillna(0).astype(int)
# -----------------------------
# ‡∏™‡∏£‡πâ‡∏≤‡∏á ‡πÄ‡∏à‡∏≤‡∏∞‡∏à‡∏á‡∏•‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
# -----------------------------
data_with_holes = data_truth.copy()

# ‡∏™‡∏°‡∏°‡∏ï‡∏¥‡∏ó‡∏î‡∏™‡∏≠‡∏ö 2 ‡∏ü‡∏µ‡πÄ‡∏à‡∏≠‡∏£‡πå: GarageCars ‡πÅ‡∏•‡∏∞ TotalBsmtSF
# ‡∏™‡∏∏‡πà‡∏°‡∏•‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• 20% ‡∏Ç‡∏≠‡∏á 2 ‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡∏ô‡∏µ‡πâ
features_to_test = ['GarageCars', 'TotalBsmtSF']
percent_missing = 0.2

print(f"--- Artificially creating {percent_missing*100}% missing values in {features_to_test} ---")

# ‡πÄ‡∏Å‡πá‡∏ö‡πÄ‡∏â‡∏•‡∏¢‡πÅ‡∏•‡∏∞index ‡∏Ç‡∏≠‡∏á‡πÅ‡∏ñ‡∏ß‡∏ó‡∏µ‡πà‡πÄ‡∏£‡∏≤‡∏•‡∏ö
original_values = {}
missing_indices = {}

for col in features_to_test:
    # ‡∏™‡∏∏‡πà‡∏° index ‡∏ó‡∏µ‡πà‡∏à‡∏∞‡∏•‡∏ö
    np.random.seed(42) # ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡πÉ‡∏´‡πâ‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå‡∏Ñ‡∏á‡∏ó‡∏µ‡πà
    n_missing = int(len(data_with_holes) * percent_missing)
    idx_to_remove = np.random.choice(data_with_holes.index, n_missing, replace=False)

    # ‡πÄ‡∏Å‡πá‡∏ö "‡πÄ‡∏â‡∏•‡∏¢" (‡πÄ‡∏â‡∏û‡∏≤‡∏∞‡πÅ‡∏ñ‡∏ß‡∏ó‡∏µ‡πà‡∏ñ‡∏π‡∏Å‡∏•‡∏ö)
    original_values[col] = data_truth.loc[idx_to_remove, col].copy()
    missing_indices[col] = idx_to_remove

    # ‡∏™‡∏±‡πà‡∏á‡∏•‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• (‡πÅ‡∏ó‡∏ô‡∏ó‡∏µ‡πà‡∏î‡πâ‡∏ß‡∏¢ NaN)
    data_with_holes.loc[idx_to_remove, col] = np.nan
    print(f"Created {len(idx_to_remove)} missing values in '{col}'")

print("--------------------------------------------------\n")

# -----------------------------
# ‡πÉ‡∏ä‡πâ Imputer ‡πÄ‡∏ï‡∏¥‡∏°‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏´‡∏≤‡∏¢‡πÑ‡∏õ
# -----------------------------
print("Running imputer_model.transform()...")
# Imputer ‡∏£‡∏±‡∏ö DataFrame ‡πÅ‡∏•‡∏∞‡∏Ñ‡∏∑‡∏ô‡∏Ñ‡πà‡∏≤‡πÄ‡∏õ‡πá‡∏ô NumPy array
imputed_array = imputer_model.transform(data_with_holes)

# ‡πÅ‡∏õ‡∏•‡∏á‡∏Å‡∏•‡∏±‡∏ö‡πÄ‡∏õ‡πá‡∏ô DataFrame
imputed_data = pd.DataFrame(imputed_array, columns=features, index=data_truth.index)
print("Imputation complete.\n")

# -----------------------------
# ‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö "‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡πÄ‡∏ï‡∏¥‡∏°" ‡∏Å‡∏±‡∏ö "‡∏Ñ‡πà‡∏≤‡∏à‡∏£‡∏¥‡∏á"
# -----------------------------
print("--- Imputer Performance Metrics ---")

for col in features_to_test:
    # "‡πÄ‡∏â‡∏•‡∏¢"
    truth = original_values[col]

    # "‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡πÇ‡∏°‡πÄ‡∏î‡∏•‡πÄ‡∏ï‡∏¥‡∏°" (‡πÄ‡∏â‡∏û‡∏≤‡∏∞‡πÅ‡∏ñ‡∏ß‡∏ó‡∏µ‡πà‡πÄ‡∏Ñ‡∏¢‡∏´‡∏≤‡∏¢‡πÑ‡∏õ)
    idx = missing_indices[col]
    predicted = imputed_data.loc[idx, col]

    # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Error
    # (‡∏ï‡πâ‡∏≠‡∏á‡∏õ‡∏±‡∏î‡πÄ‡∏®‡∏© predicted ‡∏Å‡πà‡∏≠‡∏ô ‡∏ñ‡πâ‡∏≤ feature ‡∏ô‡∏±‡πâ‡∏ô‡πÄ‡∏õ‡πá‡∏ô int)
    if col == 'GarageCars':
        predicted = predicted.round()

    mae = mean_absolute_error(truth, predicted)
    rmse = np.sqrt(mean_squared_error(truth, predicted))

    print(f"Feature: '{col}'")
    print(f"  MAE (Mean Absolute Error): {mae:.4f}")
    print(f"  RMSE (Root Mean Squared Error): {rmse:.4f}\n")

print("MAE :‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢‡πÅ‡∏•‡πâ‡∏ß ‡πÇ‡∏°‡πÄ‡∏î‡∏• ‡πÄ‡∏î‡∏≤‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏´‡∏≤‡∏¢‡πÑ‡∏õ‡∏ú‡∏¥‡∏î‡∏û‡∏•‡∏≤‡∏î‡πÑ‡∏õ‡πÄ‡∏ó‡πà‡∏≤‡πÑ‡∏´‡∏£‡πà")
print("‡πÇ‡∏°‡πÄ‡∏î‡∏•‡πÄ‡∏î‡∏≤‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ó‡∏µ‡πà‡∏à‡∏≠‡∏î‡∏£‡∏ñ‡∏ú‡∏¥‡∏î‡∏û‡∏•‡∏≤‡∏î‡πÑ‡∏õ‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢‡∏õ‡∏£‡∏∞‡∏°‡∏≤‡∏ì 0.-- ‡∏Ñ‡∏±‡∏ô")

Imputer Model loaded successfully.

Converting 'Neighborhood' strings to numbers...
--- Artificially creating 20.0% missing values in ['GarageCars', 'TotalBsmtSF'] ---
Created 292 missing values in 'GarageCars'
Created 292 missing values in 'TotalBsmtSF'
--------------------------------------------------

Running imputer_model.transform()...
Imputation complete.

--- Imputer Performance Metrics ---
Feature: 'GarageCars'
  MAE (Mean Absolute Error): 0.3836
  RMSE (Root Mean Squared Error): 0.6621

Feature: 'TotalBsmtSF'
  MAE (Mean Absolute Error): 237.9986
  RMSE (Root Mean Squared Error): 335.7427

--------------------------------------------------
MAE ‡∏´‡∏°‡∏≤‡∏¢‡∏ñ‡∏∂‡∏á: ‡πÇ‡∏î‡∏¢‡πÄ‡∏â‡∏•‡∏µ‡πà‡∏¢‡πÅ‡∏•‡πâ‡∏ß ‡πÇ‡∏°‡πÄ‡∏î‡∏• '‡πÄ‡∏î‡∏≤' ‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏´‡∏≤‡∏¢‡πÑ‡∏õ‡∏ú‡∏¥‡∏î‡∏û‡∏•‡∏≤‡∏î‡πÑ‡∏õ‡πÄ‡∏ó‡πà‡∏≤‡πÑ‡∏´‡∏£‡πà
‡πÄ‡∏ä‡πà‡∏ô ‡∏ñ‡πâ‡∏≤ MAE ‡∏Ç‡∏≠‡∏á 'GarageCars' = 0.45 ‡∏´‡∏°‡∏≤‡∏¢‡∏Ñ‡∏ß‡∏≤‡∏°‡∏ß‡πà‡∏≤
‡πÇ‡∏°‡πÄ‡∏î‡∏•‡πÄ‡∏î‡∏≤‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ó‡∏µ‡πà‡∏à‡∏≠‡∏î‡∏£‡∏ñ‡∏ú‡∏¥‡∏

‡πÇ‡∏´‡∏•‡∏î‡πÇ‡∏°‡πÄ‡∏î‡∏•
‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
‡∏à‡∏≥‡∏•‡∏≠‡∏á‡∏™‡∏ñ‡∏≤‡∏ô‡∏Å‡∏≤‡∏£‡∏ì‡πå ‡∏•‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏à‡∏£‡∏¥‡∏á‡∏ó‡∏¥‡πâ‡∏á‡πÑ‡∏õ 20% (‡∏à‡∏≥‡∏ô‡∏ß‡∏ô 292 ‡πÅ‡∏ñ‡∏ß) ‡∏à‡∏≤‡∏Å 2 ‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå ‡∏Ñ‡∏∑‡∏≠ GarageCars (‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ó‡∏µ‡πà‡∏à‡∏≠‡∏î‡∏£‡∏ñ) ‡πÅ‡∏•‡∏∞ TotalBsmtSF (‡∏Ç‡∏ô‡∏≤‡∏î‡∏ä‡∏±‡πâ‡∏ô‡πÉ‡∏ï‡πâ‡∏î‡∏¥‡∏ô)
‡∏™‡∏±‡πà‡∏á‡πÉ‡∏´‡πâ‡πÇ‡∏°‡πÄ‡∏î‡∏•"‡πÄ‡∏î‡∏≤" ‡∏´‡∏£‡∏∑‡∏≠ "‡πÄ‡∏ï‡∏¥‡∏°" ‡∏Ñ‡πà‡∏≤‡πÉ‡∏ô‡∏ä‡πà‡∏≠‡∏á‡∏ß‡πà‡∏≤‡∏á 292 ‡∏ä‡πà‡∏≠‡∏á‡∏ô‡∏±‡πâ‡∏ô
‡∏ß‡∏±‡∏î‡∏ú‡∏• ‡πÄ‡∏ä‡πà‡∏ô ‡∏ñ‡πâ‡∏≤ MAE ‡∏Ç‡∏≠‡∏á 'GarageCars' = 0.45