In [14]:
# Reset environment and clear imports
import sys
import os
import joblib
from pathlib import Path

for module in list(sys.modules.keys()):
    if 'data_processing' in module or 'predict' in module or 'feature_engineering' in module or 'train' in module:
        del sys.modules[module]

project_dir = Path.cwd()
sys.path.insert(0, str(project_dir / 'src'))
sys.path.insert(0, str(project_dir / 'deployment'))

print("=" * 70)
print("STEP 1: LOADING THE TRAINED MODEL AND FEATURE ENGINEER")
print("=" * 70)

# Load the model and feature engineer directly
model_path = project_dir / 'models' / 'ridge_model.pkl'
feature_engineer_path = project_dir / 'models' / 'feature_engineer.pkl'

print(f"\nüì¶ Loading model from: {model_path}")
print(f"üì¶ Loading feature engineer from: {feature_engineer_path}")

try:
    model = joblib.load(model_path)
    feature_engineer = joblib.load(feature_engineer_path)
    
    print("‚úÖ Model loaded successfully")
    print("‚úÖ Feature engineer loaded successfully")
except Exception as e:
    print(f"‚ùå Error loading: {e}")
    raise

print("\n" + "=" * 70)
print("STEP 2: EXAMINING MODEL STRUCTURE")
print("=" * 70)

print(f"\nModel type: {type(model).__name__}")
print(f"Feature engineer type: {type(feature_engineer).__name__}")

# Check if model has named_steps (Pipeline)
if hasattr(model, 'named_steps'):
    print(f"\nModel pipeline steps:")
    for name, step in model.named_steps.items():
        print(f"  - {name}: {type(step).__name__}")

# Print model expected features
if hasattr(model, 'n_features_in_'):
    print(f"\n‚úì Model expects {model.n_features_in_} features")

print("\n" + "=" * 70)
print("STEP 3: EXAMINING FEATURE ENGINEER STATE")
print("=" * 70)

print(f"\nFeature engineer scaler: {type(feature_engineer.scaler).__name__ if feature_engineer.scaler else 'None'}")
print(f"Feature engineer has feature_columns: {feature_engineer.feature_columns is not None}")

if feature_engineer.feature_columns:
    print(f"\n‚úì Total engineered features: {len(feature_engineer.feature_columns)}")
    print(f"\nFirst 15 feature columns:")
    for i, col in enumerate(feature_engineer.feature_columns[:15], 1):
        print(f"  {i:2d}. {col}")
    print("  ...")
    print(f"\nLast 5 feature columns:")
    for i, col in enumerate(feature_engineer.feature_columns[-5:], len(feature_engineer.feature_columns)-4):
        print(f"  {i:2d}. {col}")
else:
    print("‚ùå Feature columns not captured - need to retrain or examine saved state")

print("\n‚úÖ State inspection complete")


STEP 1: LOADING THE TRAINED MODEL AND FEATURE ENGINEER

üì¶ Loading model from: e:\GitHub\House Price Regression\models\ridge_model.pkl
üì¶ Loading feature engineer from: e:\GitHub\House Price Regression\models\feature_engineer.pkl
‚úÖ Model loaded successfully
‚úÖ Feature engineer loaded successfully

STEP 2: EXAMINING MODEL STRUCTURE

Model type: Pipeline
Feature engineer type: FeatureEngineer

Model pipeline steps:
  - model: Ridge

‚úì Model expects 215 features

STEP 3: EXAMINING FEATURE ENGINEER STATE

Feature engineer scaler: None
Feature engineer has feature_columns: False
‚ùå Feature columns not captured - need to retrain or examine saved state

‚úÖ State inspection complete


In [15]:
import pandas as pd
from src.complete_dataset import complete_sample_with_nan, dataset_columns
from predict import HousingPricePredictor

print("=" * 70)
print("STEP 4: EXTRACTING ACTUAL FEATURE NAMES FROM MODEL")
print("=" * 70)

# Get the feature names from the model's Ridge estimator
ridge_model = model.named_steps['model']

# Check if the model has get_feature_names_out() method (sklearn >= 1.0)
try:
    if hasattr(ridge_model, 'get_feature_names_out'):
        # This won't work directly on Ridge, need to check the preprocessor
        print("\n‚ö†Ô∏è Ridge model doesn't directly store feature names")
    else:
        print("\n‚ö†Ô∏è Model doesn't have get_feature_names_out method")
except Exception as e:
    print(f"\n‚ö†Ô∏è Error: {e}")

# Try to get feature names from the pipeline
try:
    feature_names = model.get_feature_names_out()
    print(f"\n‚úÖ Got feature names from pipeline: {len(feature_names)} features")
    actual_features = list(feature_names)
except Exception as e:
    print(f"\n‚ùå Could not get feature names from pipeline: {e}")
    # Fallback: create generic feature names
    actual_features = [f"feature_{i}" for i in range(model.n_features_in_)]
    print(f"‚ö†Ô∏è Using generic feature names: feature_0, feature_1, ..., feature_{len(actual_features)-1}")

print(f"\nModel expected features ({len(actual_features)}):")
if len(actual_features) < 50:
    for i, feat in enumerate(actual_features, 1):
        print(f"  {i:3d}. {feat}")
else:
    for i, feat in enumerate(actual_features[:15], 1):
        print(f"  {i:3d}. {feat}")
    print("  ...")
    for i, feat in enumerate(actual_features[-10:], len(actual_features)-9):
        print(f"  {i:3d}. {feat}")

print("\n" + "=" * 70)
print("STEP 5: TESTING PREDICTION WITH COMPLETE_DATASET")
print("=" * 70)

# Sample record (incomplete)
sample_record = {
    'OverallQual': 7,
    'GrLivArea': 2000,
    'TotalBsmtSF': 1000,
    'GarageCars': 2,
    'GarageArea': 500,
    'YearBuilt': 2000,
    'Neighborhood': 'CollgCr',
    'HouseStyle': '2Story',
    'ExterQual': 'Gd',
    'KitchenQual': 'TA',
    'BsmtQual': 'Gd',
    'BsmtCond': 'TA',
    'GarageType': 'Attchd',
    'GarageFinish': 'RFn',
    'GarageQual': 'TA',
    'GarageCond': 'TA'
}

print(f"\nüìã Original sample: {len(sample_record)} keys")
completed_sample = complete_sample_with_nan(sample_record)
print(f"‚úÖ Completed sample: {len(completed_sample)} keys")

# Now test with the predictor
print("\n" + "-" * 70)
print("Testing HousingPricePredictor:")
print("-" * 70)

base_dir = project_dir
model_path = base_dir / 'models' / 'ridge_model.pkl'
feature_engineer_path = base_dir / 'models' / 'feature_engineer.pkl'

try:
    predictor = HousingPricePredictor(str(model_path), str(feature_engineer_path))
    print("‚úÖ Predictor initialized")
    
    result = predictor.predict(completed_sample)
    print(f"\nüéØ ‚úÖ PREDICTION SUCCESSFUL!")
    print(f"Predicted Price: ${result['predicted_price']:,.2f}")
    print(f"Model Type: {result['model_type']}")
    print(f"Confidence: {result['prediction_confidence']}")
    
except Exception as pred_error:
    print(f"\n‚ùå Prediction failed: {type(pred_error).__name__}")
    print(f"Error: {str(pred_error)[:200]}")
    import traceback
    traceback.print_exc()

print("\n" + "=" * 70)


STEP 4: EXTRACTING ACTUAL FEATURE NAMES FROM MODEL

‚ö†Ô∏è Model doesn't have get_feature_names_out method

‚ùå Could not get feature names from pipeline: Estimator model does not provide get_feature_names_out. Did you mean to call pipeline[:-1].get_feature_names_out()?
‚ö†Ô∏è Using generic feature names: feature_0, feature_1, ..., feature_214

Model expected features (215):
    1. feature_0
    2. feature_1
    3. feature_2
    4. feature_3
    5. feature_4
    6. feature_5
    7. feature_6
    8. feature_7
    9. feature_8
   10. feature_9
   11. feature_10
   12. feature_11
   13. feature_12
   14. feature_13
   15. feature_14
  ...
  206. feature_205
  207. feature_206
  208. feature_207
  209. feature_208
  210. feature_209
  211. feature_210
  212. feature_211
  213. feature_212
  214. feature_213
  215. feature_214

STEP 5: TESTING PREDICTION WITH COMPLETE_DATASET

üìã Original sample: 16 keys
‚úÖ Completed sample: 81 keys

------------------------------------------------------

Traceback (most recent call last):
  File "C:\Users\Kiarie Jeff\AppData\Local\Temp\ipykernel_34148\2101495303.py", line 85, in <module>
    result = predictor.predict(completed_sample)
  File "e:\GitHub\House Price Regression\deployment\predict.py", line 127, in predict
    prediction = self.model.predict(X)[0]
                 ~~~~~~~~~~~~~~~~~~^^^
  File "c:\Users\Kiarie Jeff\miniconda3\envs\env_1\Lib\site-packages\sklearn\pipeline.py", line 601, in predict
    return self.steps[-1][1].predict(Xt, **params)
           ~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
  File "c:\Users\Kiarie Jeff\miniconda3\envs\env_1\Lib\site-packages\sklearn\linear_model\_base.py", line 306, in predict
    return self._decision_function(X)
           ~~~~~~~~~~~~~~~~~~~~~~~^^^
  File "c:\Users\Kiarie Jeff\miniconda3\envs\env_1\Lib\site-packages\sklearn\linear_model\_base.py", line 285, in _decision_function
    X = self._validate_data(X, accept_sparse=["csr", "csc", "coo"], reset=False)
  File "c:\Users\Kiari