In [10]:
import re

import joblib
import numpy as np # Import numpy
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

# Define the custom transformer class (as it was when saving the pipeline)
class RealEstateTransformer(BaseEstimator, TransformerMixin):
    """Stateless feature-engineering step for raw property-listing frames.

    This definition exists so that a pipeline saved with this transformer
    can be loaded again; keep the class name and the names of the columns
    it produces unchanged.

    ``transform`` expects a DataFrame with the columns listed in
    ``_REQUIRED_COLUMNS`` and returns a copy with ``Price`` converted to a
    number and the engineered feature columns appended.
    """

    # Raw columns read by ``transform``.
    _REQUIRED_COLUMNS = (
        'Property Title',
        'Price',
        'Location',
        'Total_Area',
        'Price_per_SQFT',
        'Baths',
        'Balcony',
    )

    # Price suffixes and their multipliers, tested in this order.
    _PRICE_UNITS = (('L', 1e5), ('Cr', 1e7))

    # Compiled once instead of on every row.
    _BHK_PATTERN = re.compile(r'(\d+)\s*BHK', re.IGNORECASE)

    def fit(self, X, y=None) -> "RealEstateTransformer":
        """Do nothing and return ``self``; the transformer learns no state."""
        return self

    @classmethod
    def _parse_price(cls, price) -> float:
        """Convert one price cell (e.g. ``'95 L'``, ``'3.2 Cr'``) to a float.

        Returns NaN for anything that cannot be parsed, including strings
        that carry a unit suffix but have a non-numeric remainder.
        """
        try:
            if isinstance(price, str):
                text = price.replace(',', '').strip()
                for suffix, multiplier in cls._PRICE_UNITS:
                    if suffix in text:
                        return float(text.replace(suffix, '')) * multiplier
                return float(text)
            return float(price)
        except (ValueError, TypeError):
            # NaN (not None) so the resulting column is always numeric.
            return np.nan

    @classmethod
    def _extract_bhk(cls, title) -> float:
        """Return the number preceding 'BHK' in *title*, or NaN if absent."""
        if isinstance(title, str):
            match = cls._BHK_PATTERN.search(title)
            if match:
                return int(match.group(1))
        return np.nan

    @staticmethod
    def _location_part(location, index: int):
        """Return the comma-separated part of *location* at *index*, stripped.

        Non-string cells yield ``None``.
        """
        if isinstance(location, str):
            return location.split(',')[index].strip()
        return None

    @staticmethod
    def _bhk_category(bhk) -> str:
        """Label a BHK count as '2BHK', '3BHK' or 'Other'."""
        if bhk == 2:
            return '2BHK'
        if bhk == 3:
            return '3BHK'
        return 'Other'

    @staticmethod
    def _size_category(area) -> str:
        """Bucket an area into 'Large' (>1500), 'Medium' (>800) or 'Small'."""
        if area > 1500:
            return 'Large'
        if area > 800:
            return 'Medium'
        return 'Small'

    @staticmethod
    def _safe_ratio(numerator: pd.Series, denominator: pd.Series) -> pd.Series:
        """Divide two Series, turning the +/-inf of a zero denominator into NaN."""
        return (numerator / denominator).replace([np.inf, -np.inf], np.nan)

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of *X* with the engineered feature columns added.

        Parameters
        ----------
        X : DataFrame containing every column in ``_REQUIRED_COLUMNS``.

        Returns
        -------
        A new DataFrame; *X* itself is not modified. ``Price`` is replaced
        by its numeric value and the feature columns are appended in a
        fixed order.

        Raises
        ------
        KeyError
            If any required column is missing from *X*.
        """
        missing = [col for col in self._REQUIRED_COLUMNS if col not in X.columns]
        if missing:
            raise KeyError(f"Input is missing required column(s): {missing}")

        out = X.copy()

        out['Price'] = out['Price'].map(self._parse_price)
        # The last comma-separated part of 'Location' is used as the city.
        out['City_Name'] = out['Location'].map(
            lambda loc: self._location_part(loc, -1)
        )
        out['BHK'] = out['Property Title'].map(self._extract_bhk)

        area = out['Total_Area']
        baths = out['Baths']
        bhk = out['BHK']
        price_per_sqft = out['Price_per_SQFT']

        # Column insertion order below is kept stable on purpose.
        out['BHK_Category'] = bhk.map(self._bhk_category)
        out['Total_Rooms'] = bhk + baths
        total_rooms = out['Total_Rooms']

        out['Is_Premium_Size'] = (area > 2000).astype('int64')
        out['log_Area_x_BHK'] = np.log1p(area * bhk)
        out['Bath_to_BHK_ratio'] = self._safe_ratio(baths, bhk)
        out['Area_x_Baths'] = area * baths
        out['log_area_per_room'] = np.log1p(self._safe_ratio(area, total_rooms))
        out['Property_Size_Category'] = area.map(self._size_category)
        out['Area_Efficiency'] = self._safe_ratio(price_per_sqft, area)
        out['log_area'] = np.log1p(area)
        # NOTE(review): this feature is derived from 'Price'. If 'Price' is
        # also the quantity the downstream model predicts, this is target
        # leakage -- confirm against the training setup.
        out['Price_per_Room'] = self._safe_ratio(out['Price'], total_rooms)
        out['Area_x_BHK'] = area * bhk
        # The first comma-separated part of 'Location' is the neighbourhood.
        out['Neighborhood'] = out['Location'].map(
            lambda loc: self._location_part(loc, 0)
        )
        out['Has_Multiple_Baths'] = (baths > 1).astype('int64')
        out['Luxury_Score'] = price_per_sqft * area / 100000
        # Only the exact string 'Yes' counts as having a balcony.
        out['Has_Balcony'] = (out['Balcony'] == 'Yes').astype('int64')

        return out


# Two hand-written sample listings, one record per property, in the raw
# column layout that the pipeline is fed.
sample_listings = [
    {
        'Property Title': '2 BHK Apartment in a Gated Community',
        'Price': '95 L',
        'Location': 'Velachery, Chennai',
        'Total_Area': 1150,
        'Price_per_SQFT': 8260,
        'Baths': 2,
        'Balcony': 'Yes',
    },
    {
        'Property Title': '4 BHK Independent Villa',
        'Price': '3.2 Cr',
        'Location': 'Gachibowli, Hyderabad',
        'Total_Area': 3400,
        'Price_per_SQFT': 9411,
        'Baths': 4,
        'Balcony': 'Yes',
    },
]
unseen_data = pd.DataFrame(sample_listings)

# Load the saved pipeline from disk.
PIPELINE_PATH = '/content/property_price_pipeline.joblib'
loaded_pipeline = joblib.load(PIPELINE_PATH)

# Run the sample listings through the pipeline.
predicted_prices = loaded_pipeline.predict(unseen_data)

# Report each prediction next to the title of the listing it belongs to.
print("\n--- Predictions on Unseen Data ---")
for title, prediction in zip(unseen_data['Property Title'], predicted_prices):
    print(f"For property: '{title}'")
    print(f"   -> Predicted Price: ₹{prediction:.2f} Lakhs\n")


--- Predictions on Unseen Data ---
For property: '2 BHK Apartment in a Gated Community'
   -> Predicted Price: ₹62.74 Lakhs

For property: '4 BHK Independent Villa'
   -> Predicted Price: ₹134.09 Lakhs

