In [1]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, TargetEncoder
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from joblib import dump, load
from sklearn.pipeline import Pipeline
from preprocessor import RestoreColumnNames
from preprocessor import FeatureEngineering

# 1. Load data and split
# Read the prepared dataset and discard the "Unnamed: 0" column
# (presumably a leftover index column from an earlier to_csv — confirm).
df = pd.read_csv("../house_data_done.csv").drop(columns=["Unnamed: 0"])

# Separate the target ("price") from the feature columns.
y = df["price"]
X = df.drop(columns=["price"])

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# The model is fitted on log1p(price), so both target splits get the same transform.
y_train_logged, y_test_logged = (np.log1p(target) for target in (y_train, y_test))




# 2. Preprocessing
# Columns routed to each encoder; every other column passes through unchanged.
state_and_town_feat = ["state", "town"]
title_feat = ["title"]

# NOTE(review): with drop="first" and handle_unknown="ignore", an unseen
# state/town is encoded as all zeros, i.e. identically to the dropped first
# category — confirm this is the intended behaviour at prediction time.
one_hot = OneHotEncoder(drop="first", handle_unknown="ignore", sparse_output=False)
t_encoder = TargetEncoder(cv=5, target_type="continuous")

preprocessor = ColumnTransformer(
    transformers=[
        ("One_hot", one_hot, state_and_town_feat),
        ("T_Encoder", t_encoder, title_feat),
    ],
    remainder="passthrough",
)

# Fit once up front so the output column names can be read off the fitted
# transformer (the pipeline fits it again later).
preprocessor.fit(X_train, y_train_logged)

# Output order mirrors the transformer list: one-hot columns, then the
# target-encoded title column, then the passthrough remainder.
encoded_columns = (
    preprocessor.named_transformers_["One_hot"]
    .get_feature_names_out(state_and_town_feat)
    .tolist()
)
handled_columns = set(state_and_town_feat) | set(title_feat)
passthrough_columns = [
    name for name in preprocessor.feature_names_in_ if name not in handled_columns
]
final_columns = [*encoded_columns, *title_feat, *passthrough_columns]

# Wrap the transformer together with the column names computed above
# (RestoreColumnNames is project-local; see preprocessor.py for what it does with them).
# NOTE(review): TargetEncoder only applies its internal cross fitting (cv=5) inside
# fit_transform. If RestoreColumnNames does not forward fit_transform to the wrapped
# transformer, training rows are encoded via fit(...).transform(...) instead — confirm.
named_preprocessor = RestoreColumnNames(preprocessor, final_columns)

# Instantiate the FeatureEngineering step (project-local, from preprocessor.py).
feature_engineering = FeatureEngineering()

# 3. Pipeline (Preprocessing + Feature Engineering + Model)
# Random forest regressor; random_state is fixed so repeated fits are reproducible.
model = RandomForestRegressor(
    n_estimators=400,
    max_depth=100,
    min_samples_split=2,
    min_samples_leaf=2,
    max_leaf_nodes=None,
    random_state=42,
)

# Steps run in order: encode the categorical columns, apply the project's
# feature engineering, then fit the regressor.
pipeline = Pipeline([
    ("preprocessor", named_preprocessor),
    ("feature_engineering", feature_engineering),
    ("model", model),
])

# 4. Fit
# y_train_logged (log1p of the training prices) is computed once, right after the
# train/test split; it is reused here instead of being recomputed a second time.
pipeline.fit(X_train, y_train_logged)

# 5. Save

    

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [2]:
# Persist the fitted pipeline to disk so it can be reloaded later for prediction.
dump(value=pipeline, filename="house_price_pipeline.joblib")

['house_price_pipeline.joblib']

In [3]:
def HousePrediction(df, filename, log_target=True):
    """
    Predict a house price from the given dataframe using a saved pipeline.

    Parameters
    ----------
    df : pandas DataFrame with the same columns as the training data.
        Only the prediction for the first row is reported.
    filename : Path to the saved joblib pipeline. An already-loaded pipeline
        (any object exposing a ``predict`` method) is also accepted, which
        avoids re-reading the file on every call.
    log_target : If True, the model was trained on a log1p target and the
        prediction is mapped back to the original scale with expm1.

    Returns
    -------
    str
        A sentence containing the predicted price for the first row,
        formatted to one decimal place.

    Raises
    ------
    ValueError
        If ``df`` contains no rows.
    """
    # Fail early with a clear message instead of an IndexError on y_pred[0].
    if len(df) == 0:
        raise ValueError("df must contain at least one row to predict on")

    if hasattr(filename, "predict"):
        # Caller passed a pipeline object directly; use it as-is.
        pipeline = filename
    else:
        # Load pipeline (preprocessing + feature engineering + model).
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code — only load pipeline files from a trusted source.
        pipeline = load(filename)

    # Predict
    y_pred = pipeline.predict(df)

    # Unlog if model was trained on log values
    if log_target:
        y_pred = np.expm1(y_pred)

    return f"The Annual estimated price in Naira is ₦{y_pred[0]:.1f}"

In [4]:
import pandas as pd
from joblib import dump, load
# One listing to price, written as a flat record (column name -> value).
listing = {
    "bedroom": 3,
    "parking_lot": 0,
    "bathroom": 2,
    "toilets": 3,
    "town": "Ojodu",
    "state": "Lagos",
    "serviced": 0,
    "extras": 4,
    "Stable Electricity": 1,
    "title": "flat apartment",
}

# Wrap each value in a one-element list: the column-oriented layout of a one-row frame.
data = {column: [value] for column, value in listing.items()}
df_predict = pd.DataFrame(data)
df_predict

Unnamed: 0,bedroom,parking_lot,bathroom,toilets,town,state,serviced,extras,Stable Electricity,title
0,3,0,2,3,Ojodu,Lagos,0,4,1,flat apartment


In [5]:
# Estimate the price of the sample listing using the pipeline file saved earlier.
HousePrediction(df=df_predict, filename="house_price_pipeline.joblib")

'The Annual estimatated price in Naira is ₦2557632.6'

Unnamed: 0,bedroom,parking_lot,bathroom,toilets,town,state,serviced,extras,Stable Electricity,title
0,1,0,1,1,Ojodu,Ogun,0,1,0,flat apartment
