In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import pickle
import os

# Project directory that holds both the dataset and the saved model.
# The default is the original location; set HOUSE_PRICE_PROJECT_DIR to run
# this notebook on another machine without editing the code.
model_dir = os.environ.get(
    "HOUSE_PRICE_PROJECT_DIR",
    "C:/Users/hp/Desktop/MLOps Assignment-01/house-price-predictor",
)
os.makedirs(model_dir, exist_ok=True)

# The dataset path is derived from model_dir so the base directory is
# configured in exactly one place.
data = pd.read_csv(f"{model_dir}/HousePriceDataset/Housing.csv")

# String-valued columns are one-hot encoded; every other column is passed
# through to the model unchanged (remainder="passthrough").
categorical_features = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']
one_hot = OneHotEncoder()
transformer = ColumnTransformer([("one_hot", one_hot, categorical_features)], remainder="passthrough")

# Fixed random_state keeps the forest reproducible between runs.
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Bundling encoder and model in one Pipeline means the pickled object can be
# fed raw feature rows at prediction time.
pipe = Pipeline(steps=[('transformer', transformer), ('model', model)])

# "price" is the target; all remaining columns are features.
X = data.drop("price", axis=1)
y = data["price"]
# Hold out 20% of the rows for evaluation, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipe.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_preds = pipe.predict(X_test)
print(f"Mean Squared Error: {mean_squared_error(y_test, y_preds)}")

# Persist the whole fitted pipeline (preprocessing + model) for later reuse.
model_path = os.path.join(model_dir, 'house_price_predictor_model.pkl')
with open(model_path, 'wb') as file:
    pickle.dump(pipe, file)

def predict_house_price(input_data):
    """Predict the price of a single house with the pickled pipeline.

    The pipeline is unpickled from the module-level ``model_path`` on every
    call, so that file must already exist. Unpickling executes code stored in
    the file, so ``model_path`` should only ever point at a trusted file.

    Args:
        input_data: Feature values for one house, in this order: area,
            bedrooms, bathrooms, stories, mainroad, guestroom, basement,
            hotwaterheating, airconditioning, parking, prefarea,
            furnishingstatus.

    Returns:
        The result of calling ``predict`` on the loaded object with a
        one-row DataFrame built from ``input_data``.
    """
    feature_names = [
        'area', 'bedrooms', 'bathrooms', 'stories',
        'mainroad', 'guestroom', 'basement',
        'hotwaterheating', 'airconditioning',
        'parking', 'prefarea', 'furnishingstatus',
    ]

    with open(model_path, 'rb') as handle:
        fitted_pipeline = pickle.load(handle)

    # Wrap the values in a one-row frame so the pipeline's column-based
    # preprocessing can select features by name.
    single_row = pd.DataFrame([input_data], columns=feature_names)
    return fitted_pipeline.predict(single_row)

# Example house, with values listed in the column order that
# predict_house_price assigns to them.
input_data = [
    7420, 4, 2, 3,          # area, bedrooms, bathrooms, stories
    'yes', 'no', 'no',      # mainroad, guestroom, basement
    'no', 'yes',            # hotwaterheating, airconditioning
    2, 'yes', 'furnished',  # parking, prefarea, furnishingstatus
]
predicted_price = predict_house_price(input_data)
print(f"Predicted Price: {predicted_price}")


Mean Squared Error: 322415859354.65436
Predicted Price: [12711259.4]


In [1]:
pip show scikit-learn

Name: scikit-learn
Version: 1.5.1
Summary: A set of python modules for machine learning and data mining
Home-page: https://scikit-learn.org
Author: 
Author-email: 
License: new BSD
Location: C:\Users\hp\AppData\Local\Programs\Python\Python312\Lib\site-packages
Requires: joblib, numpy, scipy, threadpoolctl
Required-by: 
Note: you may need to restart the kernel to use updated packages.
