In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor

# Load data
df = pd.read_csv("housing.csv")

# Features and target
X = df.drop("median_house_value", axis=1)
y = np.log1p(df["median_house_value"])  # log transform

# Categorical columns
cat_cols = ["ocean_proximity"]
num_cols = [col for col in X.columns if col not in cat_cols]

# Preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ("num", "passthrough", num_cols)
    ]
)

# Model
model = XGBRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=7,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

# Pipeline
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("model", model)
])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train
pipeline.fit(X_train, y_train)

# Evaluate
preds = np.expm1(pipeline.predict(X_test))
actual = np.expm1(y_test)

print("MAE:", mean_absolute_error(actual, preds))


MAE: 29390.601157491514


In [4]:
#TESTING FOR US-DOONOT PLACE IN SYSTEM
def predict_plot_price():
    user_input = {
        "longitude": -122.26,
        "latitude": 37.84,
        "housing_median_age": 50,
        "total_rooms": 2239,
        "total_bedrooms": 455,
        "population": 990,
        "households": 419,
        "median_income": 1.9911,
        "ocean_proximity": "NEAR BAY"
    }

    user_df = pd.DataFrame([user_input])
    prediction = np.expm1(pipeline.predict(user_df))

    print("Predicted Plot Price:", round(prediction[0], 2))


predict_plot_price()


Predicted Plot Price: 149270.88


In [2]:
#USER INPUT-ACTUAL PLACE IN SYSTEM
def predict_plot_price():
    print("\nEnter plot details:")

    longitude = float(input("Longitude: "))
    latitude = float(input("Latitude: "))
    housing_median_age = float(input("Housing median age: "))
    total_rooms = float(input("Total rooms: "))
    total_bedrooms = float(input("Total bedrooms: "))
    population = float(input("Population: "))
    households = float(input("Households: "))
    median_income = float(input("Median income: "))
    ocean_proximity = input("Ocean proximity (e.g. NEAR BAY, INLAND): ")

    # Encode ocean proximity
    ocean_encoded = le.transform([ocean_proximity])[0]

    user_data = pd.DataFrame([[
        longitude, latitude, housing_median_age, total_rooms,
        total_bedrooms, population, households, median_income,
        ocean_encoded
    ]], columns=X.columns)

    predicted_price = model.predict(user_data)

    print("\nPredicted Plot Price:", round(predicted_price[0], 2))


# Call the function
predict_plot_price()



Enter plot details:
Longitude: -122.26
Latitude: 37.84
Housing median age: 50
Total rooms: 2239
Total bedrooms: 455
Population: 990
Households: 419
Median income: 1.9911
Ocean proximity (e.g. NEAR BAY, INLAND): NEAR BAY

Predicted Plot Price: 142077.42
