Assignment week 5:- House Price Prediction.
Resources :
https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques/data

In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import skew

# -------------------------
# Load and preprocess training data
# -------------------------
train = pd.read_csv(r"C:\Users\admin\Downloads\train.csv")
y = train['SalePrice']
train.drop(['SalePrice'], axis=1, inplace=True)
test_dummy = pd.read_csv(r"C:\Users\admin\Downloads\test.csv")

combined = pd.concat([train, test_dummy], keys=['train', 'test'])

# Fill missing values
for col in combined.columns:
    if combined[col].dtype == 'object':
        combined[col] = combined[col].fillna(combined[col].mode()[0])
    else:
        combined[col] = combined[col].fillna(combined[col].median())

# Feature Engineering
combined['TotalSF'] = combined['TotalBsmtSF'] + combined['1stFlrSF'] + combined['2ndFlrSF']
combined['Age'] = combined['YrSold'] - combined['YearBuilt']
combined['RemodAge'] = combined['YrSold'] - combined['YearRemodAdd']
combined.drop(['GarageYrBlt'], axis=1, inplace=True)

# Fix skew
numeric_feats = combined.select_dtypes(include=[np.number]).columns
skewed_feats = combined[numeric_feats].apply(lambda x: skew(x.dropna()))
skewed_feats = skewed_feats[abs(skewed_feats) > 0.75].index
combined[skewed_feats] = np.log1p(combined[skewed_feats])

# One-hot encoding
combined = pd.get_dummies(combined)
X_train = combined.loc['train']
common_cols = X_train.columns

# Log-transform target
y_log = np.log1p(y)

# Train RandomForest model
model = RandomForestRegressor(n_estimators=200, random_state=42)
model.fit(X_train, y_log)

# -------------------------
# Input helper functions
# -------------------------
def safe_int_input(prompt, default):
    try:
        value = input(f"{prompt} [default: {default}]: ").strip()
        return int(value) if value else default
    except ValueError:
        print("⚠️ Invalid input. Using default value.")
        return default

def safe_str_input(prompt, default):
    value = input(f"{prompt} [default: {default}]: ").strip()
    return value if value else default

# -------------------------
# Ask user for input
# -------------------------
print("\n🔍 Enter House Details for Price Prediction\n")

user_input = {
    'Neighborhood': safe_str_input("Neighborhood (e.g., NAmes, CollgCr)", "NAmes"),
    'YearBuilt': safe_int_input("Year Built", 2005),
    'YearRemodAdd': safe_int_input("Year Remodeled", 2007),
    'BedroomAbvGr': safe_int_input("Number of Bedrooms", 3),
    'FullBath': safe_int_input("Number of Full Baths", 2),
    'HalfBath': safe_int_input("Number of Half Baths", 1),
    'GarageCars': safe_int_input("Garage Capacity (cars)", 2),
    'GarageFinish': safe_str_input("Garage Finish (Unf, RFn, Fin)", "RFn"),
    'GrLivArea': safe_int_input("Living Area (sq.ft)", 1800),
    'Fireplaces': safe_int_input("Number of Fireplaces", 1)
}

# Fill in default values for required fields
default_values = {
    'MSSubClass': 60,
    'LotFrontage': 70.0,
    'LotArea': 8000,
    'OverallQual': 6,
    'OverallCond': 5,
    'MasVnrArea': 100.0,
    'BsmtFinSF1': 600,
    'BsmtUnfSF': 400,
    'TotalBsmtSF': 1000,
    '1stFlrSF': 1000,
    '2ndFlrSF': 800,
    'GarageArea': 450,
    'YrSold': 2010,
    'ExterQual': 'Gd',
    'HouseStyle': '2Story',
    'Exterior1st': 'VinylSd',
    'Exterior2nd': 'VinylSd',
    'SaleCondition': 'Normal'
}
user_input.update(default_values)

# Feature engineering
user_input['TotalSF'] = user_input['TotalBsmtSF'] + user_input['1stFlrSF'] + user_input['2ndFlrSF']
user_input['Age'] = user_input['YrSold'] - user_input['YearBuilt']
user_input['RemodAge'] = user_input['YrSold'] - user_input['YearRemodAdd']

# Convert to DataFrame, encode, and align
input_df = pd.DataFrame([user_input])
input_df = pd.get_dummies(input_df)
input_df = input_df.reindex(columns=common_cols, fill_value=0)

# Predict price
predicted_log_price = model.predict(input_df)[0]
predicted_price = np.expm1(predicted_log_price)

# -------------------------
# Output Result
# -------------------------
print("\n🏠 House Summary")
print(f"Neighborhood    : {user_input['Neighborhood']}")
print(f"Built Year      : {user_input['YearBuilt']}")
print(f"Remodeled Year  : {user_input['YearRemodAdd']}")
print(f"Bedrooms        : {user_input['BedroomAbvGr']}")
print(f"Bathrooms       : {user_input['FullBath']} Full, {user_input['HalfBath']} Half")
print(f"Garage          : {user_input['GarageCars']}-car, Finish: {user_input['GarageFinish']}")
print(f"Living Area     : {user_input['GrLivArea']} sq.ft")
print(f"Fireplaces      : {user_input['Fireplaces']}")
print(f"\n💰 Predicted House Price: ${predicted_price:,.2f}")



🔍 Enter House Details for Price Prediction


🏠 House Summary
Neighborhood    : NAmes
Built Year      : 2005
Remodeled Year  : 2007
Bedrooms        : 3
Bathrooms       : 2 Full, 1 Half
Garage          : 2-car, Finish: RFn
Living Area     : 1800 sq.ft
Fireplaces      : 1

💰 Predicted House Price: $204,014.05
