In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# Read the Ames housing data from the Kaggle input directory
df = pd.read_csv('/kaggle/input/d/shashanknecrothapa/ames-housing-dataset/AmesHousing.csv')

# Columns taken from the dataset: the predictors plus the target ('SalePrice')
features = ['Year Built', 'House Style', 'Roof Style', 'Sale Type', 'Sale Condition', 'SalePrice']

# Target is the sale price; predictors are every other selected column
y = df['SalePrice']
X = df[[column for column in features if column != 'SalePrice']]

# Expand each categorical predictor into indicator columns. drop_first=True
# omits the first level of every category, which then acts as the baseline
# (a row whose indicators for that category are all 0).
categorical_columns = ['House Style', 'Roof Style', 'Sale Type', 'Sale Condition']
X = pd.get_dummies(X, columns=categorical_columns, drop_first=True)

# Hold out 20% of the rows for evaluation, with a fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling parameters from the training split only, then apply the
# same fitted transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 100-tree random forest regressor with a fixed seed
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train_scaled, y_train)

# Score the model on the held-out split
predictions = model.predict(X_test_scaled)
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
print(f'Mean Absolute Error: {mae:,.2f}')

# Describe a new house using the one-hot column names that get_dummies
# produced for the training data. Indicator columns not listed here are
# filled with 0 below; a category whose indicators are all 0 corresponds to
# the level that drop_first=True removed during encoding.
new_house = pd.DataFrame({
    'Year Built': [2010],
    'House Style_1Story': [0],
    'House Style_2Story': [0],
    'Roof Style_Gable': [1],
    'Roof Style_Hip': [0],
    'Sale Type_New': [1],
    'Sale Type_WD': [0],
    'Sale Condition_Normal': [1],
    'Sale Condition_Partial': [0]
})

# Report any column the model was never trained on. Without this check a
# misspelled or non-existent indicator name would be discarded silently and
# the prediction would not reflect the intended input.
unknown_columns = [column for column in new_house.columns if column not in X.columns]
if unknown_columns:
    print(f'Warning: ignoring columns not present in the training features: {unknown_columns}')

# Align to the training layout in one step: same columns in the same order,
# absent indicators filled with 0, unknown columns dropped.
new_house = new_house.reindex(columns=X.columns, fill_value=0)

# Scale the new house input with the scaler fitted on the training split
new_house_scaled = scaler.transform(new_house)

# Predict price
predicted_price = model.predict(new_house_scaled)
print(f'Predicted House Price: ${predicted_price[0]:,.2f}')


Mean Absolute Error: 42,011.12
Predicted House Price: $225,541.66
