# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib

# 1. Load Dataset (Make sure you upload the 'House Prices' csv)
# 'train.csv' is resolved relative to the current working directory.
df = pd.read_csv('train.csv')

# 2. Feature Selection (Choosing 6 as per brief)
features = ['OverallQual', 'GrLivArea', 'GarageCars', 'FullBath', 'YearBuilt', 'TotalBsmtSF']
X = df[features]
y = df['SalePrice']

# 3. Handling Missing Values
# Split BEFORE imputing: the fill values must be learned from the training
# rows only. Computing the medians on the full dataset would let the held-out
# rows influence preprocessing (data leakage) and make the test score
# optimistic. The split uses the same test_size and random_state as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Per-column medians of the training partition, reused for both partitions so
# the test rows are transformed exactly like unseen data would be.
# NOTE: these medians are not stored with the model; callers of the saved
# model must supply complete feature values or impute them themselves.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# 4. Train Model
# random_state is fixed so repeated runs produce the same forest.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 5. Evaluate
# R2 shows the share of variance explained; MAE reports the typical error in
# the same units as SalePrice, which R2 alone does not convey.
preds = model.predict(X_test)
print(f"R2 Score: {r2_score(y_test, preds):.2f}")
print(f"MAE: {mean_absolute_error(y_test, preds):.2f}")

# 6. Save Model (Requirement: PART A, Step 6)
# Writes the fitted estimator to the current working directory.
joblib.dump(model, 'house_price_model.pkl')