In [3]:
# --- House Prices: Linear Regression on sqft, beds, baths ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

# Step 1 - Load the training table from the working directory.
df = pd.read_csv("train.csv")

# Step 2 - Build the three model inputs.
#   square footage -> GrLivArea (used as-is)
#   bedrooms       -> BedroomAbvGr (used as-is)
#   bathrooms      -> derived column "BathroomsTotal": full baths count 1.0 and
#                     half baths count 0.5, summed over the FullBath/HalfBath
#                     and BsmtFullBath/BsmtHalfBath columns. Missing counts are
#                     treated as 0 before weighting.
df["BathroomsTotal"] = (
    df["FullBath"].fillna(0)
    .add(df["HalfBath"].fillna(0).mul(0.5))
    .add(df["BsmtFullBath"].fillna(0))
    .add(df["BsmtHalfBath"].fillna(0).mul(0.5))
)

feat_cols = ["GrLivArea", "BedroomAbvGr", "BathroomsTotal"]
X = df.loc[:, feat_cols]
y = df["SalePrice"]

# Step 3 - Hold out 20% of the rows for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 4 - Fit ordinary least squares on the training portion
# (fit() returns the estimator itself, so construction and fitting chain).
model = LinearRegression().fit(X_train, y_train)

# Step 5 - Score the held-out rows and report each metric on its own line.
pred = model.predict(X_test)
for label, metric in (("R^2:", r2_score), ("MAE:", mean_absolute_error)):
    print(label, metric(y_test, pred))

# Step 6 - Single-row example: 2000 sq ft, 3 bedrooms, 2.5 bathrooms.
# Column names are taken from feat_cols so they match the training frame.
example = pd.DataFrame(dict(zip(feat_cols, ([2000], [3], [2.5]))))
print("Predicted price for example:", model.predict(example)[0])


R^2: 0.6579346254018674
MAE: 34395.802888482795
Predicted price for example: 232016.59114361883
