# Week 2 - Linear Regression 2

In [6]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import numpy as np

# --- Data: read the CSV, keep numeric columns only, drop rows with any NaN ---
df = (
    pd.read_csv("austinHousingData.csv")
    .select_dtypes(include=np.number)
    .dropna()
)

# --- Target / feature matrix ---
# 'latestPrice' is the regression target (swap in another column if needed).
y = df['latestPrice']
# Remove the target from the features; 'zpid' is dropped too when present
# (errors='ignore' makes a missing column a no-op).
X = df.drop(columns=['latestPrice', 'zpid'], errors='ignore')

# --- Hold out 20% of the rows for testing; the seed fixes the split ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Gradient-boosted trees with explicitly chosen hyperparameters ---
model = GradientBoostingRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=4,
    subsample=0.8,
    random_state=42,
)

# --- Fit on the training split, then score on the held-out split ---
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)

print("RMSE:", np.sqrt(test_mse))
print("R² score:", test_r2)


RMSE: 310340.15604619484
R² score: 0.5930651800573159


In [None]:
# Hyperparameter search for the gradient-boosting regressor.
# The grid has 3 * 3 * 3 = 27 candidates; with cv=3 that is 81 fits plus one
# final refit of the best candidate on the whole training split.
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [300, 500, 700],
    'max_depth': [3, 4, 5]
}
grid_search = GridSearchCV(
    # Use the same fixed settings (subsample, seed) as the model fitted in the
    # previous cell, so that configuration is one of the grid points and
    # repeated runs of the search give the same result.
    GradientBoostingRegressor(subsample=0.8, random_state=42),
    param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    n_jobs=-1,  # fit candidates in parallel on all available cores
    verbose=1,  # print progress so a long search is not mistaken for a hang
)
grid_search.fit(X_train, y_train)

print("Best Params:", grid_search.best_params_)
# best_score_ is the mean negated MSE across folds: flip the sign and take the
# square root to report it in the same units as the target.
print("Best CV RMSE:", np.sqrt(-grid_search.best_score_))


KeyboardInterrupt: 