# Basic Models

In [23]:
from common import get_dataset

# Unpack the four objects returned by `common.get_dataset()`.
# NOTE(review): judging by the names these are train/test features and
# targets; the split logic lives in `common` and is not visible here — confirm.
X_train, y_train, X_test, y_test = get_dataset()

## Median Income as the Only Predictor

A baseline linear regression that uses the `median_income` feature as its only predictor. The reported RMSE is divided by 1,000.

In [24]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error


# Select the single predictor with a list of column names so the result stays
# a column-subset rather than a bare column (assumes X_train / X_test are
# pandas DataFrames — TODO confirm against common.get_dataset).
income_train = X_train[['median_income']]
income_test = X_test[['median_income']]

# `fit` returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(income_train, y_train)
y_pred = model.predict(income_test)

# Divide by 1000 so the error is displayed in thousands of the target's units.
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse


84.11545973248083

## Scaled Median Income as the Only Predictor

In [54]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# Two-step pipeline: standardize the feature, then fit a linear regression.
# Both `preprocessing_pipeline` and `model` refer to the same Pipeline object.
preprocessing_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression()),
])
model = preprocessing_pipeline

income_train = X_train[['median_income']]
income_test = X_test[['median_income']]

# Standardizing one feature is an affine rescaling that least squares absorbs
# into its coefficient and intercept, so the RMSE should match the unscaled
# fit up to floating-point rounding.
model.fit(income_train, y_train)
y_pred = model.predict(income_test)

# Divide by 1000 so the error is displayed in thousands of the target's units.
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

84.11545973248084

## Random Forest with Median Income as the Only Predictor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# The pipeline holds only the regressor: a random forest does not need feature
# scaling, so no scaler step is included. The fixed random_state keeps the
# fitted forest reproducible between runs.
preprocessing_pipeline = Pipeline(steps=[
    ('regressor', RandomForestRegressor(random_state=42)),
])
model = preprocessing_pipeline

# NOTE(review): `md` is not read anywhere in the visible cells — confirm
# whether a later cell needs it, otherwise this copy can be removed.
md = X_train[['median_income']].copy()

income_train = X_train[['median_income']]
income_test = X_test[['median_income']]

# Flatten the target to a 1-D array before fitting, as the random forest
# regressor expects a 1-D y.
y_train_flat = y_train.values.ravel()
model.fit(income_train, y_train_flat)
y_pred = model.predict(income_test)

# Divide by 1000 so the error is displayed in thousands of the target's units.
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

96.63043213785954