# Basic Models

In [63]:
from common import get_dataset

# Unpack the four objects returned by common.get_dataset().
# NOTE(review): presumably a train/test split of features and target --
# confirm against common.py.
(X_train, y_train, X_test, y_test) = get_dataset()

## Median Income as the Only Predictor

### Without Scaling

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import cross_val_score

# Baseline: plain linear regression on the raw median_income column only.
model = LinearRegression()

# 5-fold cross-validation. The scorer returns negated RMSE per fold, so the
# sign is flipped back below; the division by 1000 rescales the error
# (presumably to thousands of the target's unit -- confirm with the dataset).
rmse = cross_val_score(
    model,
    X_train[['median_income']],
    y_train.values.ravel(),
    scoring='neg_root_mean_squared_error',
    cv=5,
)
rmse = -rmse / 1000
# NOTE(review): min() shows the best fold, not the average across folds;
# consider rmse.mean() if a typical error is the goal.
rmse.min()

np.float64(82.30143383368546)

### Scaled Median Income

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# Same linear model, but the feature is standardised first. The scaler is
# part of the pipeline, so it is re-fitted on each training fold.
# NOTE(review): the recorded score matches the unscaled run, which is what
# one would expect for least squares with an intercept -- confirm.
model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', LinearRegression()),
    ]
)

# Negated-RMSE scores from 5-fold CV, turned positive and divided by 1000.
rmse = cross_val_score(
    model,
    X_train[['median_income']],
    y_train.values.ravel(),
    scoring='neg_root_mean_squared_error',
    cv=5,
)
rmse = -rmse / 1000
# NOTE(review): min() is the best single fold, not the mean over folds.
rmse.min()

np.float64(82.30143383368546)

### Random Forest

In [83]:
from sklearn.ensemble import RandomForestRegressor

# No scaler step here: the original author notes that a random forest does
# not need feature scaling. The single-step pipeline is kept so `model`
# has the same shape as in the other cells.
model = Pipeline(
    steps=[
        ('regressor', RandomForestRegressor(random_state=42)),
    ]
)

# ravel() flattens y to the 1-D array the regressor expects.
# The scorer returns negated RMSE per fold; flip the sign and divide by 1000.
rmse = cross_val_score(
    model,
    X_train[['median_income']],
    y_train.values.ravel(),
    scoring='neg_root_mean_squared_error',
    cv=5,
)
rmse = -rmse / 1000
# NOTE(review): min() is the best single fold, not the mean over folds.
rmse.min()

np.float64(94.59822948012433)

### Linear SVR

In [None]:
from sklearn.svm import SVR


# Support vector regression with a linear kernel on the standardised
# median_income feature; all other SVR settings are left at their defaults.
model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', SVR(kernel='linear')),
    ]
)

# 5-fold CV; negated-RMSE scores are made positive and divided by 1000.
rmse = cross_val_score(
    model,
    X_train[['median_income']],
    y_train.values.ravel(),
    scoring='neg_root_mean_squared_error',
    cv=5,
)
rmse = -rmse / 1000
# NOTE(review): min() is the best single fold, not the mean over folds.
rmse.min()

np.float64(112.26795407347454)

### RBF SVR

In [None]:
# Support vector regression with an RBF kernel and explicit C, gamma and
# epsilon settings, applied to the standardised median_income feature.
model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', SVR(kernel='rbf', C=100, gamma='scale', epsilon=0.1)),
    ]
)

# 5-fold CV; negated-RMSE scores are made positive and divided by 1000.
rmse = cross_val_score(
    model,
    X_train[['median_income']],
    y_train.values.ravel(),
    scoring='neg_root_mean_squared_error',
    cv=5,
)
rmse = -rmse / 1000
# NOTE(review): min() is the best single fold, not the mean over folds.
rmse.min()

np.float64(93.37584446131761)

### Stochastic Gradient Descent Regressor

In [None]:
from sklearn.linear_model import SGDRegressor

# Linear model fitted by stochastic gradient descent on the standardised
# feature. random_state is fixed so repeated runs give the same result.
model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)),
    ]
)

# 5-fold CV; negated-RMSE scores are made positive and divided by 1000.
rmse = cross_val_score(
    model,
    X_train[['median_income']],
    y_train.values.ravel(),
    scoring='neg_root_mean_squared_error',
    cv=5,
)
rmse = -rmse / 1000
# NOTE(review): min() is the best single fold, not the mean over folds.
rmse.min()

np.float64(82.32131936522056)