# Basic Models

In [63]:
from common import get_dataset

# Unpack the four splits returned by get_dataset(). Later cells select
# X_*[['median_income']] and call y_train.values, so these are presumably
# pandas objects -- confirm against common.get_dataset.
X_train, y_train, X_test, y_test = get_dataset()

## Median Income as the Only Predictor

### Without Scaling

In [75]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import cross_val_score


# Baseline: ordinary least squares on the raw median_income column only.
# fit() returns the fitted estimator, so construction and training are chained.
model = LinearRegression().fit(X_train[['median_income']], y_train)

# Score on the held-out split.
y_pred = model.predict(X_test[['median_income']])

# RMSE is divided by 1000 for display (presumably thousands of the target's
# unit -- confirm against the dataset).
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

84.11545973248083

### Scaled Median Income

In [65]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler


# Standardize median_income, then fit ordinary least squares on it.
# OLS predictions do not depend on an affine rescaling of the feature, so
# the RMSE is expected to match the unscaled fit up to floating-point noise.
preprocessing_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression()),
])
model = preprocessing_pipeline  # both names refer to the same pipeline object

# Pipeline.fit returns the pipeline itself, so fit and predict are chained.
y_pred = model.fit(X_train[['median_income']], y_train).predict(
    X_test[['median_income']]
)

# RMSE divided by 1000 for display (presumably thousands of the target's
# unit -- confirm against the dataset).
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

84.11545973248084

### Random Forest

In [66]:
from sklearn.ensemble import RandomForestRegressor

# Single-step pipeline: the forest is fitted on the raw median_income values,
# with no scaler step (tree splits are threshold-based, so scaling is not
# needed). random_state=42 pins the estimator's randomness so the score is
# reproducible across runs.
model = preprocessing_pipeline = Pipeline([
    ('regressor', RandomForestRegressor(random_state=42)),
])

# ravel() flattens the target to 1D. This assumes y_train is a single-column
# frame (confirm against get_dataset); passing it as a column vector would
# still fit but emits a DataConversionWarning.
model.fit(X_train[['median_income']], y_train.values.ravel())
y_pred = model.predict(X_test[['median_income']])

# RMSE divided by 1000 for display (presumably thousands of the target's
# unit -- confirm against the dataset).
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

96.63043213785954

### Linear Support Vector Regression (SVR)

In [67]:
from sklearn.svm import SVR


# Linear-kernel support vector regression on standardized median_income.
# NOTE(review): SVR is left at its default regularization here; this cell's
# RMSE is noticeably worse than the plain linear regression above, which may
# indicate the defaults underfit this target -- worth confirming by tuning C.
preprocessing_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('regressor', SVR(kernel='linear')),
])
model = preprocessing_pipeline  # both names refer to the same pipeline object

# The target is flattened to 1D before fitting.
model.fit(
    X_train[['median_income']],
    y_train.values.ravel(),
)
y_pred = model.predict(X_test[['median_income']])

# RMSE divided by 1000 for display (presumably thousands of the target's
# unit -- confirm against the dataset).
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

113.73689353377401

### RBF SVR

In [68]:
# RBF-kernel support vector regression on standardized median_income, with
# explicit hyperparameters (C=100, gamma='scale', epsilon=0.1).
preprocessing_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    (
        'regressor',
        SVR(kernel='rbf', C=100, gamma='scale', epsilon=0.1),
    ),
])
model = preprocessing_pipeline  # both names refer to the same pipeline object

# Pipeline.fit returns the pipeline itself; the target is flattened to 1D.
y_pred = model.fit(
    X_train[['median_income']], y_train.values.ravel()
).predict(X_test[['median_income']])

# RMSE divided by 1000 for display (presumably thousands of the target's
# unit -- confirm against the dataset).
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

94.45755600633943

### Stochastic Gradient Descent Regressor

In [None]:
from sklearn.linear_model import SGDRegressor

# Linear model trained by stochastic gradient descent on standardized
# median_income. random_state=42 fixes the estimator's randomness; training
# is capped at max_iter=1000 with a stopping tolerance of tol=1e-3.
preprocessing_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('regressor', SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)),
])
model = preprocessing_pipeline  # both names refer to the same pipeline object

# The target is flattened to 1D before fitting.
model.fit(
    X_train[['median_income']],
    y_train.values.ravel(),
)
y_pred = model.predict(X_test[['median_income']])

# RMSE divided by 1000 for display (presumably thousands of the target's
# unit -- confirm against the dataset).
rmse = root_mean_squared_error(y_test, y_pred) / 1000
rmse

84.20746863859328