Step 1: Import necessary libraries and load the dataset

In [9]:
# TODO 1: Import required libraries
# 1. fetch the california housing dataset
# 2. split the data into training and testing sets
# 3. import hyperopt library
# 4. import autokeras library
# 5. import required regression models from sklearn (LinearRegression, Ridge, Lasso, DecisionTreeRegressor, RandomForestRegressor, GradientBoostingRegressor, SVR, KNeighborsRegressor)
# 6. import required metrics from sklearn (mean_squared_error)

import autokeras as ak
from hyperopt import Trials, fmin, hp, space_eval, tpe
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import Lasso, LinearRegression, Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Fetch the California Housing data and separate features from the target
data = fetch_california_housing()
X, y = data.data, data.target

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [10]:
# TODO 2: Define the Hyperopt search space.
# Each candidate below names one regressor family ('type') together with the
# hyperparameters that are tuned for it. The order of the candidates matters:
# hp.choice identifies the selected entry by its position in this list.
_linear_candidate = {'type': 'linear_regression'}
_ridge_candidate = {'type': 'ridge', 'alpha': hp.uniform('ridge_alpha', 0.0, 2.0)}
_lasso_candidate = {'type': 'lasso', 'alpha': hp.uniform('lasso_alpha', 0.0, 2.0)}
_tree_candidate = {
    'type': 'decision_tree',
    'max_depth': hp.choice('dt_max_depth', [None, 3, 5, 7]),
}
_forest_candidate = {
    'type': 'random_forest',
    'n_estimators': hp.choice('rf_n_estimators', [50, 100, 150]),
}
_boosting_candidate = {
    'type': 'gradient_boosting',
    'n_estimators': hp.choice('gb_n_estimators', [50, 100, 150]),
    'max_depth': hp.choice('gb_max_depth', [3, 5, 7]),
}
_svr_candidate = {
    'type': 'svr',
    # loguniform bounds are given in log space: exp(-5) .. exp(2)
    'C': hp.loguniform('svr_C', -5, 2),
    'gamma': hp.loguniform('svr_gamma', -5, 2),
}
_knn_candidate = {
    'type': 'knn',
    'n_neighbors': hp.choice('knn_n_neighbors', [3, 5, 7]),
}

space = {
    'model': hp.choice('model', [
        _linear_candidate,
        _ridge_candidate,
        _lasso_candidate,
        _tree_candidate,
        _forest_candidate,
        _boosting_candidate,
        _svr_candidate,
        _knn_candidate,
    ])
}


In [11]:
# Seed for the estimators that use randomness internally; same value as the
# random_state passed to train_test_split when the data was split.
RANDOM_STATE = 42


def _build_model(config):
    """Instantiate the regressor described by one search-space candidate.

    Args:
        config: dict with a 'type' key naming the model family plus the
            hyperparameters tuned for that family (e.g. 'alpha', 'max_depth').

    Returns:
        An unfitted scikit-learn regressor.

    Raises:
        ValueError: if ``config['type']`` is not one of the supported names.
    """
    model_type = config['type']

    if model_type == 'linear_regression':
        return LinearRegression()
    if model_type == 'ridge':
        return Ridge(alpha=config['alpha'])
    if model_type == 'lasso':
        return Lasso(alpha=config['alpha'])
    # The tree-based estimators are randomized; seeding them makes the loss a
    # deterministic function of the hyperparameters, so re-evaluating a
    # configuration reproduces the value seen during the search.
    if model_type == 'decision_tree':
        return DecisionTreeRegressor(max_depth=config['max_depth'],
                                     random_state=RANDOM_STATE)
    if model_type == 'random_forest':
        return RandomForestRegressor(n_estimators=config['n_estimators'],
                                     random_state=RANDOM_STATE)
    if model_type == 'gradient_boosting':
        return GradientBoostingRegressor(n_estimators=config['n_estimators'],
                                         max_depth=config['max_depth'],
                                         random_state=RANDOM_STATE)
    if model_type == 'svr':
        return SVR(C=config['C'], gamma=config['gamma'])
    if model_type == 'knn':
        return KNeighborsRegressor(n_neighbors=config['n_neighbors'])

    raise ValueError("Invalid model type")


def objective(params):
    """Loss function minimized by Hyperopt.

    Builds the model selected by ``params['model']``, fits it on
    ``X_train``/``y_train`` and scores it on ``X_test``/``y_test``.

    Args:
        params: a point sampled from the search space, i.e. a dict whose
            'model' entry holds the 'type' and hyperparameters of one candidate.

    Returns:
        Mean squared error of the fitted model's predictions on the test split
        (lower is better).

    Raises:
        ValueError: if the model type is not recognized.
    """
    model = _build_model(params['model'])

    # NOTE(review): the loss is measured on the test split, so any configuration
    # chosen by minimizing this function has been selected using the test data.
    # Consider scoring on a validation split or with cross-validation instead.
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return mean_squared_error(y_test, predictions)


In [12]:
# TODO 5: Use Tree of Parzen Estimators (TPE) for hyperparameter optimization
# `trials` collects every evaluated configuration together with its loss.
trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=10, trials=trials)

# Extract the best hyperparameters.
# fmin reports hp.choice parameters as the *index* of the selected option, so
# the raw result is mapped back onto the search space to obtain actual values.
best_params = space_eval(space, best)


100%|██████████| 10/10 [02:25<00:00, 14.58s/trial, best loss: 0.21736102404932534]


IndexError: list index out of range

In [19]:
# TODO step 6: score the best configuration found by the search.
# objective() refits the model on the training split and returns its
# mean squared error on the test split (an error, not an accuracy: lower is better).
mse = objective(params=best_params)
mse

In [17]:
# Display the best hyperparameter configuration selected by the search
best_params

({'model': {'max_depth': 7, 'n_estimators': 100, 'type': 'gradient_boosting'}},
 'gradient_boosting')

In [21]:
# TODO 6: Train AutoKeras
# Let AutoKeras try up to 10 candidate networks for this tabular regression
# task; overwrite=True starts from scratch instead of reusing a saved project.
best_model = ak.StructuredDataRegressor(
    overwrite=True,
    max_trials=10,
)
# TODO 7: Set verbose=0 for less output
best_model.fit(X_train, y_train, verbose=0)

# Score the model returned by the AutoKeras search on the held-out test split
predictions = best_model.predict(X_test)
test_mse = mean_squared_error(y_test, predictions)
print("Test MSE:", test_mse)


INFO:tensorflow:Assets written to: .\structured_data_regressor\best_model\assets
Test MSE: 0.2869356141179684
