In [25]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
X = housing.data
y = housing.target

In [26]:
print(housing.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

:Number of Instances: 20640

:Number of Attributes: 8 numeric, predictive attributes and the target

:Attribute Information:
    - MedInc        median income in block group
    - HouseAge      median house age in block group
    - AveRooms      average number of rooms per household
    - AveBedrms     average number of bedrooms per household
    - Population    block group population
    - AveOccup      average number of household members
    - Latitude      block group latitude
    - Longitude     block group longitude

:Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived from the 1990 U.S. census, using one row per ce

In [27]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [38]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVR, SVR

pipl = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR())
])

pipl.fit(X_train, y_train)

# Predict on the test data
y_pred = pipl.predict(X_test)

# Evaluate the performance
from sklearn.metrics import mean_squared_error

y_pred = pipl.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

np.float64(0.3570026426754465)

In [39]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, uniform

param_distributions = {
    "svr__gamma": loguniform(0.001, 0.1),
    "svr__C": uniform(1, 10)
}

rnd = RandomizedSearchCV(pipl, param_distributions=param_distributions , cv=3, n_iter=100, random_state=42)
rnd.fit(X_train[:2000], y_train[:2000])

rnd.best_score_

np.float64(0.7572774474774366)

In [40]:
y_pred = pipl.predict(X_test)

# Evaluate the performance
from sklearn.metrics import mean_squared_error

y_pred = pipl.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse

np.float64(0.3570026426754465)