In [16]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.svm import LinearSVR, SVR
from sklearn.metrics import  mean_squared_error
from sklearn.model_selection import RandomizedSearchCV, train_test_split
import matplotlib.pyplot as plt

In [17]:
data = fetch_california_housing()
X = data.data
y = data.target
description = data.DESCR
features = data.feature_names
y_names = data.target_names

In [18]:
print("Conjunto de dados (X):\n", X)
print("Rótulos (y):\n",  y)
print("Descrição do dataset:\n", data.DESCR)
print("Características (features):\n", data.feature_names)
print("Classes (target names):\n", data.target_names)

Conjunto de dados (X):
 [[   8.3252       41.            6.98412698 ...    2.55555556
    37.88       -122.23      ]
 [   8.3014       21.            6.23813708 ...    2.10984183
    37.86       -122.22      ]
 [   7.2574       52.            8.28813559 ...    2.80225989
    37.85       -122.24      ]
 ...
 [   1.7          17.            5.20554273 ...    2.3256351
    39.43       -121.22      ]
 [   1.8672       18.            5.32951289 ...    2.12320917
    39.43       -121.32      ]
 [   2.3886       16.            5.25471698 ...    2.61698113
    39.37       -121.24      ]]
Rótulos (y):
 [4.526 3.585 3.521 ... 0.923 0.847 0.894]
Descrição do dataset:
 .. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

:Number of Instances: 20640

:Number of Attributes: 8 numeric, predictive attributes and the target

:Attribute Information:
    - MedInc        median income in block group
    - HouseAge      median house age in b

In [19]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=60)

In [20]:
modelo_svr = make_pipeline(
    PolynomialFeatures(),
    StandardScaler(),
    LinearSVR(random_state= 60, max_iter = 10000)
)

In [21]:
param_grid = {
    'polynomialfeatures__degree': [1, 2, 3, 4, 5],
    'linearsvr__epsilon': np.linspace(0.01, 1.5, 100),
    'linearsvr__tol': np.linspace(0.00001, 0.1, 100),
    'linearsvr__C': np.linspace(0.1, 10, 20),
    'linearsvr__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive'],
    'linearsvr__fit_intercept': [True, False],
    'linearsvr__intercept_scaling': np.linspace(0.1, 1.5, 50),
    'linearsvr__dual': [False, True]
}

In [None]:
model_svm_linear = RandomizedSearchCV(modelo_svr, param_grid,n_iter=100, random_state=60)
model_svm_linear.fit(X_train, Y_train)

In [23]:
pred = model_svm_linear.predict(X_test)
mse = mean_squared_error(Y_test, pred)
print("Mean Squared Error:", mse)
print("Best parameters found:", model_svm_linear.best_params_)

Mean Squared Error: 0.5540102354757618
Best parameters found: {'polynomialfeatures__degree': 1, 'linearsvr__tol': 0.01415, 'linearsvr__loss': 'epsilon_insensitive', 'linearsvr__intercept_scaling': 0.5, 'linearsvr__fit_intercept': True, 'linearsvr__epsilon': 0.9431313131313132, 'linearsvr__dual': True, 'linearsvr__C': 1.6631578947368424}


In [32]:
pipeline = make_pipeline(
    StandardScaler(),
    SVR(max_iter = 10000)
)

In [38]:
param_grid = {
    'svr__epsilon': np.linspace(0.01, 1, 20),
    'svr__tol': np.logspace(-6, -2, 10),
    'svr__C': np.logspace(-2, 3, 15),
    'svr__kernel': ['linear', 'rbf'],
    'svr__gamma': np.logspace(-3, 2, 15),
}

In [None]:
modelo = RandomizedSearchCV(pipeline, param_grid, n_iter=200, random_state=60)
modelo.fit(X_train, Y_train)

In [40]:

pred = modelo.predict(X_test)
mse = mean_squared_error(Y_test, pred)

print("Mean Squared Error:", mse)
print("Best parameters found:", modelo.best_params_)

Mean Squared Error: 0.30018841811845687
Best parameters found: {'svr__tol': 0.0001668100537200059, 'svr__kernel': 'rbf', 'svr__gamma': 0.31622776601683794, 'svr__epsilon': 0.4268421052631579, 'svr__C': 7.196856730011521}
