In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.svm import LinearSVR, SVR
from sklearn.metrics import  mean_squared_error
from sklearn.model_selection import RandomizedSearchCV, train_test_split
import matplotlib.pyplot as plt

In [3]:
# Fetch the California Housing bunch once, then unpack the pieces used by
# the rest of the notebook: feature matrix, target vector and metadata.
data = fetch_california_housing()
X, y = data.data, data.target
features, y_names = data.feature_names, data.target_names
description = data.DESCR

In [4]:
# Print each part of the loaded dataset under its own heading.
# Every heading ends with a newline so the value starts on the next line.
for label, value in (
    ("Conjunto de dados (X):\n", X),
    ("Rótulos (y):\n", y),
    ("Descrição do dataset:\n", data.DESCR),
    ("Características (features):\n", data.feature_names),
    ("Classes (target names):\n", data.target_names),
):
    print(label, value)

Conjunto de dados (X):
 [[   8.3252       41.            6.98412698 ...    2.55555556
    37.88       -122.23      ]
 [   8.3014       21.            6.23813708 ...    2.10984183
    37.86       -122.22      ]
 [   7.2574       52.            8.28813559 ...    2.80225989
    37.85       -122.24      ]
 ...
 [   1.7          17.            5.20554273 ...    2.3256351
    39.43       -121.22      ]
 [   1.8672       18.            5.32951289 ...    2.12320917
    39.43       -121.32      ]
 [   2.3886       16.            5.25471698 ...    2.61698113
    39.37       -121.24      ]]
Rótulos (y):
 [4.526 3.585 3.521 ... 0.923 0.847 0.894]
Descrição do dataset:
 .. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

:Number of Instances: 20640

:Number of Attributes: 8 numeric, predictive attributes and the target

:Attribute Information:
    - MedInc        median income in block group
    - HouseAge      median house age in b

In [5]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps
# the split identical across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=60,
)

In [13]:
# Linear SVR pipeline: polynomial expansion -> standardisation -> LinearSVR.
# make_pipeline names each step after its lower-cased class, which is what
# the 'polynomialfeatures__*' / 'linearsvr__*' search keys rely on.
poly_step = PolynomialFeatures()
scale_step = StandardScaler()
linear_svr_step = LinearSVR(max_iter=10000, random_state=60)
modelo_svr = make_pipeline(poly_step, scale_step, linear_svr_step)

In [14]:
# Search space for the LinearSVR pipeline.
#
# LinearSVR does not support loss='epsilon_insensitive' together with
# dual=False. A single flat dict lets the sampler draw that pair, and every
# such candidate fails to fit and is scored as NaN, wasting search budget.
# The space is therefore split into two sub-spaces (a list of dicts, which
# RandomizedSearchCV accepts) so only valid loss/dual pairs are sampled.
#
# NOTE(review): with 8 input features, degree=5 expands to 1287 polynomial
# columns; those candidates dominate the run time of the search.
_shared_space = {
    'polynomialfeatures__degree': [1, 2, 3, 4, 5],
    'linearsvr__epsilon': np.linspace(0.01, 1.5, 100),
    'linearsvr__tol': np.linspace(0.00001, 0.1, 100),
    'linearsvr__C': np.linspace(0.1, 10, 20),
    'linearsvr__fit_intercept': [True, False],
    'linearsvr__intercept_scaling': np.linspace(0.1, 1.5, 50),
}

param_grid = [
    # L1 (epsilon-insensitive) loss: only the dual formulation is available.
    {
        **_shared_space,
        'linearsvr__loss': ['epsilon_insensitive'],
        'linearsvr__dual': [True],
    },
    # L2 (squared epsilon-insensitive) loss: primal and dual both work.
    {
        **_shared_space,
        'linearsvr__loss': ['squared_epsilon_insensitive'],
        'linearsvr__dual': [False, True],
    },
]

In [15]:
# Randomised hyper-parameter search over the LinearSVR pipeline.
# The saved run of this cell was interrupted before finishing, so the
# cross-validation fits are spread over all CPU cores (n_jobs=-1). The
# candidates are still drawn from random_state, so the sampled settings
# do not change.
model_svm_linear = RandomizedSearchCV(
    modelo_svr,
    param_grid,
    n_iter=100,
    random_state=60,
    n_jobs=-1,
)
model_svm_linear.fit(X_train, Y_train)



KeyboardInterrupt: 

In [None]:
# Score the fitted LinearSVR search on the held-out split and report the
# winning hyper-parameters.
pred = model_svm_linear.predict(X_test)
mse = mean_squared_error(y_true=Y_test, y_pred=pred)
print("Mean Squared Error:", mse)
print("Best parameters found:", model_svm_linear.best_params_)

In [None]:
# Kernel SVR pipeline: standardise the inputs, then fit an SVR with a cap
# of 10000 solver iterations.
pipeline = make_pipeline(StandardScaler(), SVR(max_iter=10000))

In [None]:
# Search space for the kernel SVR pipeline (keys target the 'svr' step).
param_grid = {
    # Regularisation / tolerance settings.
    'svr__epsilon': np.linspace(start=0.01, stop=2, num=200),
    'svr__tol': np.linspace(start=1e-6, stop=0.1, num=200),
    'svr__C': np.linspace(start=0.1, stop=100, num=50),
    # Kernel choice and kernel-shape settings.
    'svr__kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'svr__degree': [2, 3, 4, 5, 6],
    'svr__gamma': np.linspace(start=0.01, stop=10, num=200),
    'svr__coef0': np.linspace(start=0, stop=1, num=20),
    # Solver settings.
    'svr__shrinking': [True, False],
    # NOTE(review): cache_size looks like a runtime/memory knob rather than
    # a model hyper-parameter -- confirm whether it belongs in the search.
    'svr__cache_size': np.linspace(start=50, stop=1000, num=20),
}

In [None]:
# Randomised hyper-parameter search over the kernel SVR pipeline.
# n_jobs=-1 runs the cross-validation fits on all CPU cores so that the
# 100-candidate search is feasible on a full re-run. The candidates are
# still drawn from random_state, so the sampled settings do not change.
modelo = RandomizedSearchCV(
    pipeline,
    param_grid,
    n_iter=100,
    random_state=60,
    n_jobs=-1,
)
modelo.fit(X_train, Y_train)

In [None]:

# Score the fitted kernel SVR search on the held-out split and report the
# winning hyper-parameters.
pred = modelo.predict(X_test)
mse = mean_squared_error(y_true=Y_test, y_pred=pred)
print("Mean Squared Error:", mse)
print("Best parameters found:", modelo.best_params_)