### Importing the necessary modules

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
housing = fetch_california_housing()
housing

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

### Load the California housing dataset

In [3]:
# housing = fetch_california_housing()
X = pd.DataFrame(housing.data, columns=housing.feature_names)
y = pd.Series(housing.target)

### Split the dataset into training (80%) and testing (20%) sets

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Standardize the features

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Initialize models

In [6]:
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree (Squared Error)': DecisionTreeRegressor(criterion='squared_error', random_state=42),
    'Decision Tree (Absolute Error)': DecisionTreeRegressor(criterion='absolute_error', random_state=42),
    'SVM Regression': SVR()
}

### Train and evaluate each model

In [7]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{name} - MSE: {mse:.4f}, R2 Score: {r2:.4f}")

Linear Regression - MSE: 0.5559, R2 Score: 0.5758
Decision Tree (Squared Error) - MSE: 0.4940, R2 Score: 0.6230
Decision Tree (Absolute Error) - MSE: 0.5246, R2 Score: 0.5997
SVM Regression - MSE: 0.3570, R2 Score: 0.7276


### Hyperparameter tuning for SVM Regression

In [8]:
param_grid = {
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(SVR(), param_grid, cv=5, scoring='r2', verbose=2)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   6.4s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   6.4s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   6.5s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  13.4s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=  20.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  30.1s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  29.5s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  29.5s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  29.4s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=  30.3s
[CV] END .................C=0.1, gamma=scale, kernel=sigmoid; total time=  38.1s
[CV] END .................C=0.1, gamma=scale, k

### Best parameters and best score

In [9]:
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print(f"Best Hyperparameters for SVM: {best_params}")
print(f"Best Cross-Validation R2 Score: {best_score:.4f}")

Best Hyperparameters for SVM: {'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}
Best Cross-Validation R2 Score: 0.7620


### Evaluate the tuned model on the test set

In [10]:
best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_test)
final_mse = mean_squared_error(y_test, y_pred)
final_r2 = r2_score(y_test, y_pred)

print(f"Tuned SVM - Final MSE: {final_mse:.4f}, Final R2 Score: {final_r2:.4f}")

Tuned SVM - Final MSE: 0.3201, Final R2 Score: 0.9257


### Accuracy

In [11]:
accuracy = final_r2 * 100
print(f"Final accuracy of the tuned model: {accuracy:.2f}%")

Final accuracy of the tuned model: 92.57%
