In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Baseline experiment: an RBF-kernel SVR trained on every feature of the
# California Housing dataset. The names bound here (X_train_scaled,
# X_test_scaled, y_train, y_test, svr_model, y_pred, mse, r2) are reused by
# the cells below.

# Load the California Housing Prices dataset
cali_housing = fetch_california_housing()
X = cali_housing.data  # Features
y = cali_housing.target  # Target variable (median house value)

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: the scaler is fitted on the training rows only and then
# applied unchanged to the test rows, so no test-set statistics leak into
# training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and fit the SVR model
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr_model.fit(X_train_scaled, y_train)

# Make predictions using the model
y_pred = svr_model.predict(X_test_scaled)

# Evaluate the model on the held-out rows and report the scores right away:
# the feature-selection cell further down rebinds `mse` and `r2`, so without
# this output the baseline numbers would be lost before they can be compared.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Baseline MSE (all features):", mse)
print("Baseline R-squared (all features):", r2)

In [None]:
# Residuals of whatever predictions are currently bound to `y_pred`.
# Sign convention is actual minus predicted, so a positive value means the
# model under-predicted that sample.
residuals = y_test - y_pred

# Draw on the current Axes (created on demand, exactly as the pyplot calls
# would) and add a dashed zero line as the "perfect prediction" reference.
ax = plt.gca()
ax.scatter(y_pred, residuals, color='blue', alpha=0.5)
ax.axhline(y=0, color='red', linestyle='--')
ax.set(
    xlabel="Predicted Median House Value",
    ylabel="Residuals (Actual - Predicted)",
    title="Residual Plot - SVR",
)
plt.show()


In [None]:
# Disabled experiment, kept for reference only: every line in this cell is
# commented out, so running the cell does nothing.
# If re-enabled it would refit the RBF SVR and rebind svr_model, y_pred, mse
# and r2.
# NOTE(review): sklearn.svm.SVR does not appear to accept a `loss` keyword
# (`loss='epsilon_insensitive'` is a LinearSVR option) -- confirm against the
# scikit-learn API before uncommenting; as written the constructor call is
# expected to fail.
# NOTE(review): the printed labels call this "L1 Regularization", but
# epsilon-insensitive is the name of a loss, not of a regularization penalty
# -- reword the labels if this cell is revived.
# svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1, loss='epsilon_insensitive')
# svr_model.fit(X_train_scaled, y_train)
# y_pred = svr_model.predict(X_test_scaled)

# # Evaluate the model with regularization
# mse = mean_squared_error(y_test, y_pred)
# r2 = r2_score(y_test, y_pred)
# print("MSE with Epsilon-Insensitive Loss (L1 Regularization):", mse)
# print("R-squared with Epsilon-Insensitive Loss (L1 Regularization):", r2)

In [None]:
# Disabled experiment, kept for reference only: every line in this cell is
# commented out, so running the cell does nothing.
# If re-enabled it would fit one SVR per kernel in ['linear', 'poly',
# 'sigmoid'] on the scaled training data (same C=1.0 and epsilon=0.1 as the
# RBF model) and print the test-set MSE and R-squared for each kernel.
# Caveat: the loop rebinds svr_model, y_pred, mse and r2, so after it runs
# those names would refer to the last kernel tried ('sigmoid').
# kernel_types = ['linear', 'poly', 'sigmoid']
# for kernel in kernel_types:
#     svr_model = SVR(kernel=kernel, C=1.0, epsilon=0.1)
#     svr_model.fit(X_train_scaled, y_train)
#     y_pred = svr_model.predict(X_test_scaled)
#     mse = mean_squared_error(y_test, y_pred)
#     r2 = r2_score(y_test, y_pred)
#     print(f"Kernel: {kernel} - MSE: {mse} - R-squared: {r2}")


In [None]:
# Feature-selection experiment: keep the k features with the highest
# univariate f_regression score, then refit the same RBF SVR on that subset.
# Caveat: this cell rebinds svr_model, y_pred, mse and r2, so any cell that
# reads those names afterwards sees the feature-selected model, not the
# all-features one.
from sklearn.feature_selection import SelectKBest, f_regression

k = 5  # Number of top features to select

# Score and pick features on the training rows only, then apply the same
# column mask to both the training and the test matrices.
feature_selector = SelectKBest(score_func=f_regression, k=k).fit(X_train_scaled, y_train)
X_train_selected = feature_selector.transform(X_train_scaled)
X_test_selected = feature_selector.transform(X_test_scaled)

# Same hyperparameters as the all-features model, so only the feature set
# differs between the two runs.
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1).fit(X_train_selected, y_train)
y_pred = svr_model.predict(X_test_selected)

# Score on the held-out rows and report.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print("MSE with Feature Selection:", mse)
print("R-squared with Feature Selection:", r2)
