In [1]:
#Task 1.1
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the California Housing dataset
dataset = fetch_california_housing()
X = dataset.data
y = dataset.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
linear_models = []
c_values = [0.1, 1, 10, 100, 1000]

for c in c_values:
    # Create the SVR model with a linear kernel
    svr = SVR(kernel='linear', C=c)
    
    # Fit the model on the training data
    svr.fit(X_train, y_train)
    
    # Predict on the testing data
    y_pred = svr.predict(X_test)
    
    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    
    # Store the model and its performance metrics
    linear_models.append({
        'model': svr,
        'kernel': 'linear',
        'C': c,
        'MSE': mse,
        'MAE': mae
    })

# Print the performance metrics for each linear model
for model in linear_models:
    print(f"Linear SVR (C={model['C']}): MSE = {model['MSE']}, MAE = {model['MAE']}")

Linear SVR (C=0.1): MSE = 0.579545283537956, MAE = 0.5122803954285885
Linear SVR (C=1): MSE = 0.5792291127963858, MAE = 0.5119867120733883
Linear SVR (C=10): MSE = 0.5792357045115284, MAE = 0.5119300751544152
Linear SVR (C=100): MSE = 0.5793657304338318, MAE = 0.5119045900597557
Linear SVR (C=1000): MSE = 0.5809920741163882, MAE = 0.5120700777292171


In [4]:
rbf_models = []
c_values = [0.1, 1, 10]
gamma_values = [0.1, 0.01, 0.001]

for c in c_values:
    for gamma in gamma_values:
        # Create the SVR model with an RBF kernel
        svr = SVR(kernel='rbf', C=c, gamma=gamma)
        
        # Fit the model on the training data
        svr.fit(X_train, y_train)
        
        # Predict on the testing data
        y_pred = svr.predict(X_test)
        
        # Evaluate the model
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        
        # Store the model and its performance metrics
        rbf_models.append({
            'model': svr,
            'kernel': 'rbf',
            'C': c,
            'gamma': gamma,
            'MSE': mse,
            'MAE': mae
        })

# Print the performance metrics for each RBF model
for model in rbf_models:
    print(f"RBF SVR (C={model['C']}, gamma={model['gamma']}): MSE = {model['MSE']}, MAE = {model['MAE']}")


RBF SVR (C=0.1, gamma=0.1): MSE = 0.4242716490862146, MAE = 0.43612703174432826
RBF SVR (C=0.1, gamma=0.01): MSE = 0.5320190299496264, MAE = 0.4997737149792417
RBF SVR (C=0.1, gamma=0.001): MSE = 0.6689881722241799, MAE = 0.5756061698392319
RBF SVR (C=1, gamma=0.1): MSE = 0.3657894482393621, MAE = 0.40501126872465076
RBF SVR (C=1, gamma=0.01): MSE = 0.4703020192819607, MAE = 0.46370068926506447
RBF SVR (C=1, gamma=0.001): MSE = 0.5547867655192305, MAE = 0.5124030413348883
RBF SVR (C=10, gamma=0.1): MSE = 0.33138717447397703, MAE = 0.3831582423265107
RBF SVR (C=10, gamma=0.01): MSE = 0.41821879798435996, MAE = 0.43902804529646206
RBF SVR (C=10, gamma=0.001): MSE = 0.5479558754907253, MAE = 0.49209259154735974


In [8]:
#Task 1.2
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVR
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [9]:
X, y = make_regression(n_samples=100, n_features=10, random_state=42)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
svr = SVR()

In [12]:
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 0.5]
}

In [13]:
grid_search = GridSearchCV(estimator=svr, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

In [14]:
best_params = grid_search.best_params_

In [15]:
best_score = grid_search.best_score_

In [16]:
best_svr = SVR(**best_params)
best_svr.fit(X_train, y_train)

In [17]:
y_pred = best_svr.predict(X_test)

In [18]:
mse = mean_squared_error(y_test, y_pred)

In [19]:
print("Best parameters:", best_params)

Best parameters: {'C': 10, 'epsilon': 0.01, 'kernel': 'linear'}


In [20]:
print("Best score (neg_mean_squared_error):", best_score)

Best score (neg_mean_squared_error): -9.313097204991586e-05


In [21]:
print("Mean Squared Error:", mse)

Mean Squared Error: 4.807623897967266e-05


In [59]:
#Task 1.3
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [60]:
# Load the California Housing dataset
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

In [61]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [62]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [63]:
# Define the parameter grid for GridSearchCV or RandomizedSearchCV
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': [0.1, 1, 10]
}

In [64]:
# RandomizedSearchCV
svr = SVR()
search = RandomizedSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=5, n_iter=10, n_jobs=-1, random_state=42)
search.fit(X_train_scaled, y_train)

In [65]:
# Get the best SVR predictor
best_svr = search.best_estimator_

In [66]:
# Evaluate the best SVR predictor on the test set
y_pred = best_svr.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

In [67]:
print("Best SVR predictor:")
print("Best parameters:", search.best_params_)
print(f"MSE: {mse:.2f}")
print(f"MAE: {mae:.2f}")

Best SVR predictor:
Best parameters: {'kernel': 'rbf', 'gamma': 1, 'C': 1}
MSE: 0.33
MAE: 0.38


In [68]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [69]:
# Load the California Housing dataset
data = fetch_california_housing(as_frame=True)
X = data.data
y = data.target

In [70]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [71]:
# Define the pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('feature_selection', SelectKBest(score_func=f_regression)),
    ('svr', SVR())
])

In [72]:
# Define the parameter grid for GridSearchCV or RandomizedSearchCV
param_grid = {
    'feature_selection__k': [4, 6, 8, 10],  # Number of features to select
    'svr__kernel': ['linear', 'rbf'],
    'svr__C': [0.1, 1, 10, 100],
    'svr__gamma': [0.1, 1, 10]
}

In [74]:
# RandomizedSearchCV
search = RandomizedSearchCV(pipeline, param_grid, scoring='neg_mean_squared_error', cv=5, n_iter=10, n_jobs=-1, random_state=42)

In [75]:
search.fit(X_train, y_train)

25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\rohan\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\rohan\anaconda3\lib\site-packages\sklearn\pipeline.py", line 401, in fit
    Xt = self._fit(X, y, **fit_params_steps)
  File "c:\Users\rohan\anaconda3\lib\site-packages\sklearn\pipeline.py", line 359, in _fit
    X, fitted_transformer = fit_transform_one_cached(
  File "c:\Users\rohan\anaconda3\lib\site-packages\joblib\memory.py", line 349, in __call__
    return self.func(*args, **kwargs)
  File

In [76]:
# Get the best SVR predictor
best_svr = search.best_estimator_

In [77]:
# Evaluate the best SVR predictor on the test set
y_pred = best_svr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

In [78]:
print("Best SVR predictor:")
print("Best parameters:", search.best_params_)
print(f"MSE: {mse:.2f}")
print(f"MAE: {mae:.2f}")

Best SVR predictor:
Best parameters: {'svr__kernel': 'rbf', 'svr__gamma': 1, 'svr__C': 1, 'feature_selection__k': 6}
MSE: 0.39
MAE: 0.41
