# SVM 

In [2]:
pip install plotly

Collecting plotlyNote: you may need to restart the kernel to use updated packages.

  Downloading plotly-5.20.0-py3-none-any.whl.metadata (7.0 kB)
Downloading plotly-5.20.0-py3-none-any.whl (15.7 MB)
   ---------------------------------------- 0.0/15.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.7 MB ? eta -:--:--
   ---------------------------------------- 0.0/15.7 MB 186.2 kB/s eta 0:01:25
   ---------------------------------------- 0.0/15.7 MB 186.2 kB/s eta 0:01:25
   ---------------------------------------- 0.0/15.7 MB 186.2 kB/s eta 0:01:25
   ---------------------------------------- 0.0/15.7 MB 186.2 kB/s eta 0:01:25
   ---------------------------------------- 0.0/15.7 MB 108.9 kB/s eta 0:02:24
   ---------------------------------------- 0.1/15.7 MB 148.8 kB/s eta 0:01:46
   ---------------------------------------- 0.

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR, SVC
from sklearn.metrics import mean_squared_error, accuracy_score
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

In [5]:
diamonds=sns.load_dataset('diamonds')
X = diamonds[['carat', 'depth', 'table', 'x', 'y', 'z']]
y = diamonds['price']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# SVR model
svr = SVR(kernel='linear')
svr.fit(X_train, y_train)

In [6]:
y_pred = svr.predict(X_test)
mean_squared_error(y_test, y_pred)
print("SVR mean squared error:", mean_squared_error(y_test, y_pred))

SVR mean squared error: 3771600.163857611


In [7]:
diamonds=sns.load_dataset('diamonds')
y_class=pd.cut(diamonds["price"],bins=2,labels=[0,1])
# Predictions
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# SVR model
svc = SVR(kernel='linear')
svc.fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)
accuracy_score(y_test, y_pred_svc)
print("SVC mean accuracy:",accuracy_score(y_test, y_pred_svc))

ValueError: Classification metrics can't handle a mix of multiclass and continuous targets

In [None]:
# Plotting with Plotly
fig = go.Figure()

# SVR scatter plot
fig.add_trace(go.Scatter(x=y_test, y=y_pred, mode='markers', name='SVR', marker=dict(color='blue')))

# SVC scatter plot
fig.add_trace(go.Scatter(x=y_test, y=y_pred_svc, mode='markers', name='SVC', marker=dict(color='red')))

fig.update_layout(
    xaxis_title='Actual Price',
    yaxis_title='Predicted Price (SVR) / Binary Prediction (SVC)',
    title='SVR vs SVC on Diamond Dataset'
)

fig.show()

# hyperparemeter tuning 


Grid Search:

Grid search is a simple hyperparameter tuning technique where you define a grid of hyperparameter values to search over.
For each combination of hyperparameters in the grid, the model is trained and evaluated using cross-validation.
After evaluating all combinations, the set of hyperparameters that yields the best performance on the validation data is selected.
Grid search exhaustively searches through all specified hyperparameter values, which can be computationally expensive for large hyperparameter spaces.
Random Search:

Random search is a more efficient hyperparameter tuning technique compared to grid search.
Instead of exhaustively searching through all specified hyperparameter values, random search randomly samples hyperparameter values from predefined distributions.
This approach allows for a more diverse exploration of the hyperparameter space, potentially leading to better performance compared to grid search with fewer evaluations.
Random search is especially useful when the search space is large and the computational resources are limited.
Bayesian Optimization:

Bayesian optimization is a probabilistic model-based hyperparameter tuning technique.
It builds a probabilistic surrogate model of the objective function (e.g., validation accuracy) and uses it to decide where to sample the next set of hyperparameters.
By iteratively updating the surrogate model based on the observed outcomes, Bayesian optimization efficiently explores the hyperparameter space and focuses the search on promising regions.
This method tends to require fewer evaluations compared to grid search and random search, making it suitable for expensive-to-evaluate models.
Manual Tuning:

Manual tuning involves manually adjusting hyperparameters based on domain knowledge, intuition, or trial and error.
This approach is straightforward but can be time-consuming and subjective.
Manual tuning is often used in combination with other hyperparameter tuning techniques, especially when the hyperparameter space is small or when the practitioner has prior knowledge about the problem domain.


# Grid search 

In [5]:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the Iris dataset
iris = load_iris()


pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standard scaling
    ('svm', SVC())  # Support Vector Classifier
])

# Define the grid of hyperparameters to search over
param_grid = {
    'svm__C': [0.1, 1, 10, 100],  # Regularization parameter for SVM
    'svm__kernel': ['linear', 'poly', 'rbf']  # Kernel type for SVM
}

# Define the grid search with cross-validation
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5)

# Perform grid search
grid_search.fit(iris.data, iris.target)

# Print the best hyperparameters found
print("Best hyperparameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)


Best hyperparameters: {'svm__C': 10, 'svm__kernel': 'rbf'}
Best cross-validation score: 0.9733333333333334


# Random search

In [9]:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from scipy.stats import uniform

# Load the Iris dataset
iris = load_iris()


pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standard scaling
    ('svm', SVC())  # Support Vector Classifier
])

# Define the grid of hyperparameters to search over
param_dist = {
    'svm__C':uniform(0.1, 100),  # Regularization parameter for SVM
    'svm__kernel': ['linear', 'poly', 'rbf']  # Kernel type for SVM
}

# Define the grid search with cross-validation
random= RandomizedSearchCV(estimator=pipeline, param_distributions=param_dist, cv=5,n_iter=10)

# Perform grid search
random.fit(iris.data, iris.target)

# Print the best hyperparameters found
print("Best hyperparameters:", random.best_params_)
print("Best cross-validation score:", random.best_score_)


Best hyperparameters: {'svm__C': 50.86270434352317, 'svm__kernel': 'poly'}
Best cross-validation score: 0.9733333333333334


# Bassain optimization 

In [13]:
pip install scikit-optimize

Note: you may need to restart the kernel to use updated packages.
Collecting scikit-optimize
  Downloading scikit_optimize-0.10.1-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-23.12.0-py3-none-any.whl.metadata (11 kB)
Downloading scikit_optimize-0.10.1-py2.py3-none-any.whl (107 kB)
   ---------------------------------------- 0.0/107.7 kB ? eta -:--:--
   ---------------------------------------- 0.0/107.7 kB ? eta -:--:--
   --- ------------------------------------ 10.2/107.7 kB ? eta -:--:--
   ----------- --------------------------- 30.7/107.7 kB 187.9 kB/s eta 0:00:01
   ----------- --------------------------- 30.7/107.7 kB 187.9 kB/s eta 0:00:01
   ----------- --------------------------- 30.7/107.7 kB 187.9 kB/s eta 0:00:01
   -------------- ------------------------ 41.0/107.7 kB 122.9 kB/s eta 0:00:01
   ---------------------- ---------------- 61.4/107.7 kB 163.6 kB/s eta 0:00:01
   ----------------------------- --------- 8



In [14]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from skopt import BayesSearchCV
from skopt.space import Real, Categorical

# Load the Iris dataset
iris = load_iris()

# Define the pipeline with standard scaling and SVC
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standard scaling
    ('svm', SVC())  # Support Vector Classifier
])

# Define the search space for hyperparameters
search_spaces = {
    'svm__C': Real(0.1, 100, prior='log-uniform'),  # Prior distribution for regularization parameter
    'svm__kernel': Categorical(['linear', 'poly', 'rbf'])  # Kernel type for SVM
}

# Define the Bayesian optimization search with cross-validation
bayes_search = BayesSearchCV(
    estimator=pipeline, 
    search_spaces=search_spaces, 
    cv=5,
    n_iter=10,  # Number of iterations (trials)
    random_state=42
)

# Perform Bayesian optimization search
bayes_search.fit(iris.data, iris.target)

# Print the best hyperparameters found
print("Best hyperparameters:", bayes_search.best_params_)
print("Best cross-validation score:", bayes_search.best_score_)


Best hyperparameters: OrderedDict({'svm__C': 4.267850550142993, 'svm__kernel': 'rbf'})
Best cross-validation score: 0.9800000000000001


# Manual tunning

In [15]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the Iris dataset
iris = load_iris()

# Define the pipeline with standard scaling and SVC
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standard scaling
    ('svm', SVC(C=1.0, kernel='rbf'))  # Support Vector Classifier with manual hyperparameters
])

# Perform cross-validation with the pipeline
cross_val_scores = cross_val_score(pipeline, iris.data, iris.target, cv=5)

# Print the cross-validation scores
print("Cross-validation scores:", cross_val_scores)
print("Mean cross-validation score:", cross_val_scores.mean())


Cross-validation scores: [0.96666667 0.96666667 0.96666667 0.93333333 1.        ]
Mean cross-validation score: 0.9666666666666666
