In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from joblib import dump

# Q1. How polynomial functions relate to kernel functions.
# The answer is kept as a list of lines and written out with a single print
# call; the emitted text is one line per entry.
q1_answer = [
    "Q1. Relationship between polynomial functions and kernel functions:",
    "Polynomial functions can be used as kernel functions in machine learning algorithms, specifically in SVM.",
    "The polynomial kernel function transforms the input space into a higher-dimensional space, allowing the SVM to find a hyperplane that can separate the data more effectively.",
    "For a polynomial kernel, the function is usually of the form: (gamma * <x, x'> + coef0) ^ degree",
]
print("\n".join(q1_answer))

# Q2. Binary classification on Iris with a degree-3 polynomial-kernel SVM
print("\nQ2. Implementing an SVM with a polynomial kernel:")

# Fetch Iris and pull out the feature matrix and the label vector
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Drop every sample labelled 2 so the task becomes a two-class problem
keep = y != 2
X, y = X[keep], y[keep]

# Hold out 30% of the samples for testing (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training split only,
# then apply the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the polynomial-kernel classifier and fit it in one expression
poly_svc = SVC(kernel='poly', degree=3, C=1.0, gamma='scale').fit(
    X_train_scaled, y_train
)

# Label the held-out samples
y_pred_poly = poly_svc.predict(X_test_scaled)

# Summarize performance: overall accuracy, then the per-class report
accuracy_poly = accuracy_score(y_test, y_pred_poly)
report_poly = classification_report(y_test, y_pred_poly)
print(f"Accuracy of polynomial kernel SVM: {accuracy_poly:.2f}")
print("\nClassification Report:\n", report_poly)

# Q3. What changing epsilon does to the support-vector count in SVR.
# Each entry is written on its own line; the first entry carries the leading
# blank line that separates this answer from the previous output.
q3_answer = (
    "\nQ3. Effect of epsilon on the number of support vectors in SVR:",
    "Increasing the value of epsilon in SVR makes the model less sensitive to small changes in the training data.",
    "This can result in fewer support vectors, as the model becomes less likely to fit the noise in the training data.",
)
for q3_line in q3_answer:
    print(q3_line)

# Q4. How the kernel, C, epsilon and gamma choices shape an SVR model.
# The numbered notes are stored in order and printed newline-separated in a
# single call.
q4_answer = [
    "\nQ4. Effect of kernel function, C, epsilon, and gamma in SVR:",
    # 1. kernel
    "1. Kernel Function: Determines the type of transformation applied to the data. Common kernels include linear, polynomial, and RBF.",
    "   - Polynomial: Useful for capturing non-linear relationships.",
    "   - RBF: Effective for capturing complex patterns in high-dimensional space.",
    # 2. regularization strength
    "2. C Parameter: Controls the trade-off between achieving a low error on the training data and minimizing the model complexity.",
    "   - A higher C tries to fit the training data better but may lead to overfitting.",
    "   - A lower C results in a simpler model and may lead to underfitting.",
    # 3. width of the no-penalty tube
    "3. Epsilon Parameter: Defines a margin of tolerance where no penalty is given for errors.",
    "   - Larger epsilon values allow for more tolerance and fewer support vectors.",
    "   - Smaller epsilon values make the model more sensitive to errors.",
    # 4. kernel coefficient
    "4. Gamma Parameter: Defines how far the influence of a single training example reaches.",
    "   - High gamma values mean a small radius of influence, leading to more complex models.",
    "   - Low gamma values mean a larger radius of influence, leading to smoother models.",
]
print(*q4_answer, sep="\n")

# Q5. Assignment Implementation
# Baseline: a linear-kernel SVM on the Iris dataset (no class filtering here)
from sklearn.datasets import load_iris
iris = load_iris()
X, y = iris.data, iris.target

# 70/30 train/test split with a fixed seed so results are repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize: statistics come from the training split only and are then
# applied unchanged to the test split
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear kernel as the starting point before any tuning
svc = SVC(kernel='linear').fit(X_train_scaled, y_train)

# Predictions for the held-out samples
y_pred = svc.predict(X_test_scaled)

# Report overall accuracy followed by per-class precision/recall/F1
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"\nAccuracy of linear SVM: {accuracy:.2f}")
print("\nClassification Report:\n", report)

# Hyperparameter tuning with GridSearchCV
#
# The scaler and the classifier are tuned together as a single Pipeline so that
#   * every cross-validation fold fits its own StandardScaler on that fold's
#     training portion only (no scaling statistics leak from the validation part);
#   * the model that is finally refit on all data and written to disk
#     standardizes its inputs exactly as it did while being tuned.
# Previously the search ran on pre-scaled features but the final model was
# refit on the raw, unscaled X, so the saved classifier did not correspond to
# the tuned configuration.
from sklearn.base import clone
from sklearn.pipeline import Pipeline

svc_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC()),
])

# One sub-grid per kernel, so only parameters that the kernel actually uses
# are searched: 'degree' matters only for 'poly', while 'gamma' is used by
# both 'rbf' and 'poly' and is ignored by 'linear'.
# Parameter names carry the 'svc__' prefix because they address the 'svc'
# step of the pipeline.
c_values = [0.1, 1, 10]
gamma_values = ['scale', 'auto']
param_grid = [
    {'svc__kernel': ['linear'], 'svc__C': c_values},
    {'svc__kernel': ['rbf'], 'svc__C': c_values, 'svc__gamma': gamma_values},
    {
        'svc__kernel': ['poly'],
        'svc__C': c_values,
        'svc__gamma': gamma_values,
        'svc__degree': [2, 3, 4],
    },
]

# Search on the UNSCALED training split; scaling happens inside the pipeline
grid_search = GridSearchCV(svc_pipeline, param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Best parameters and performance
print("\nBest parameters found by GridSearchCV:")
print(grid_search.best_params_)
print(f"Best cross-validation score: {grid_search.best_score_:.2f}")

# Held-out check, done before the test rows are folded into the final fit
print(f"Test accuracy of tuned classifier: {grid_search.score(X_test, y_test):.2f}")

# Train the tuned classifier on the entire dataset.
# A clone is refit so that grid_search.best_estimator_ stays the model that
# was fitted on the training split only.
best_svc = clone(grid_search.best_estimator_)
best_svc.fit(X, y)

# Save the trained classifier to a file.
# The saved object is the whole pipeline (scaler + SVC), so it accepts raw,
# unscaled feature rows at prediction time.
dump(best_svc, 'best_svc_model.joblib')
print("\nTrained classifier saved to 'best_svc_model.joblib'.")


Q1. Relationship between polynomial functions and kernel functions:
Polynomial functions can be used as kernel functions in machine learning algorithms, specifically in SVM.
The polynomial kernel function transforms the input space into a higher-dimensional space, allowing the SVM to find a hyperplane that can separate the data more effectively.
For a polynomial kernel, the function is usually of the form: (gamma * <x, x'> + coef0) ^ degree

Q2. Implementing an SVM with a polynomial kernel:
Accuracy of polynomial kernel SVM: 1.00

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Q3. Effect of epsilon on the number of support vectors in SVR:
Increasing the value of epsilon in SVR makes the