In [1]:
# Q1.

In [2]:
# Polynomial functions and kernel functions are closely related in machine learning.
# Kernel functions allow linear algorithms to operate in a higher-dimensional feature space without explicitly computing the transformed features (the "kernel trick").
# The polynomial kernel, K(x, y) = (gamma * <x, y> + coef0)^degree, is a specific kernel function used in SVMs and other algorithms. It equals the inner product of polynomial feature expansions of x and y, so it captures polynomial interactions between the input features without constructing them explicitly.

In [3]:
# Q2.

In [4]:
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
# Load the Iris dataset through scikit-learn's datasets module; the feature
# matrix and class labels are read from it in the following cell.
iris = datasets.load_iris()

In [7]:
# Inputs are restricted to the first two feature columns; targets are the class labels.
X, y = iris.data[:, :2], iris.target

In [8]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Support vector classifier configured with a degree-3 polynomial kernel.
classifier = SVC(
    kernel='poly',
    degree=3,
    C=1.0,
    gamma='scale',
)

In [10]:
# Train the polynomial-kernel classifier on the training split.
classifier.fit(X=X_train, y=y_train)

In [12]:
# Predict class labels for the held-out samples.
y_pred = classifier.predict(X=X_test)

In [13]:
# Compare the predictions against the true held-out labels and report the score.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

Accuracy: 0.8333333333333334


In [14]:
# Q3.

In [15]:
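# Epsilon sets the half-width of the epsilon-insensitive tube around the SVR prediction: training points whose error is smaller than epsilon incur no loss.

# Larger epsilon means a wider tube, so more training points fall inside it and fewer of them become support vectors. The fitted function becomes simpler (flatter) and ignores small deviations.

# Smaller epsilon means a narrower tube, so more training points lie on or outside it and become support vectors. The model follows the training data more closely and is more sensitive to noise.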

In [16]:
# Q4.

In [17]:
# Kernel Function in SVR: Determines the type of mapping applied to input features.
#Examples:
#Linear: Suitable for linear relationships.
#Polynomial: Captures non-linear patterns.
#RBF (Radial Basis Function): Effective for complex, non-linear relationships.

# C Parameter: Controls the trade-off between smoothness and fitting to the training data.
#Examples:
#Small C: Emphasizes smoothness, may underfit noisy data.
#Large C: Emphasizes fitting to training data, may overfit noisy data.

#Epsilon Parameter: Defines the width of the margin around the predicted values.
#Examples:
#Small value: Narrow margin, sensitive to variations.
#Large value: Wider margin, more tolerant to variations.

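#Gamma Parameter: Sets how far the influence of a single training point reaches (used by the RBF, polynomial and sigmoid kernels).
#Examples:
#Small Gamma: Far-reaching influence, smoother fitted function.
#Large Gamma: Short-range influence, more complex (wiggly) fitted function that can overfit.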

In [18]:
# Q5.

In [41]:
# Let's import necessary libraries
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [20]:
# Load the breast cancer dataset through scikit-learn's datasets module.
cancer = datasets.load_breast_cancer()

In [22]:
# Feature matrix and target labels, taken straight from the loaded dataset.
X, y = cancer.data, cancer.target

In [23]:
# Train-test split: 20% of the samples are held out, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [24]:
# Preprocess the data: create the scaler here; it is fitted on the training
# split and applied to both splits in the following cell.
scaler = StandardScaler()

In [25]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so the test data never influences it.
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [26]:
# Baseline SVC with an RBF kernel; it is trained on the scaled training data
# in the following cell.
svc_classifier = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
)

In [27]:
# Train the baseline classifier on the standardized training features.
svc_classifier.fit(X=X_train_scaled, y=y_train)

In [28]:
# Predict labels for the standardized held-out features.
y_pred = svc_classifier.predict(X=X_test_scaled)

In [29]:
# Overall accuracy plus the per-class precision/recall/F1 text report.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
clf_report = classification_report(y_true=y_test, y_pred=y_pred)

In [30]:
# Show the accuracy first, then the report under its heading.
print(f"Accuracy: {accuracy}")
print("Classification Report:", clf_report, sep="\n")

Accuracy: 0.9824561403508771
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [31]:
# Tune hyperparameters using GridSearchCV: candidate values for each SVC
# setting, keyed by the SVC argument name.
parameters = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    gamma=['scale', 'auto'],
    kernel=['linear', 'rbf', 'poly'],
)

In [32]:
# Search over every combination in `parameters`, using 5-fold cross-validation.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=parameters,
    cv=5,
)

In [33]:
# Run the search on the standardized training data only.
grid_search.fit(X=X_train_scaled, y=y_train)

In [34]:
# Display the parameter combination the grid search selected.
grid_search.best_params_

{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}

In [36]:
# Keep a handle on the estimator the grid search selected as best.
tuned_svc_classifier = grid_search.best_estimator_

In [38]:
# No second fit is needed: grid_search was constructed without overriding
# GridSearchCV's default refit=True, so best_estimator_ has already been
# retrained on the data passed to grid_search.fit (X_train_scaled, y_train).
# Displaying the estimator keeps this cell's output without repeating the training.
tuned_svc_classifier

In [39]:
# Predict labels for the standardized held-out features with the tuned model.
y_pred = tuned_svc_classifier.predict(X=X_test_scaled)

In [40]:
# Score the tuned model on the held-out split and print both metrics.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")
print("Classification Report:", report, sep="\n")

Accuracy: 0.9824561403508771
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        43
           1       0.97      1.00      0.99        71

    accuracy                           0.98       114
   macro avg       0.99      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [42]:
# The classifier was trained on standardized features, so it is only usable
# together with the fitted scaler. Persist the scaler next to the model so new
# data can be transformed the same way before calling predict.
joblib.dump(scaler, 'scaler_breast_cancer.joblib')
# Kept as the last expression so the cell still displays the model's file list.
joblib.dump(tuned_svc_classifier, 'tuned_svc_classifier_breast_cancer.joblib')

['tuned_svc_classifier_breast_cancer.joblib']