In [None]:


# ### Q1. What is the mathematical formula for a linear SVM?

# The mathematical formula for a linear Support Vector Machine (SVM) is:

# \[ f(x) = w^T x + b \]

# where:
# - \( w \) is the weight vector.
# - \( x \) is the input vector.
# - \( b \) is the bias term.

# ### Q2. What is the objective function of a linear SVM?

# The objective of a linear SVM is to find the hyperplane that maximizes the margin between the two classes. Since the margin width is \( 2 / \|w\| \), maximizing it is equivalent to the following (hard-margin) minimization problem:

# \[ \min_{w, b} \frac{1}{2} \|w\|^2 \]

# subject to the constraints:

# \[ y_i (w^T x_i + b) \geq 1 \]

# for all \( i \), where \( y_i \in \{-1, +1\} \) is the label of the \( i \)-th training sample.

# ### Q3. What is the kernel trick in SVM?

# The kernel trick allows SVMs to create nonlinear decision boundaries by implicitly mapping the input features into a higher-dimensional space. A kernel function \( K(x, x') \) returns the dot product that two inputs would have in that higher-dimensional space, computed directly from the original features, so the mapping itself never has to be evaluated. The SVM can then find a linear separation in the transformed space, which corresponds to a nonlinear boundary in the original space. Common kernel functions include the polynomial kernel, radial basis function (RBF) kernel, and sigmoid kernel.

# ### Q4. What is the role of support vectors in SVM? Explain with an example.

# Support vectors are the data points that lie closest to the decision boundary (hyperplane) and are critical in defining the position and orientation of the hyperplane. They are the most important elements of the training set because they directly affect the optimal hyperplane.

# For example, consider a binary classification problem where we have two classes of points. The support vectors are the points on the edge of each class that determine the margin. Removing a support vector can change the position of the optimal hyperplane, whereas removing any non-support vector leaves it unchanged.

# ### Q5. Illustrate with examples and graphs of Hyperplane, Marginal plane, Soft margin, and Hard margin in SVM.

# Here's a brief description of each concept:

# - **Hyperplane**: The decision boundary that separates the different classes.
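# - **Marginal planes**: The two planes that run parallel to the hyperplane and pass through the support vectors; the distance between them is the margin.
# - **Soft margin**: Allows some training points to fall inside the margin or be misclassified, at a penalty controlled by the regularization parameter \( C \), to achieve better generalization when the data is not perfectly separable.
# - **Hard margin**: Requires that all training data points are correctly classified and lie on or outside the marginal planes, with no exceptions; this is only possible when the data is linearly separable.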

# ### Q6. SVM Implementation through Iris dataset

# Let's implement a linear SVM classifier on the Iris dataset using scikit-learn, then plot the decision boundaries and explore the effect of different values of the regularization parameter \( C \).

# #### Load the dataset and split into training and testing sets

# ```python
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from sklearn import datasets
# from sklearn.model_selection import train_test_split
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score

# # Load the Iris dataset
# iris = datasets.load_iris()
# X = iris.data[:, :2]  # we only take the first two features for easy visualization
# y = iris.target

# # Split the dataset into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# # Train a linear SVM classifier
# svc = SVC(kernel='linear', C=1)
# svc.fit(X_train, y_train)

# # Predict the labels for the testing set
# y_pred = svc.predict(X_test)

# # Compute the accuracy of the model on the testing set
# accuracy = accuracy_score(y_test, y_pred)
# print(f'Accuracy: {accuracy:.2f}')
# ```

# #### Plot the decision boundaries

# ```python
# # Function to plot the decision boundaries
# def plot_decision_boundaries(X, y, model):
#     h = .02  # step size in the mesh
#     x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
#     y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
#     xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
#                          np.arange(y_min, y_max, h))
#     Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
#     Z = Z.reshape(xx.shape)
#     plt.contourf(xx, yy, Z, alpha=0.8)
#     plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', marker='o')
#     plt.xlabel('Sepal length')
#     plt.ylabel('Sepal width')
#     plt.title('Decision boundaries of SVM')
#     plt.show()

# plot_decision_boundaries(X_test, y_test, svc)
# ```

# #### Try different values of the regularization parameter \( C \)

# ```python
# C_values = [0.01, 0.1, 1, 10, 100]
# for C in C_values:
#     svc = SVC(kernel='linear', C=C)
#     svc.fit(X_train, y_train)
#     y_pred = svc.predict(X_test)
#     accuracy = accuracy_score(y_test, y_pred)
#     print(f'C: {C}, Accuracy: {accuracy:.2f}')
#     plot_decision_boundaries(X_test, y_test, svc)
# ```

# ### Bonus Task: Implement a linear SVM classifier from scratch

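# Here is a simple implementation of a linear SVM classifier from scratch. Note that it writes the decision function as \( w^T x - b \), so the sign of its bias term is opposite to the \( b \) used in Q1: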

# ```python
# class LinearSVM:
#     def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
#         self.learning_rate = learning_rate
#         self.lambda_param = lambda_param
#         self.n_iters = n_iters
#         self.w = None
#         self.b = None

#     def fit(self, X, y):
#         n_samples, n_features = X.shape
#         y_ = np.where(y <= 0, -1, 1)
#         self.w = np.zeros(n_features)
#         self.b = 0

#         for _ in range(self.n_iters):
#             for idx, x_i in enumerate(X):
#                 condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
#                 if condition:
#                     self.w -= self.learning_rate * (2 * self.lambda_param * self.w)
#                 else:
#                     self.w -= self.learning_rate * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
#                     self.b -= self.learning_rate * y_[idx]

#     def predict(self, X):
#         approx = np.dot(X, self.w) - self.b
#         return np.sign(approx)

# # Load the Iris dataset and split it
# X = iris.data[:, :2]
# y = iris.target
# y = np.where(y == 0, -1, 1)  # Change labels to -1 and 1 for binary classification

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# # Train the custom SVM model
# svm = LinearSVM()
# svm.fit(X_train, y_train)

# # Predict the labels
# y_pred = svm.predict(X_test)

# # Compute the accuracy
# accuracy = accuracy_score(y_test, y_pred)
# print(f'Custom SVM Accuracy: {accuracy:.2f}')

# # Compare with scikit-learn implementation
# svc = SVC(kernel='linear', C=1)
# svc.fit(X_train, y_train)
# y_pred_sklearn = svc.predict(X_test)
# accuracy_sklearn = accuracy_score(y_test, y_pred_sklearn)
# print(f'Scikit-learn SVM Accuracy: {accuracy_sklearn:.2f}')
# ```

