# In [None]:
# Import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the Iris dataset and pull out the feature matrix and the label vector
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Drop every sample labelled 2 so that only two classes remain (binary task)
binary_mask = y != 2
X = X[binary_mask]
y = y[binary_mask]

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Q1. Mathematical formula for a linear SVM
print("Mathematical formula for a linear SVM:")
print("f(x) = w * x + b")
print("where w is the weight vector, x is the feature vector, and b is the bias term.")

# Q2. Objective function of a linear SVM
# Soft-margin primal problem: every sample i has its own slack variable xi_i,
# and the slacks must be non-negative, otherwise the problem is a different one.
print("\nObjective function of a linear SVM:")
print("minimize 1/2 * ||w||^2 + C * sum(xi_i)")
print("subject to y_i * (w * x_i + b) >= 1 - xi_i and xi_i >= 0 for all i")

# Q3. The kernel trick in SVM
# The kernel evaluates inner products of the implicit feature space; the data
# itself is never transformed explicitly.
print("\nThe kernel trick in SVM:")
print("The kernel trick allows SVM to operate in a higher-dimensional space without explicitly computing the coordinates.")
print("It uses a kernel function (e.g., RBF, polynomial) to compute inner products in that space directly from the original features.")

# Q4. Role of support vectors in SVM
print("\nRole of support vectors in SVM:")
print("Support vectors are the data points closest to the hyperplane. They determine the position and orientation of the hyperplane.")
print("Example: In a 2D space, support vectors lie on the margin boundaries.")

# Q5. Illustrate Hyperplane, Marginal Plane, Soft Margin, and Hard Margin in SVM
# The figure is two-dimensional, so the classifier drawn here is fitted on the
# first two feature columns only. A model fitted on every column cannot be
# evaluated on a 2-column grid (it fails with a feature-count mismatch).
clf = SVC(kernel='linear', C=1)
clf.fit(X_train[:, :2], y_train)

# Plot decision boundary
plt.figure(figsize=(12, 6))
ax = plt.gca()

# Create grid to plot: a 500 x 500 mesh covering the first two features with a margin of 1
xx, yy = np.meshgrid(np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 500),
                     np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, 500))
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plotting
ax.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.Paired)
# Hyperplane (f(x) = 0, solid) and the two margin planes (f(x) = -1 and +1, dashed)
ax.contour(xx, yy, Z, levels=[-1, 0, 1], colors='k', linestyles=['--', '-', '--'])
# Circle the support vectors of the fitted classifier
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
           s=120, facecolors='none', edgecolors='k', linewidths=1.5)
# Training points are circles, test points are semi-transparent squares
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k', marker='o', cmap=plt.cm.Paired)
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, edgecolors='k', marker='s', cmap=plt.cm.Paired, alpha=0.5)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
plt.title('Decision Boundary with Linear SVM')
plt.show()

# Q6. SVM Implementation through Iris dataset
print("\nSVM Implementation with Iris Dataset:")

# Build and fit a linear-kernel classifier on the training split
# (fit returns the estimator, so construction and training can be chained)
clf = SVC(kernel='linear', C=1).fit(X_train, y_train)

# Predict the held-out samples and measure the fraction classified correctly
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy of SVM model: {accuracy:.2f}")

# Bonus task: Implement a linear SVM classifier from scratch using Python
from sklearn.preprocessing import StandardScaler

# Split the dataset again for consistency. The split happens BEFORE scaling and
# uses the same test_size / random_state as the earlier split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the data. The scaler learns its mean and variance from the
# training rows only, so no statistics of the test rows leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Standardized copy of the full data set, kept under its original name
X_scaled = scaler.transform(X)

# Linear SVM from scratch
class LinearSVM:
    """Binary linear soft-margin SVM trained with per-sample sub-gradient descent.

    The model minimizes
    ``1/2 * ||w||^2 + C * sum_i max(0, 1 - y_i * (w . x_i + b))``
    where the labels are mapped internally to -1 / +1.

    Args:
        learning_rate: Step size applied to every parameter update.
        n_iter: Number of full passes over the training data.
        C: Weight of the hinge-loss term; larger values penalize margin
            violations more strongly relative to the ``||w||^2`` term.

    Attributes set by ``fit``:
        weights: 1-D array with one coefficient per feature.
        bias: Scalar intercept ``b``.
        classes_: The two distinct labels seen in ``y``, sorted ascending;
            ``classes_[0]`` is the negative class, ``classes_[1]`` the positive.
    """

    def __init__(self, learning_rate=0.001, n_iter=1000, C=1.0):
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.C = C
        self.weights = None
        self.bias = None
        self.classes_ = None

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of n_samples labels containing exactly two distinct values.

        Raises:
            ValueError: If ``y`` does not contain exactly two distinct labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        classes = np.unique(y)
        if classes.size != 2:
            raise ValueError(
                f"LinearSVM is a binary classifier; expected 2 classes, got {classes.size}"
            )
        self.classes_ = classes

        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Larger label -> +1, smaller label -> -1 (0/1 and -1/+1 both map as expected)
        y_ = np.where(y == classes[1], 1.0, -1.0)

        # The ||w||^2 term belongs to the whole objective, so each per-sample
        # step carries a 1/n_samples share of its gradient.
        reg_share = 1.0 / n_samples

        for _ in range(self.n_iter):
            for x_i, y_i in zip(X, y_):
                satisfied = y_i * (np.dot(x_i, self.weights) + self.bias) >= 1
                grad_w = reg_share * self.weights
                if not satisfied:
                    # Hinge-loss sub-gradient: -C * y_i * x_i for w and -C * y_i for b
                    grad_w = grad_w - self.C * y_i * x_i
                    # Descent step on b: b -= lr * (-C * y_i)
                    self.bias += self.learning_rate * self.C * y_i
                self.weights -= self.learning_rate * grad_w

    def predict(self, X):
        """Return the predicted label for every row of ``X``.

        After ``fit`` the result uses the same labels that were passed as ``y``.
        If ``weights``/``bias`` were assigned by hand and no labels are known,
        the sign of the decision function is returned instead.
        """
        approx = np.dot(X, self.weights) + self.bias
        if self.classes_ is None:
            return np.sign(approx)
        return np.where(approx >= 0, self.classes_[1], self.classes_[0])

# Train and evaluate the custom Linear SVM
model = LinearSVM()
model.fit(X_train, y_train)

# y_test holds the labels 0 and 1 (class 2 was filtered out above), while a
# sign-based predictor reports the negative class as -1. Map every positive
# prediction to 1 and everything else to 0 so that correct negative
# predictions are not scored as errors.
y_pred_custom = np.where(model.predict(X_test) > 0, 1, 0)
accuracy_custom = accuracy_score(y_test, y_pred_custom)

print(f"Accuracy of custom Linear SVM model: {accuracy_custom:.2f}")

# Compare with scikit-learn's implementation
print(f"Accuracy of scikit-learn Linear SVM model: {accuracy:.2f}")

# Plot decision boundaries for the custom SVM model
# `model` is trained on every feature column, so it cannot score a 2-column
# grid (the dot product fails on the shape mismatch). A second instance is
# therefore trained on the two plotted features only.
model_2d = LinearSVM()
model_2d.fit(X_train[:, :2], y_train)

plt.figure(figsize=(12, 6))
ax = plt.gca()

# Create grid to plot. The grid is rebuilt from the current X_train / X_test so
# that it covers the same (standardized) coordinate range as the plotted points.
X_plot = np.vstack([X_train[:, :2], X_test[:, :2]])
xx_scaled, yy_scaled = np.meshgrid(
    np.linspace(X_plot[:, 0].min() - 1, X_plot[:, 0].max() + 1, 500),
    np.linspace(X_plot[:, 1].min() - 1, X_plot[:, 1].max() + 1, 500))
Z_custom = model_2d.predict(np.c_[xx_scaled.ravel(), yy_scaled.ravel()])
Z_custom = Z_custom.reshape(xx_scaled.shape)

# Plotting: filled class regions, training points as circles, test points as squares
ax.contourf(xx_scaled, yy_scaled, Z_custom, alpha=0.8, cmap=plt.cm.Paired)
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k', marker='o', cmap=plt.cm.Paired)
ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, edgecolors='k', marker='s', cmap=plt.cm.Paired, alpha=0.5)
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
plt.title('Decision Boundary with Custom Linear SVM')
plt.show()
