### question 7

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.utils import shuffle

def predict(w, b, x):
    """
    Classify a single data point with the linear rule sign(w·x + b).

    Parameters:
    w (numpy.ndarray): Weight vector
    b (float): Bias term
    x (numpy.ndarray): Data point

    Returns:
    int: +1 when w·x + b is non-negative (a score of exactly zero counts
         as +1), otherwise -1
    """
    score = np.dot(w, x) + b

    # Ties (score == 0) fall on the positive side.
    if score >= 0:
        return 1
    return -1

def perceptron_train(X, y, max_iterations=1000):
    """
    Fit a binary Perceptron by repeated passes over randomly shuffled data.

    Each pass visits every sample once in a fresh random order. Whenever
    predict() disagrees with the sample's label, the weights and bias are
    moved towards that sample. Training stops after the first pass that
    makes no correction, or after max_iterations passes.

    Parameters:
    X (numpy.ndarray): Array of data points (n_samples, n_features)
    y (numpy.ndarray): Array of labels (+1 or -1)
    max_iterations (int): Maximum number of passes through the dataset

    Returns:
    tuple: (w, b, updates) - weight vector, bias term, and total number of
           corrections made
    """
    _, n_features = X.shape

    # Start from the zero hyperplane.
    w = np.zeros(n_features)
    b = 0
    updates = 0

    for _ in range(max_iterations):
        X_perm, y_perm = shuffle(X, y)
        updates_before_pass = updates

        for sample, label in zip(X_perm, y_perm):
            if predict(w, b, sample) == label:
                continue
            # Misclassified: apply the Perceptron correction.
            w = w + label * sample
            b = b + label
            updates += 1

        # A full pass without a single correction means we have converged.
        if updates == updates_before_pass:
            break

    return w, b, updates

def plot_decision_boundary(X, y, w, b):
    """
    Plot the data points and the decision boundary, and save the figure to
    'perceptron_decision_boundary.png'.

    The boundary is the set of points where w[0]*x1 + w[1]*x2 + b = 0.
    When w[1] is non-zero it is drawn as x2 = (-w[0]*x1 - b) / w[1]; when
    w[1] is zero but w[0] is not, the boundary is the vertical line
    x1 = -b / w[0]; when both weights are zero there is no boundary and only
    the data points are drawn.

    Parameters:
    X (numpy.ndarray): Array of data points (n_samples, 2)
    y (numpy.ndarray): Array of labels (+1 or -1)
    w (numpy.ndarray): Weight vector (two components)
    b (float): Bias term
    """
    # Create a figure
    plt.figure(figsize=(10, 6))

    # Plot the data points
    plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue', label='Class +1')
    plt.scatter(X[y == -1, 0], X[y == -1, 1], color='red', label='Class -1')

    # Horizontal extent of the boundary line: the x1 range plus a margin
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1

    if w[1] != 0:
        # General case: solve w·x + b = 0 for x2 at both ends of the x1 range
        x1_points = np.array([x1_min, x1_max])
        x2_points = (-w[0] * x1_points - b) / w[1]
        plt.plot(x1_points, x2_points, 'g-', label='Decision Boundary')
    elif w[0] != 0:
        # Vertical boundary: dividing by w[1] would yield inf/nan, so solve
        # for x1 instead and span the x2 range of the data
        x1_boundary = -b / w[0]
        x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        plt.plot([x1_boundary, x1_boundary], [x2_min, x2_max], 'g-',
                 label='Decision Boundary')
    # Otherwise w is the zero vector and no line is defined; plot data only.

    # Add labels and legend
    plt.xlabel('Sepal Width (cm)')
    plt.ylabel('Petal Width (cm)')
    plt.title('Perceptron Decision Boundary')
    plt.legend()
    plt.grid(True)

    # Save the figure
    plt.savefig('perceptron_decision_boundary.png')
    plt.close()

def run_multiple_trials(X, y, n_trials=20):
    """
    Train the Perceptron repeatedly and collect each run's update count.

    Every trial is an independent call to perceptron_train(X, y); only the
    third element of its result (the number of updates) is kept.

    Parameters:
    X (numpy.ndarray): Array of data points
    y (numpy.ndarray): Array of labels
    n_trials (int): Number of trials to run

    Returns:
    list: Number of updates made in each trial, in trial order
    """
    return [perceptron_train(X, y)[2] for _ in range(n_trials)]

def plot_updates_histogram(updates_list):
    """
    Plot a histogram of the number of updates made by the Perceptron
    algorithm and save it to 'perceptron_updates_histogram.png'.

    Parameters:
    updates_list (list): List of update counts, one entry per trial
    """
    plt.figure(figsize=(10, 6))
    plt.hist(updates_list, bins=10, color='skyblue', edgecolor='black')
    plt.xlabel('Number of Updates')
    plt.ylabel('Frequency')
    # Report the actual number of trials rather than assuming 20
    plt.title(f'Histogram of Perceptron Updates ({len(updates_list)} Trials)')
    plt.grid(True, alpha=0.3)
    plt.savefig('perceptron_updates_histogram.png')
    plt.close()

def main():
    """
    Run the Perceptron experiment on a two-class, two-feature slice of Iris.

    Uses feature columns 1 and 3 (sepal width and petal width) and the
    samples labelled 0 or 1, with label 0 recoded as -1. Trains one
    Perceptron and plots its decision boundary, then runs repeated trials
    and plots a histogram of their update counts.
    """
    iris = datasets.load_iris()
    targets = iris.target

    # Keep only the samples whose label is 0 or 1.
    keep = (targets == 0) | (targets == 1)
    X = iris.data[:, [1, 3]][keep]
    # Recode the labels to the {-1, +1} convention used by the Perceptron.
    y = np.where(targets[keep] == 0, -1, 1)

    # Part (c): single training run plus decision-boundary plot
    w, b, updates = perceptron_train(X, y)
    print(f"Trained Perceptron with {updates} updates")
    print(f"Weight vector: {w}")
    print(f"Bias term: {b}")
    plot_decision_boundary(X, y, w, b)

    # Part (d): repeated runs and a histogram of their update counts
    updates_list = run_multiple_trials(X, y)
    mean_updates = np.mean(updates_list)
    std_updates = np.std(updates_list)
    print(f"Updates across 20 trials: {updates_list}")
    print(f"Average number of updates: {mean_updates:.2f}")
    print(f"Standard deviation: {std_updates:.2f}")
    plot_updates_histogram(updates_list)

# Run the question 7 experiment only when this code is executed as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    main()

### question 8

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def load_iris_data():
    """
    Load Iris and return the two-feature, two-class subset used for the SVM.

    Keeps feature columns 0 and 2 (sepal length and petal length) and only
    the samples whose label is 1 or 2. Labels are returned unchanged.

    Returns:
    tuple: (X, y) - feature matrix and labels
    """
    iris = datasets.load_iris()
    targets = iris.target

    # Boolean row selector for the two classes of interest.
    keep = (targets == 1) | (targets == 2)

    X = iris.data[:, [0, 2]][keep]
    y = targets[keep]
    return X, y

def check_linear_separability(X, y):
    """
    Test for linear separability by fitting a linear SVM with a very large C.

    With C = 1e6 the SVM approximates a hard-margin classifier; the data is
    reported as separable when this model classifies every training sample
    correctly.

    Parameters:
    X (numpy.ndarray): Feature matrix
    y (numpy.ndarray): Labels

    Returns:
    tuple: (is_separable, accuracy) - whether the training accuracy equals
           1.0, and the training accuracy itself
    """
    hard_margin_svm = SVC(kernel='linear', C=1e6).fit(X, y)

    # Score the fitted model on the same data it was trained on.
    train_accuracy = accuracy_score(y, hard_margin_svm.predict(X))

    return train_accuracy == 1.0, train_accuracy

def train_svm_with_different_c(X, y, c_values):
    """
    Fit one linear SVM per C value and summarise each fit.

    Parameters:
    X (numpy.ndarray): Feature matrix
    y (numpy.ndarray): Labels
    c_values (list): List of C values to try

    Returns:
    list: One dictionary per C value, in the order given, with keys 'C',
          'training_error' (1 - training accuracy) and 'n_support_vectors'
          (support-vector count summed over the classes)
    """
    def _summarise(c):
        # Fit on the full data set and evaluate on that same data.
        model = SVC(kernel='linear', C=c).fit(X, y)
        return {
            'C': c,
            'training_error': 1 - accuracy_score(y, model.predict(X)),
            'n_support_vectors': model.n_support_.sum(),
        }

    return [_summarise(c) for c in c_values]

def plot_decision_boundary(X, y, c_value):
    """
    Plot the data points, decision boundary and margins of a linear SVM
    trained with a specific C value, and save the figure to
    'svm_decision_boundary_C{c_value}.png'.

    The predicted class regions are lightly tinted in the same colour as the
    corresponding class's points. The solid line is where the SVM decision
    function is 0 (the decision boundary); the dashed lines are where it is
    -1 and +1 (the margins). Support vectors are circled.

    Parameters:
    X (numpy.ndarray): Feature matrix with two columns
    y (numpy.ndarray): Labels (1 or 2)
    c_value (float): C value for the SVM

    Returns:
    None
    """
    # Train SVM with the specified C value
    svm = SVC(kernel='linear', C=c_value)
    svm.fit(X, y)

    # Create a figure
    plt.figure(figsize=(10, 6))

    # Plot the data points
    plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue', label='Class 1 (Versicolor)')
    plt.scatter(X[y == 2, 0], X[y == 2, 1], color='red', label='Class 2 (Virginica)')

    # Create a mesh grid covering the data with a small border
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    # Fill the predicted regions. Half-integer levels place label 1 and
    # label 2 strictly inside separate bands, and the band colours follow
    # the scatter colours (blue tint for class 1, red tint for class 2).
    region_labels = svm.predict(grid_points).reshape(xx.shape)
    plt.contourf(xx, yy, region_labels, levels=[0.5, 1.5, 2.5],
                 colors=['#AAAAFF', '#FFAAAA'], alpha=0.2)

    # Draw the boundary (level 0) and margins (levels -1 and +1) from the
    # continuous decision function; predicted labels only take the values
    # 1 and 2, so they cannot locate the margins.
    scores = svm.decision_function(grid_points).reshape(xx.shape)
    plt.contour(xx, yy, scores, colors='k', levels=[-1, 0, 1], alpha=0.5,
                linestyles=['--', '-', '--'])

    # Highlight support vectors
    plt.scatter(svm.support_vectors_[:, 0], svm.support_vectors_[:, 1],
                s=100, linewidth=1, facecolors='none', edgecolors='k', label='Support Vectors')

    # Add labels and legend
    plt.xlabel('Sepal Length (cm)')
    plt.ylabel('Petal Length (cm)')
    plt.title(f'SVM Decision Boundary (C={c_value})')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Save the figure
    plt.savefig(f'svm_decision_boundary_C{c_value}.png')
    plt.close()

def print_results_table(results):
    """
    Print a left-aligned table of SVM results to standard output.

    The output is a blank line, a header row, a 50-character dashed rule,
    and then one row per entry in results.

    Parameters:
    results (list): List of dictionaries with keys 'C', 'training_error'
                    and 'n_support_vectors'

    Returns:
    None
    """
    print("\n{:<10} {:<20} {:<20}".format('C', 'Training Error', 'Support Vectors'))
    print("-" * 50)
    for result in results:
        # General format for C: a fixed two-decimal format would show small
        # values such as 0.001 as 0.00, hiding which C the row belongs to.
        print("{:<10.10g} {:<20.4f} {:<20}".format(
            result['C'],
            result['training_error'],
            result['n_support_vectors']
        ))

def main():
    """
    Run the SVM experiment: separability check, C sweep, and boundary plots.

    Loads the Iris subset, reports whether a near-hard-margin linear SVM
    fits it perfectly, tabulates training error and support-vector count
    over a range of C values, and saves decision-boundary plots for the C
    with the lowest training error and for three fixed comparison values.
    """
    X, y = load_iris_data()

    # Part (a): linear separability
    separable, hard_margin_accuracy = check_linear_separability(X, y)
    print(f"Is the data linearly separable? {'Yes' if separable else 'No'}")
    print(f"Accuracy with hard margin SVM: {hard_margin_accuracy:.4f}")

    # Part (b): sweep C over several orders of magnitude
    c_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000]
    results = train_svm_with_different_c(X, y, c_values)
    print_results_table(results)

    # Part (c): pick C by lowest training error (a simplification;
    # cross-validation would be the usual choice in practice). min() keeps
    # the first entry when several C values tie.
    lowest_error_entry = min(results, key=lambda entry: entry['training_error'])
    best_c = lowest_error_entry['C']
    print(f"\nBest C value based on training error: {best_c}")
    plot_decision_boundary(X, y, best_c)

    # Extra plots at a low, a medium and a high C for comparison
    for comparison_c in (0.1, 100, 10000):
        plot_decision_boundary(X, y, comparison_c)

# Run the question 8 experiment only when this code is executed as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    main()