**PROGRAMMING ASSIGNMENT 5**

QUESTION 1   
Image Compression Using K-maps

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.cluster import KMeans
from sklearn.utils import shuffle
from sklearn.metrics import pairwise_distances_argmin
from PIL import Image
from skimage import io

(a) Defining computeCentroid function

In [2]:
def computeCentroid(image, indices):
    pixels = [image[idx] for idx in indices]

    # Computing the mean of RGB values
    centroid = np.mean(pixels, axis=0)

    return centroid

(b) Defining myKmeans function

In [3]:
def mykmeans(X, k, max_iters=100):
    #Initializing cluster centers randomly
    centroids = X[np.random.choice(X.shape[0], k, replace=False)]

    for _ in range(max_iters):
        #Assigning each data point to the nearest cluster center
        distances = np.linalg.norm(X[:, np.newaxis, :] - centroids, axis=2)
        labels = np.argmin(distances, axis=1)

        #Updating cluster centers
        new_centroids = np.array([X[labels == i].mean(axis=0) for i in range(k)])

        # Checking for convergence
        if np.allclose(centroids, new_centroids):
            break

        centroids = new_centroids

    return centroids

(c) Compressing the image

In [4]:
def compress_image(image, centroids):
    # Reshaping the image into a 2D array of pixels
    pixels = np.reshape(image, (-1, 3))
    print("Shape of pixels array:", pixels.shape)

    # Calculating distances from each pixel to each centroid
    distances = np.linalg.norm(pixels[:, np.newaxis, :] - centroids, axis=2)

    # Assigning each pixel to the nearest centroid
    labels = np.argmin(distances, axis=1)

    # Replacing each pixel with the color of its nearest centroid
    compressed_pixels = centroids[labels]

    # Reshaping the compressed pixels back into the original image shape
    compressed_image = np.reshape(compressed_pixels, image.shape)

    return compressed_image

In [5]:
def save_compressed_images(image, k_values, save_path):
    os.makedirs(save_path, exist_ok=True)

    original_path = os.path.join(save_path, 'original.png')
    plt.imsave(original_path, image)

    for k in k_values:
        # Performing K-means clustering
        centroids = mykmeans(image.reshape(-1, 3), k)

        # Compressing the image using centroids
        compressed_image = compress_image(image, centroids)

        compressed_path = os.path.join(save_path, f'compressed_K{k}.png')
        plt.imsave(compressed_path, compressed_image)

In [6]:
image = plt.imread('/content/drive/MyDrive/ML Data/test.png')
k_values = [3, 4, 6, 8]
save_path = '/content/compressed_images/'

save_compressed_images(image, k_values, save_path)

Shape of pixels array: (262144, 3)
Shape of pixels array: (262144, 3)
Shape of pixels array: (262144, 3)
Shape of pixels array: (262144, 3)


(d) Comparing results with the results of kmeans from sklearn library

In [7]:
def compressed_images(image, k_values, save_path):
    os.makedirs(save_path, exist_ok=True)

    original_path = os.path.join(save_path, 'original.png')
    plt.imsave(original_path, image)

    for k in k_values:
        # Performing K-means clustering using scikit-learn's KMeans
        kmeans = KMeans(n_clusters=k, random_state=0).fit(image.reshape(-1, 3))
        centroids = kmeans.cluster_centers_

        # Compressing the image using centroids
        compressed_image = compress_image(image, centroids)

        compressed_path = os.path.join(save_path, f'compressed_K{k}_sklearn.png')
        plt.imsave(compressed_path, compressed_image)


In [8]:
image = plt.imread('/content/drive/MyDrive/ML Data/test.png')
k_values = [3, 4, 6, 8]
save_path = '/content/compressed_images_Kmeans/'

compressed_images(image, k_values, save_path)



Shape of pixels array: (262144, 3)




Shape of pixels array: (262144, 3)




Shape of pixels array: (262144, 3)




Shape of pixels array: (262144, 3)


(e) Spatial Coherence

In [None]:
def compute_spatial_distance(pixel1, pixel2):
    spatial_dist = np.linalg.norm(pixel1 - pixel2)
    return spatial_dist

In [None]:
def mykmeans_spatial(X, k, max_iters=100, spatial_weight=0.5):
    #Initializing cluster centers randomly
    centroids = X[np.random.choice(X.shape[0], k, replace=False)]

    for _ in range(max_iters):
        # Assigning each data point to the nearest cluster center
        distances = np.linalg.norm(X[:, np.newaxis, :] - centroids, axis=2)

        # Calculate spatial distances
        spatial_distances = np.array([[compute_spatial_distance(X[i], X[j]) for j in range(X.shape[0])] for i in range(X.shape[0])])

        # Combine color distance and spatial distance with a weighted sum
        combined_distances = (1 - spatial_weight) * distances + spatial_weight * spatial_distances

        labels = np.argmin(combined_distances, axis=1)

        # Updating cluster centers
        new_centroids = np.array([X[labels == i].mean(axis=0) for i in range(k)])

        # Checking for convergence
        if np.allclose(centroids, new_centroids):
            break

        centroids = new_centroids

    return centroids

In [None]:
def save_compressed_images_spatial(image, k_values, save_path):
    # Create a directory if it doesn't exist
    os.makedirs(save_path, exist_ok=True)

    # Save the original image
    original_path = os.path.join(save_path, 'original.png')
    plt.imsave(original_path, image)

    for k in k_values:
        # Performing K-means clustering with spatial coherence
        centroids = mykmeans_spatial(image.reshape(-1, 3), k)

        # Compressing the image using centroids
        compressed_image = compress_image(image, centroids)

        # Save the compressed image
        compressed_path = os.path.join(save_path, f'compressed_K{k}_spatial.png')
        plt.imsave(compressed_path, compressed_image)

In [None]:
image = plt.imread('/content/drive/MyDrive/ML Data/test.png')
k_values = [3, 4, 6, 8]
save_path = '/content/compressed_images_spatial/'

save_compressed_images_spatial(image, k_values, save_path)

QUESTION 2    
SVM

In [26]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.datasets import make_moons
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

TASK 1(A)

In [10]:
from sklearn import datasets
iris = datasets.load_iris ( as_frame=True )

In [11]:
#Selecting only 'setosa' and 'versicolor' classes
selected_classes = iris.target.isin([0, 1])
iris_binary = iris.data[selected_classes]
target_binary = iris.target[selected_classes]

#Extracting 'petal length' and 'petal width' features
X = iris_binary[['petal length (cm)', 'petal width (cm)']]
y = target_binary

#Normalizing the dataset
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

#Spliting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)


Shape of X_train: (80, 2)
Shape of X_test: (20, 2)
Shape of y_train: (80,)
Shape of y_test: (20,)


Task 1(B)

In [12]:
#Training a Linear Support Vector Classifier
linear_svc = LinearSVC(random_state=42)
linear_svc.fit(X_train, y_train)

print("LinearSVC trained successfully.")

LinearSVC trained successfully.


In [13]:
def plot_decision_boundary_save(clf, X, y, title, save_path):
    plt.figure(figsize=(8, 6))

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Plot decision boundary
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)

    # Plot training data
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
    plt.xlabel('Petal Length (cm)')
    plt.ylabel('Petal Width (cm)')
    plt.title(title)


    plt.savefig(save_path)
    plt.close()

In [14]:
plot_decision_boundary_save(linear_svc, X_train, y_train, "Decision Boundary on Training Data", "/content/training_decision_boundary.png")

# Saving scatterplot of test data along with original decision boundary
plt.figure(figsize=(8, 6))
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=plt.cm.Paired)
plt.xlabel('Petal Length (cm)')
plt.ylabel('Petal Width (cm)')
plt.title('Scatterplot of Test Data with Original Decision Boundary')

# Retrieving the coefficients and intercept from the trained LinearSVC
coef = linear_svc.coef_[0]
intercept = linear_svc.intercept_

# Ploting the decision boundary
x_vals = np.arange(-2, 2, 0.1)
y_vals = -(coef[0] / coef[1]) * x_vals - intercept / coef[1]
plt.plot(x_vals, y_vals, color='black')

plt.savefig("/content/test_scatterplot_with_decision_boundary.png")
plt.close()

TASK 2(A)

In [16]:
# Generating synthetic dataset
X_synthetic, y_synthetic = make_moons(n_samples=500, noise=0.05, random_state=42)

# Adding 5% noise to the dataset
num_noise_points = int(0.05 * len(X_synthetic))
random_indices = np.random.choice(len(X_synthetic), num_noise_points, replace=False)
y_synthetic[random_indices] = 1 - y_synthetic[random_indices]  # Flipping labels

print("Shape of synthetic dataset:", X_synthetic.shape)
print("Number of misclassifications:", np.sum(y_synthetic != (1 - y_synthetic)))

Shape of synthetic dataset: (500, 2)
Number of misclassifications: 500


TASK 2(B)

In [17]:
# Defining SVM models with different kernels
svm_linear = SVC(kernel='linear', random_state=42)
svm_poly = SVC(kernel='poly', degree=3, gamma='auto', random_state=42)  # Polynomial kernel with degree 3
svm_rbf = SVC(kernel='rbf', gamma='auto', random_state=42)  # RBF kernel

# Fiting SVM models to the synthetic dataset
svm_linear.fit(X_synthetic, y_synthetic)
svm_poly.fit(X_synthetic, y_synthetic)
svm_rbf.fit(X_synthetic, y_synthetic)

In [19]:
def plot_decision_boundary_save(model, X, y, title, save_path):
    plt.figure(figsize=(8, 6))
    x_min, x_max = X[:, 0].min() - 0.2, X[:, 0].max() + 0.2
    y_min, y_max = X[:, 1].min() - 0.2, X[:, 1].max() + 0.2
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.3)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)
    plt.savefig(save_path)
    plt.close()


In [20]:
plot_decision_boundary_save(svm_linear, X_synthetic, y_synthetic, 'Linear Kernel', '/content/svm_linear_decision_boundary.png')
plot_decision_boundary_save(svm_poly, X_synthetic, y_synthetic, 'Polynomial Kernel', '/content/svm_poly_decision_boundary.png')
plot_decision_boundary_save(svm_rbf, X_synthetic, y_synthetic, 'RBF Kernel', '/content/svm_rbf_decision_boundary.png')

TASK 2(C)

In [21]:
# Defining the parameter grid for grid search
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

svm_rbf_tuned = SVC(kernel='rbf', random_state=42)

grid_search = GridSearchCV(estimator=svm_rbf_tuned, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1)

# Performing grid search
grid_search.fit(X_synthetic, y_synthetic)

print("Best hyperparameters:", grid_search.best_params_)

# Get the best SVM model
best_svm_rbf = grid_search.best_estimator_


Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best hyperparameters: {'C': 1, 'gamma': 1}


TASK 2(D)

In [22]:
def plot_decision_boundary_save(model, X, y, title, save_path):
    plt.figure(figsize=(8, 6))
    x_min, x_max = X[:, 0].min() - 0.2, X[:, 0].max() + 0.2
    y_min, y_max = X[:, 1].min() - 0.2, X[:, 1].max() + 0.2
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.3)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)
    plt.savefig(save_path)
    plt.close()

In [23]:
# Plot decision boundary with best hyperparameters and save the plot
plot_decision_boundary_save(best_svm_rbf, X_synthetic, y_synthetic, 'RBF Kernel SVM with Best Hyperparameters', '/content/svm_rbf_decision_boundary_best_hyperparameters.png')
