                                                        Programming Assignment - 4

In [31]:
## importing required libraries
import os
from PIL import Image
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans,BisectingKMeans, SpectralClustering, DBSCAN, AgglomerativeClustering
from sklearn.metrics import fowlkes_mallows_score, silhouette_score
import numpy as np
import matplotlib.pyplot as plt

In [3]:
## getting data
path = "cropped_images"
# Keep only sub-directories (a stray file in the data folder is not a class and
# would break the later per-folder listing) and sort them so the
# folder -> label numbering is identical on every run and platform.
folders = sorted(
    os.path.join(path, entry)
    for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
folders

['cropped_images\\n02090379-redbone',
 'cropped_images\\n02097047-miniature_schnauzer',
 'cropped_images\\n02104365-schipperke',
 'cropped_images\\n02112018-Pomeranian']

In [5]:
# Preprocessing Transform
# Applied in order: resize to 224x224, convert the PIL image to a tensor,
# then standardise each of the three channels with the mean / std below
# (the per-channel statistics torchvision documents for its pretrained models).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

# model building

In [8]:
import warnings
# NOTE: this silences *every* warning for the rest of the session.
warnings.filterwarnings("ignore")

# `weights=` replaces the deprecated `pretrained=True`; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to load, so the features are unchanged.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.eval()  # inference mode: freezes batch-norm statistics
# Public attribute access fails loudly if the layer is missing, whereas
# `_modules.get(...)` would silently hand back None.
layer = model.layer4
features = []  # one activation array per successfully processed image
labels = []    # integer class label for each entry of `features`
# Map every class folder path to a consecutive integer label.
class_to_label = {folder: idx for idx, folder in enumerate(folders)}

In [10]:
def get_features(module, input, output):
    """Forward hook that stores the hooked layer's output as a NumPy array.

    Args:
        module: The layer the hook is attached to (unused).
        input: The inputs passed to that layer (unused).
        output: The tensor produced by the layer. If its leading axis has
            size 1 that axis is dropped before the array is stored.

    Side effects:
        Appends one array to the module-level ``features`` list.
    """
    # squeeze(0) removes only the leading axis; a bare squeeze() would also
    # collapse any other size-1 axis and silently change the feature shape.
    # cpu() keeps the NumPy conversion valid when the tensor is not on the CPU.
    features.append(output.detach().squeeze(0).cpu().numpy())
# Attach get_features to `layer` so it runs on every forward pass; the returned
# handle is kept so the hook can be detached again with hook.remove().
hook = layer.register_forward_hook(get_features)

In [12]:
for folder, label in class_to_label.items():
    # Sorted so the images are visited in the same order on every run/platform.
    for img_name in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, img_name)
        # Remember how many activations are stored so a failed image can be rolled back.
        captured_before = len(features)
        try:
            # The context manager closes the file; convert() returns an
            # independent in-memory image that stays valid afterwards.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert('RGB')
            input_tensor = transform(image).unsqueeze(0)  # add a leading batch axis
            with torch.no_grad():
                # Only the forward hook's side effect is needed; the model's
                # own output is discarded.
                _ = model(input_tensor)
            labels.append(label)
        except Exception as e:
            print(f"Error processing image {img_name}: {e}")
            # Drop anything captured for this image so that features[i]
            # always stays paired with labels[i].
            del features[captured_before:]

In [13]:
# Detach the forward hook so later forward passes no longer append to `features`.
hook.remove()

In [14]:
# Turn the Python lists filled during extraction into NumPy arrays.
features = np.asarray(features)
labels = np.asarray(labels)

In [15]:
# Flatten each sample's activation into a single row: (n_samples, ...) -> (n_samples, n_values).
features = features.reshape(len(features), -1)


In [16]:
# Project the flattened features onto their first two principal components.
# random_state pins the randomized SVD solver that PCA may pick for large
# inputs, so the 2-D embedding is reproducible from run to run.
pca = PCA(n_components=2, random_state=42)
features_2d = pca.fit_transform(features)


In [17]:
# Maps a clustering method's display name to the cluster labels it assigned.
results = {}

In [18]:
# K-Means Clustering
kmeans_random = KMeans(n_clusters=4, init='random', random_state=42)
labels_kmeans_random = kmeans_random.fit_predict(features_2d)
results['K-Means (Random)'] = labels_kmeans_random

In [26]:
# K-Means Clustering (k-means++ centroid initialisation)
# n_init is spelled out because its default differs between scikit-learn
# releases (10 restarts in older ones, a single run with 'auto' in newer ones).
kmeans_plus = KMeans(n_clusters=4, init='k-means++', n_init=10, random_state=42)
labels_kmeans_plus = kmeans_plus.fit_predict(features_2d)
results['K-Means (k-means++)'] = labels_kmeans_plus

In [33]:
# Bisecting K-Means
bisecting_kmeans = BisectingKMeans(n_clusters=4, init='random', random_state=42)
labels_bisecting = bisecting_kmeans.fit_predict(features_2d)
results['Bisecting K-Means'] = labels_bisecting

In [35]:
# Spectral Clustering
spectral = SpectralClustering(n_clusters=4, random_state=42, affinity='nearest_neighbors')
labels_spectral = spectral.fit_predict(features_2d)
results['Spectral Clustering'] = labels_spectral

In [38]:
# DBSCAN
# NOTE(review): eps / min_samples look hand-tuned so that four clusters emerge
# (see the printed count) -- confirm before reusing on other data.
dbscan = DBSCAN(eps=2.15, min_samples=5)
dbscan.fit(features_2d)
pred = dbscan.labels_
# Number of distinct cluster ids, leaving out -1 (the label DBSCAN gives to noise).
n = len(set(pred) - {-1})
print(n)
results['DBSCAN'] = pred


4


In [40]:
# Agglomerative clustering, run once per linkage criterion.
for linkage in ('ward', 'complete', 'average', 'single'):
    agglo = AgglomerativeClustering(linkage=linkage, n_clusters=4)
    agglo.fit(features_2d)
    labels_agglo = agglo.labels_
    results[f'Agglomerative ({linkage})'] = labels_agglo

In [42]:
# Score every clustering against the true labels (Fowlkes-Mallows) and by its
# own cohesion/separation in the 2-D space (silhouette), then rank the methods.
fowlkes_mallows = {}
silhouette_scores = {}

for method, cluster_labels in results.items():
    # The metrics are scored independently so a failure in one does not
    # discard the other, and the reason for a failure is reported instead
    # of being swallowed.
    try:
        fm_index = fowlkes_mallows_score(labels, cluster_labels)
        fowlkes_mallows[method] = fm_index
    except ValueError as exc:
        print(f"Error evaluating method: {method} (Fowlkes-Mallows: {exc})")
    try:
        silhouette = silhouette_score(features_2d, cluster_labels)
        silhouette_scores[method] = silhouette
    except ValueError as exc:
        print(f"Error evaluating method: {method} (silhouette: {exc})")

# Highest score first for both metrics.
fowlkes_mallows_ranking = sorted(fowlkes_mallows.items(), key=lambda x: x[1], reverse=True)
silhouette_ranking = sorted(silhouette_scores.items(), key=lambda x: x[1], reverse=True)

print("\nFowlkes-Mallows Index Rankings:")
for method, score in fowlkes_mallows_ranking:
    print(f"{method}: {score}")

print("\nSilhouette Coefficient Rankings:")
for method, score in silhouette_ranking:
    print(f"{method}: {score}")






Fowlkes-Mallows Index Rankings:
K-Means (Random): 0.8899785222429596
K-Means (k-means++): 0.8899785222429596
Agglomerative (ward): 0.8831133611953269
Agglomerative (average): 0.8548003314480488
Spectral Clustering: 0.8509576015678834
Agglomerative (complete): 0.8240835062812787
Bisecting K-Means: 0.7614245419392867
Agglomerative (single): 0.6222199833809526
DBSCAN: 0.4957314558467113

Silhouette Coefficient Rankings:
K-Means (Random): 0.6216506361961365
K-Means (k-means++): 0.6216506361961365
Agglomerative (ward): 0.6151702404022217
Spectral Clustering: 0.6131873726844788
Agglomerative (average): 0.610584557056427
Agglomerative (complete): 0.5959839820861816
Bisecting K-Means: 0.5150774717330933
Agglomerative (single): 0.14183178544044495
DBSCAN: -0.42897188663482666


#### References: https://pytorch.org/vision/stable/feature_extraction.html
#### https://scikit-learn.org/stable/modules/clustering.html
#### https://kozodoi.me/blog/20210527/extracting-features