In [3]:
# Ants vs Bees Classification Experiments
# ---------------------------------------
# This script follows the steps in HW10 Q6, using the provided code as a base.
# It visualizes an original and normalized image, extracts ResNet50 features,
# trains a logistic regression classifier, and evaluates k-NN classifiers.

import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import datasets, models, transforms
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# 1. Data Preparation and Visualization

# Paths
data_dir = './hymenoptera_data'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')

# Both splits use the same deterministic preprocessing (as in the notebook):
# resize, centre-crop to 224x224, convert to a tensor, normalise per channel.
# A separate Compose is built for each split so the entries stay independent.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    for split in ['train', 'val']
}

# Load datasets
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                  for x in ['train', 'val']}
class_names = image_datasets['train'].classes

# Pick an image from the training directory (original and normalized).
# os.listdir returns entries in arbitrary order, so sort the names first to
# make the chosen sample the same on every run and every machine.
sample_class = class_names[0]  # 'ants' or 'bees'
sample_class_dir = os.path.join(train_dir, sample_class)
sample_img_name = sorted(os.listdir(sample_class_dir))[0]
sample_img_path = os.path.join(sample_class_dir, sample_img_name)

# Load original image
original_img = Image.open(sample_img_path).convert('RGB')

# Apply normalization transform
normalized_img_tensor = data_transforms['train'](original_img)
# Undo normalization for display
def unnormalize(tensor, mean=None, std=None):
    """Invert a per-channel normalisation and return an image array for plotting.

    Args:
        tensor: channel-first image tensor (channels, height, width).
        mean: per-channel means used by the forward normalisation. Defaults to
            the values passed to ``transforms.Normalize`` in this script.
        std: per-channel standard deviations used by the forward normalisation.
            Defaults to the values passed to ``transforms.Normalize`` in this
            script.

    Returns:
        A NumPy array of shape (height, width, channels) clipped to [0, 1].
    """
    if mean is None:
        mean = [0.485, 0.456, 0.406]
    if std is None:
        std = [0.229, 0.224, 0.225]
    mean = np.asarray(mean, dtype=np.float64)
    std = np.asarray(std, dtype=np.float64)
    # Move channels last so the per-channel vectors broadcast over the last axis
    # and the result has the layout plt.imshow expects.
    img = tensor.numpy().transpose((1, 2, 0))
    img = std * img + mean
    # Clip so rounding error cannot push values outside the displayable range.
    return np.clip(img, 0, 1)

normalized_img_disp = unnormalize(normalized_img_tensor)

# Save comparison figure: original on the left, de-normalized crop on the right
comparison_panels = [
    (original_img, 'Original Image'),
    (normalized_img_disp, 'Normalized Image'),
]
plt.figure(figsize=(8, 4))
for panel_index, (panel_image, panel_title) in enumerate(comparison_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.imshow(panel_image)
    plt.title(panel_title)
    plt.axis('off')
plt.tight_layout()
plt.savefig('q6_image_comparison.png')
# Close the figure so it is not also rendered inline after being saved.
plt.close()

# 2. Feature Extraction with ResNet50

# Load pre-trained ResNet50 and keep every child module except the last one
# (the final classification layer), so the network outputs features.
# NOTE(review): newer torchvision releases appear to deprecate `pretrained=`
# in favour of `weights=` - confirm against the installed version.
resnet50 = models.resnet50(pretrained=True)
modules = list(resnet50.children())
modules = modules[:-1]
resnet50 = nn.Sequential(*modules)

# Inference only: evaluation mode, and no parameter tracks gradients.
resnet50.eval()
for param in resnet50.parameters():
    param.requires_grad = False

# Helper to extract features for a dataset
def extract_features(dataset, model=None, batch_size=16):
    """Run every item of ``dataset`` through a feature extractor.

    Args:
        dataset: a dataset yielding ``(input_tensor, label)`` pairs.
        model: callable applied to each input batch. Defaults to the
            module-level ``resnet50`` feature extractor.
        batch_size: number of items per forward pass.

    Returns:
        ``(X, y)``: ``X`` is a 2-D NumPy array with one flattened feature row
        per item, and ``y`` is a 1-D NumPy array of the matching labels, both
        in dataset order.
    """
    if model is None:
        model = resnet50
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    features = []
    labels = []
    with torch.no_grad():
        for inputs, targets in dataloader:
            outputs = model(inputs)
            # Flatten everything after the batch axis. Unlike squeeze(), this
            # never drops the batch axis, so a final batch holding a single
            # item needs no special handling.
            outputs = torch.flatten(outputs, 1)
            features.append(outputs.numpy())
            labels.append(targets.numpy())
    X = np.vstack(features)
    y = np.hstack(labels)
    return X, y

# Extract features for the training split first, then the validation split
# (the validation split serves as the test set in this script).
(X_train, y_train), (X_test, y_test) = [
    extract_features(image_datasets[split]) for split in ('train', 'val')
]

# 3. Logistic Regression Classifier

# Fit on the training features, then score predictions on the held-out split.
clf = LogisticRegression(
    solver='liblinear',
    random_state=0,
    max_iter=1000,
).fit(X_train, y_train)
y_pred_lr = clf.predict(X_test)
acc_lr = accuracy_score(y_test, y_pred_lr)
print(f"Logistic Regression Test Accuracy: {acc_lr:.4f}")

# 4. k-Nearest Neighbor Classifier

# Maps each neighbourhood size k to the test accuracy obtained with it.
knn_accuracies = {}
for k in (1, 3, 5):
    knn = KNeighborsClassifier(n_neighbors=k, algorithm='kd_tree').fit(X_train, y_train)
    y_pred_knn = knn.predict(X_test)
    knn_accuracies[k] = accuracy_score(y_test, y_pred_knn)
    print(f"k-NN (k={k}) Test Accuracy: {knn_accuracies[k]:.4f}")

# 5. Save results for LaTeX table
# Build the tab-separated rows first, then write them in a single call.
result_lines = ["classifier\ttest_accuracy\n", f"logistic regression\t{acc_lr:.4f}\n"]
result_lines += [f"k-NN (k={k})\t{knn_accuracies[k]:.4f}\n" for k in [1, 3, 5]]
with open('q6_results.txt', 'w') as results_file:
    results_file.writelines(result_lines)

# 6. Print summary for manual LaTeX table entry
# Same tab-separated rows as the results file, echoed to stdout.
summary_rows = ["classifier\ttest_accuracy", f"logistic regression\t{acc_lr:.4f}"]
summary_rows.extend(f"k-NN (k={k})\t{knn_accuracies[k]:.4f}" for k in [1, 3, 5])
print("\nSummary Table (for LaTeX):")
print("\n".join(summary_rows))

# End of script



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/scotty/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [02:22<00:00, 722kB/s] 


Logistic Regression Test Accuracy: 0.9542
k-NN (k=1) Test Accuracy: 0.9673
k-NN (k=3) Test Accuracy: 0.9542
k-NN (k=5) Test Accuracy: 0.9608

Summary Table (for LaTeX):
classifier	test_accuracy
logistic regression	0.9542
k-NN (k=1)	0.9673
k-NN (k=3)	0.9542
k-NN (k=5)	0.9608


## <font color='red'>Distinguishing ants from bees</font>

This notebook builds a classifier that distinguishes between images of ants and bees. The classifier has three parts to it:
- The images are of varying sizes. So first, they are all normalized to a fixed size.
- Then they are run through a pre-trained computer vision neural net, ResNet50, that produces a 2048-dimensional representation.
- Finally, a logistic regression classifier is built on top of this representation.

### <font color='red'>Various includes</font>

In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
# Torch stuff
import torch
import torch.nn as nn
# Torchvision stuff
from torchvision import datasets, models, transforms
# sklearn stuff
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,accuracy_score

### <font color='red'>Loading Dataset</font>

For both the train and test data, the images need to be normalized to the particular size, 224x224x3, that is required by the ResNet50 network that we will apply to them. This is achieved by a series of transforms.

- The (normalized) training set is in image_datasets['train']
- The (normalized) test set is in image_datasets['val']


In [None]:
# Both splits share the same preprocessing: resize, centre-crop to 224x224,
# convert to a tensor, then normalise each channel. One Compose is built per
# split so the two entries remain separate objects.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    for split in ['train', 'val']
}

# Each split lives in its own sub-folder of data_dir.
data_dir = './hymenoptera_data'
image_datasets = {}
for x in ['train', 'val']:
    image_datasets[x] = datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])

#### <font color='red'>Look at the classes and data set sizes</font>

In [None]:
# Class labels as reported by the training ImageFolder; displayed as the cell output.
train_dataset = image_datasets['train']
class_names = train_dataset.classes
class_names

In [None]:
# Number of images in each split; displayed as the cell output.
dataset_sizes = {}
for split in ['train', 'val']:
    dataset_sizes[split] = len(image_datasets[split])
dataset_sizes

#### <font color='red'>Print a sample (transformed) image</font>

In [None]:
# Index of the training example to display.
item = 200
itemx, itemy = image_datasets['train'][item]
print("Label: {}\n".format(class_names[itemy]))
# The tensor is channel-first; imshow expects channels last.
sample_hwc = itemx.permute(1, 2, 0)
plt.imshow(sample_hwc)
plt.show()

### <font color='red'>Load pre-trained ResNet50</font>

Torch has a bunch of pre-trained nets for computer vision. Let's try out one of them: ResNet50. 

In [None]:
# Load the pre-trained network and drop its last child module (the final
# classification layer) so that it outputs feature vectors instead of class scores.
resnet50 = models.resnet50(pretrained = True)
modules = list(resnet50.children())[:-1]
resnet50 = nn.Sequential(*modules)
# Modules are created in training mode. Switch to evaluation mode so that
# normalisation layers use their stored running statistics rather than the
# statistics of each (here single-image) batch, and so those stored statistics
# are not modified while extracting features.
resnet50.eval()
# The network is only used as a fixed feature extractor; no gradients needed.
for p in resnet50.parameters():
    p.requires_grad = False

### <font color='red'>Extract ResNet features from dataset</font>

We'll use ResNet to produce a 2048-dimensional representation for each image.

The resulting training set will be in the Numpy arrays (X_train, y_train) and the test set will be in the Numpy arrays (X_test, y_test).


In [None]:
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x])
              for x in ['train', 'val']}


def _resnet_features(loader):
    """Return ``(X, y)`` NumPy arrays for every batch produced by ``loader``.

    Each batch is passed through ``resnet50`` and its squeezed output is kept
    as feature rows of ``X``; the batch labels are concatenated into ``y``.
    Per-batch results are collected in lists and stacked once at the end,
    rather than re-copying the growing arrays on every iteration.
    """
    feature_rows = []
    label_rows = []
    for images, targets in loader:
        feature_rows.append(torch.squeeze(resnet50(images)).numpy())
        label_rows.append(targets.numpy())
    # vstack promotes 1-D per-image vectors to rows, so X is always 2-D.
    return np.vstack(feature_rows), np.hstack(label_rows)


X_train, y_train = _resnet_features(dataloaders['train'])
X_test, y_test = _resnet_features(dataloaders['val'])


In [None]:
# Sanity check: shapes of the feature matrices and label vectors.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

### <font color='red'>Train logistic regression classifier on the ResNet features</font>

And then we'll evaluate its performance on the test set.

In [None]:
# Fixed random_state keeps the fit reproducible between runs.
clf = LogisticRegression(
    solver='liblinear',
    random_state=0,
    max_iter=1000,
)
clf.fit(X_train, y_train)

In [None]:
# Predict on the held-out split, then report accuracy and the confusion matrix.
y_pred = clf.predict(X_test)
test_accuracy = accuracy_score(y_test,y_pred)
test_confusion = confusion_matrix(y_test,y_pred)
print("Accuracy: {}\n".format(test_accuracy))
print("Confusion matrix: \n {}".format(test_confusion))

In [None]:
'''
Use this same 2048-d representation to construct a k-nearest neighbor classifier. Give the test
accuracies obtained for k = 1, 3, 5. Note: If you use sklearn.neighbors.KNeighborsClassifier,
you might want to set algorithm='kd_tree'.
'''