In [1]:
# imports
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
from torchvision.utils import make_grid

#neural net imports
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


In [2]:
#import external libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
import math
import cv2
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

%matplotlib inline


In [3]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
# `device` is always a torch.device, so it has the same type on both paths
# (previously it was a plain str on the CPU path).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Running on :',device)

Running on : cuda


In [4]:
# Defining the transform for the MNIST dataset:
# convert the images to tensors, then normalize pixel values with
# mean 0.5 and standard deviation 0.5.
MNIST_ROOT = '~/.pytorch/MNIST_data/'  # shared download/cache location for both splits
BATCH_SIZE = 64                        # images per batch for both loaders

transform = transforms.Compose([
    transforms.ToTensor(),
    # One-element tuples (one entry per channel). Note that `(0.5)` without
    # the trailing comma is just the float 0.5, not a tuple.
    transforms.Normalize(mean=(0.5,), std=(0.5,))
])

# Download (if not already present) and load the train set
trainset = datasets.MNIST(MNIST_ROOT, download=True, train=True, transform=transform)
# Shuffled: the batch order differs from one pass to the next
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)

# Download (if not already present) and load the test set
testset = datasets.MNIST(MNIST_ROOT, download=True, train=False, transform=transform)
# Not shuffled: the test images are visited in a fixed order
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

print("Training set size:", len(trainset))
print("Test set size:", len(testset))


Training set size: 60000
Test set size: 10000


In [39]:
# displays the details of 1 keypoint and its descriptor
def print_keypoint_details(keypoint, descriptor):
    """Print the attributes of one keypoint followed by its descriptor.

    Args:
        keypoint: object exposing the attributes ``pt``, ``size``, ``angle``,
            ``response``, ``octave`` and ``class_id``.
        descriptor: the descriptor that belongs to ``keypoint``; printed as-is.

    Returns:
        None. Everything is written to standard output.
    """
    # (label, attribute name) pairs, in the order in which they are printed
    labelled_attributes = (
        ('pt : ', 'pt'),
        ('size : ', 'size'),
        ('angle : ', 'angle'),
        ('response : ', 'response'),
        ('octave : ', 'octave'),
        ('class id : ', 'class_id'),
    )
    for label, attribute in labelled_attributes:
        print(label, getattr(keypoint, attribute))

    # visual separator between the keypoint fields and the descriptor
    separator = '----------------------------------'
    print(separator)
    print('descriptor : ', descriptor)

# Function to extract SIFT features from an image
def extract_sift_features(image, mean=0.5, std=0.5, sift=None):
    """Compute SIFT descriptors for a single normalized image tensor.

    The image is expected to have been normalized as ``(x - mean) / std``
    with ``x`` in [0, 1]. Casting such floats straight to ``uint8`` would
    truncate every value in (-1, 1) to 0 (and the cast of negative floats to
    an unsigned type is not well defined), so the detector would see an almost
    binary image. The normalization is therefore undone and the pixels are
    rescaled to 0-255 before the conversion to 8-bit.

    Args:
        image: torch tensor holding one image; singleton dimensions are
            squeezed away before the image is handed to the detector.
        mean: mean that was used to normalize ``image``. The default matches
            the ``Normalize(mean=0.5, ...)`` transform defined in this notebook.
        std: standard deviation that was used to normalize ``image``. The
            default matches the notebook's transform.
        sift: optional detector object exposing ``detectAndCompute``; pass one
            to reuse it across many images. When omitted, a new detector is
            created with ``cv2.SIFT_create()``.

    Returns:
        The descriptors returned by ``detectAndCompute``. This may be ``None``
        (callers in this notebook check for that case).
    """
    # detach/cpu make the conversion work for tensors that track gradients
    # or live on the GPU; both are no-ops for plain CPU tensors
    pixels = image.detach().cpu().numpy().squeeze().astype(np.float32)

    # undo the normalization, then map [0, 1] onto [0, 255]
    pixels = (pixels * std + mean) * 255.0

    # round and clip before the cast so the uint8 conversion cannot wrap around
    gray = np.clip(np.rint(pixels), 0, 255).astype(np.uint8)

    if sift is None:
        sift = cv2.SIFT_create()
    _, descriptors = sift.detectAndCompute(gray, None)
    return descriptors

# Function to extract SIFT features from a set of images
def extract_features(data_loader, descriptor_dim=128):
    """Pool the SIFT descriptors of every image yielded by ``data_loader``.

    Args:
        data_loader: iterable of ``(images, labels)`` batches. The labels are
            ignored; each element of ``images`` is passed to
            ``extract_sift_features``.
        descriptor_dim: number of columns of the empty array returned when no
            descriptor was found at all. Defaults to 128, the length of a
            SIFT descriptor.

    Returns:
        A 2-D array with one descriptor per row, stacked in iteration order.
        Images for which no descriptors were found contribute no rows. When
        nothing was found for any image, an empty float32 array of shape
        ``(0, descriptor_dim)`` is returned, so the result is 2-D in every case.
    """
    per_image_descriptors = []
    for images, _ in data_loader:
        for image in images:
            descriptors = extract_sift_features(image)
            # skip images without any detected keypoints
            if descriptors is not None and len(descriptors) > 0:
                per_image_descriptors.append(descriptors)

    if not per_image_descriptors:
        return np.empty((0, descriptor_dim), dtype=np.float32)

    # a single concatenation of the per-image matrices instead of building a
    # Python list with one entry per descriptor row
    return np.concatenate(per_image_descriptors, axis=0)


In [40]:
# Run SIFT over every image of the training loader and pool all descriptors
# into a single array. The loader is shuffled, so the order of the rows can
# differ from one run to the next.
train_features = extract_features(trainloader)

In [43]:
# Sanity check: show the first pooled descriptor
print(train_features[0])

[  0.   0.   0.   2.  73.   1.   0.   0.  62.   0.   0.  17. 183.   4.
   0.   6. 183.   1.   0.   2.  34.   1.   0.  26.  46.   1.   0.   0.
   0.   0.   0.   2.   0.   0.   0.   0.  86.   8.   0.   0.  70.   5.
   0.   4. 183.  61.   9.  13. 183.  40.   0.   1.  34.  10.   9.  49.
  48.   7.   0.   0.   0.   0.   0.   2.   0.   0.   0.   0.  28.  20.
   0.   0.  11.   1.   0.   0. 116. 183.  93.  26. 116.   9.   0.   0.
   6.  27.  76. 107.  18.   2.   0.   0.   0.   0.   0.   2.   0.   0.
   0.   0.   0.   2.   0.   0.   0.   0.   0.   0.   0.  16.  10.   0.
   0.   0.   0.   0.   0.   2.   5.   1.   0.   0.   0.   0.   0.   0.
   0.   0.]
