In [None]:
import h5py
import cv2
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score


In [None]:
# Load the image/label arrays fully into memory inside `with` blocks so both
# HDF5 file handles are closed as soon as the data has been copied out.
# np.array(...) materialises each dataset, so the arrays stay valid after close.
with h5py.File("train.h5", "r") as train:
    X_train_raw = np.array(train["train_set_x"])
    # Flatten labels to 1-D so they can be used directly as class targets.
    y_train = np.array(train["train_set_y"]).reshape(-1)

with h5py.File("test.h5", "r") as test:
    X_test_raw = np.array(test["test_set_x"])
    y_test = np.array(test["test_set_y"]).reshape(-1)

print("Train images:", X_train_raw.shape)
print("Train labels:", y_train.shape)
print("Test images:", X_test_raw.shape)
print("Test labels:", y_test.shape)


Train images: (209, 64, 64, 3)
Train labels: (209,)
Test images: (50, 64, 64, 3)
Test labels: (50,)


In [None]:
def extract_sift_features(image, num_desc=5):
    """Build a fixed-length SIFT feature vector for a single image.

    The image is converted to grayscale when it has a channel axis, SIFT
    keypoints are detected, and the descriptors of the ``num_desc`` strongest
    keypoints (highest ``response``) are concatenated. Selecting by response
    instead of sampling at random makes the result deterministic, so repeated
    runs of the notebook produce identical features.

    Parameters
    ----------
    image : np.ndarray
        2-D grayscale image or 3-D RGB image accepted by ``cv2.cvtColor`` /
        ``cv2.SIFT``.
    num_desc : int, default 5
        Number of 128-dimensional descriptors to keep.

    Returns
    -------
    np.ndarray
        1-D float32 array of length ``num_desc * 128``. Slots for which no
        keypoint was found are zero-filled.
    """
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(image, None)

    # Pre-allocate the zero-padded output; float32 matches SIFT's own dtype so
    # every code path returns the same dtype.
    features = np.zeros((num_desc, 128), dtype=np.float32)

    # detectAndCompute yields None when no keypoints were found.
    if descriptors is None or len(keypoints) == 0:
        return features.flatten()

    # Rank keypoints strongest-first; the stable sort keeps detection order
    # among ties so the selection is fully deterministic.
    responses = np.array([kp.response for kp in keypoints])
    strongest = np.argsort(-responses, kind="stable")[:num_desc]
    features[:len(strongest)] = descriptors[strongest]

    return features.flatten()

def get_sift_features(X, num_desc=5):
    """Extract a SIFT feature vector for every image in ``X``.

    Parameters
    ----------
    X : iterable of np.ndarray
        Images, each passed unchanged to ``extract_sift_features``.
    num_desc : int, default 5
        Number of descriptors kept per image; forwarded to
        ``extract_sift_features``.

    Returns
    -------
    np.ndarray
        Array of shape ``(len(X), num_desc * 128)``. An empty input yields a
        ``(0, num_desc * 128)`` array so downstream code that expects a 2-D
        feature matrix keeps working.
    """
    features = []
    for i, img in enumerate(X):
        features.append(extract_sift_features(img, num_desc=num_desc))
        # Periodic progress message for large datasets.
        if (i + 1) % 500 == 0:
            print(f"Processed {i+1}/{len(X)} images...")

    if not features:
        # np.array([]) would be 1-D with shape (0,); keep the feature axis.
        return np.zeros((0, num_desc * 128))
    return np.array(features)


In [None]:
# Turn each raw image split into its SIFT feature matrix, announcing each
# split before the (potentially slow) extraction starts.
feature_jobs = (
    ("Extracting SIFT features for training set...", X_train_raw),
    ("Extracting SIFT features for test set...", X_test_raw),
)

extracted = []
for banner, images in feature_jobs:
    print(banner)
    extracted.append(get_sift_features(images))

# Order matches feature_jobs: training split first, then test split.
X_train, X_test = extracted

print("SIFT feature shapes:", X_train.shape, X_test.shape)


Extracting SIFT features for training set...
Extracting SIFT features for test set...
SIFT feature shapes: (209, 640) (50, 640)


In [None]:
# Standardise every feature column using statistics learned from the training
# split only; the same fitted scaler is then applied to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Features become float32 tensors (the dtype nn.Linear uses by default).
X_train_t, X_test_t = (
    torch.tensor(scaled, dtype=torch.float32)
    for scaled in (X_train_scaled, X_test_scaled)
)

# Labels become int64 class indices, as nn.CrossEntropyLoss expects.
y_train_t, y_test_t = (
    torch.tensor(labels, dtype=torch.long)
    for labels in (y_train, y_test)
)


In [None]:
class ANN(nn.Module):
    """Three-layer fully connected classifier.

    Layout: ``input_dim -> 256 -> 128 -> num_classes`` with ReLU after the
    first two linear layers and dropout (p=0.3) applied after the first
    activation only. ``forward`` returns raw logits (no softmax).
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        # Linear layers are created in forward order so parameter
        # initialisation consumes the RNG in a fixed sequence.
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Map a batch of feature vectors to per-class logits."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Seed PyTorch's RNG before the model is built so weight initialisation (and
# the dropout masks drawn later during training) are reproducible under
# Restart Kernel -> Run All.
torch.manual_seed(42)

# nn.CrossEntropyLoss requires integer targets in [0, num_classes - 1], so size
# the output layer from the largest label rather than the count of distinct
# labels; the two only agree when labels are contiguous starting at 0.
num_classes = int(y_train.max()) + 1
model = ANN(input_dim=X_train_t.shape[1], num_classes=num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(model)


ANN(
  (fc1): Linear(in_features=640, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=2, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
)


In [None]:
# Make this cell idempotent: start every run from freshly initialised weights
# and a fresh optimizer state. Without this, re-running the cell silently
# continues training the already-fitted model (the recorded output, with loss
# 0.0000 from the very first epoch, is consistent with that having happened).
for module in model.modules():
    if hasattr(module, "reset_parameters"):
        module.reset_parameters()
# Same optimizer settings as where `optimizer` is first created.
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 50
# Training mode enables dropout; nothing in the loop switches modes, so it
# only needs to be set once.
model.train()
for epoch in range(epochs):
    # Full-batch gradient step: the whole training set is one batch.
    optimizer.zero_grad()
    outputs = model(X_train_t)
    loss = criterion(outputs, y_train_t)
    loss.backward()
    optimizer.step()
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


Epoch [1/50], Loss: 0.0000
Epoch [2/50], Loss: 0.0000
Epoch [3/50], Loss: 0.0000
Epoch [4/50], Loss: 0.0000
Epoch [5/50], Loss: 0.0000
Epoch [6/50], Loss: 0.0000
Epoch [7/50], Loss: 0.0000
Epoch [8/50], Loss: 0.0000
Epoch [9/50], Loss: 0.0000
Epoch [10/50], Loss: 0.0000
Epoch [11/50], Loss: 0.0000
Epoch [12/50], Loss: 0.0000
Epoch [13/50], Loss: 0.0000
Epoch [14/50], Loss: 0.0000
Epoch [15/50], Loss: 0.0000
Epoch [16/50], Loss: 0.0000
Epoch [17/50], Loss: 0.0000
Epoch [18/50], Loss: 0.0000
Epoch [19/50], Loss: 0.0000
Epoch [20/50], Loss: 0.0000
Epoch [21/50], Loss: 0.0000
Epoch [22/50], Loss: 0.0000
Epoch [23/50], Loss: 0.0000
Epoch [24/50], Loss: 0.0000
Epoch [25/50], Loss: 0.0000
Epoch [26/50], Loss: 0.0000
Epoch [27/50], Loss: 0.0000
Epoch [28/50], Loss: 0.0000
Epoch [29/50], Loss: 0.0000
Epoch [30/50], Loss: 0.0000
Epoch [31/50], Loss: 0.0000
Epoch [32/50], Loss: 0.0000
Epoch [33/50], Loss: 0.0000
Epoch [34/50], Loss: 0.0000
Epoch [35/50], Loss: 0.0000
Epoch [36/50], Loss: 0.0000
E

In [None]:
# Switch to inference mode (disables dropout) and score the held-out split.
model.eval()
with torch.no_grad():
    # Raw logits, one row per test image.
    preds = model(X_test_t)
    # Predicted class = index of the largest logit in each row.
    predicted = preds.argmax(dim=1)

acc = accuracy_score(y_test_t, predicted)
print(f"\n Test Accuracy: {acc * 100:.2f}%")


 Test Accuracy: 64.00%
