In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
from sklearn.cluster import MiniBatchKMeans
from sklearn.preprocessing import StandardScaler

def resize_with_aspect_ratio(image, target_size=(256, 256)):
    """Scale `image` so it fits inside `target_size` without changing its aspect ratio.

    Args:
        image: Array-like image whose first two `shape` entries are
            (height, width).
        target_size: `(target_width, target_height)` bounding box.

    Returns:
        The image returned by `cv2.resize` (bilinear interpolation). No padding
        is added, so one side may be shorter than the requested target.
    """
    height, width = image.shape[:2]
    target_width, target_height = target_size
    # Use the smaller scale factor so that both sides fit inside the box.
    ratio = min(target_width / width, target_height / height)
    # round() instead of truncation: floating-point error can make
    # `width * ratio` land just below the target (e.g. 255.99999999999997),
    # which int() would cut down to 255. The max(1, ...) clamp keeps extreme
    # aspect ratios from producing a zero-sized side, which cv2.resize rejects.
    new_width = max(1, int(round(width * ratio)))
    new_height = max(1, int(round(height * ratio)))
    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

def extract_SIFT_features(image):
    """Run SIFT on `image` and return only the descriptors.

    A fresh detector is created with `cv2.SIFT_create()` on every call. The
    keypoints are discarded; the second element returned by
    `detectAndCompute` is passed through unchanged (callers in this file
    treat a `None` result as "no descriptors").
    """
    detector = cv2.SIFT_create()
    # No mask: the whole image is searched for keypoints.
    _, descriptor_matrix = detector.detectAndCompute(image, None)
    return descriptor_matrix

def build_vocab(images, n_clusters=50, random_state=42):
    """Cluster SIFT descriptors from `images` into a visual vocabulary.

    Args:
        images: Iterable of images; `None` entries (e.g. failed reads) are
            skipped.
        n_clusters: Number of visual words (K-Means clusters).
        random_state: Seed forwarded to `MiniBatchKMeans` so the vocabulary is
            reproducible between runs. Pass `None` for unseeded clustering.

    Returns:
        The fitted `MiniBatchKMeans` model.

    Raises:
        ValueError: If no descriptors could be extracted from any image.
    """
    descriptor_blocks = []
    for img in images:
        if img is None:
            continue
        descriptors = extract_SIFT_features(img)
        if descriptors is not None and len(descriptors) > 0:
            # Keep one matrix per image; stacking once at the end avoids
            # creating a separate Python object for every descriptor row.
            descriptor_blocks.append(np.asarray(descriptors, dtype=np.float32))

    if not descriptor_blocks:
        raise ValueError(
            "build_vocab: no SIFT descriptors could be extracted from the given images"
        )

    descriptor_array = np.vstack(descriptor_blocks)
    kmeans = MiniBatchKMeans(
        n_clusters=n_clusters,
        batch_size=n_clusters * 3,
        random_state=random_state,
    )
    kmeans.fit(descriptor_array)
    return kmeans

def bag_of_words_histogram(image, vocab_model):
    """Encode `image` as a normalized histogram of visual words.

    Args:
        image: Image passed to `extract_SIFT_features`.
        vocab_model: Fitted clustering model exposing `n_clusters` and
            `predict` (e.g. the model returned by `build_vocab`).

    Returns:
        A float64 vector of length `vocab_model.n_clusters`. For an image with
        descriptors the entries sum to 1; for an image without any descriptors
        the vector is all zeros.
    """
    n_words = vocab_model.n_clusters
    descriptors = extract_SIFT_features(image)
    if descriptors is None or len(descriptors) == 0:
        # Same dtype as the histogram branch so callers always get float64.
        return np.zeros(n_words, dtype=np.float64)

    words = vocab_model.predict(descriptors.astype(np.float32))
    # Unit-width bins [0, 1), [1, 2), ... - one per visual word. With
    # density=True the counts are divided by their total.
    hist, _ = np.histogram(words, bins=np.arange(n_words + 1), density=True)
    return hist

def load_images_and_labels(file_path, vocab_model, target_size=(256, 256)):
    """Read an image list file and encode every readable image as a BoW histogram.

    Each non-blank line of `file_path` must have the form
    `<image path> <integer label>`. The line is split on its last run of
    whitespace, so image paths may themselves contain spaces.

    Args:
        file_path: Path of the list file.
        vocab_model: Fitted vocabulary passed to `bag_of_words_histogram`.
        target_size: `(width, height)` box passed to `resize_with_aspect_ratio`.

    Returns:
        `(features, labels)` as NumPy arrays, one entry per image that
        `cv2.imread` could read. Unreadable images are skipped.

    Raises:
        ValueError: If a non-blank line lacks a label or the label is not an
            integer.
    """
    imgs, labels = [], []
    with open(file_path, 'r') as f:
        lines = f.readlines()

    for line_number, line in enumerate(
        tqdm(lines, desc="Loading and processing images"), start=1
    ):
        stripped = line.strip()
        if not stripped:
            # Blank lines (typically a trailing newline) carry no sample.
            continue
        parts = stripped.rsplit(maxsplit=1)
        if len(parts) != 2:
            raise ValueError(
                f"{file_path}:{line_number}: expected '<image path> <label>', got {line!r}"
            )
        file_name, label = parts
        image = cv2.imread(file_name)
        if image is None:
            # cv2.imread returns None for missing/unreadable files; skip them.
            continue
        resized_image = resize_with_aspect_ratio(image, target_size)
        imgs.append(bag_of_words_histogram(resized_image, vocab_model))
        labels.append(int(label))
    return np.array(imgs), np.array(labels)

In [None]:
# Build the visual vocabulary from the training images.
# The list file is opened in a `with` block so the handle is closed, and blank
# lines are skipped instead of raising IndexError.
with open('train.txt', 'r') as train_list:
    train_paths = [line.rsplit(maxsplit=1)[0] for line in train_list if line.strip()]

# Resize exactly as load_images_and_labels does by default, so the vocabulary is
# learned from descriptors of the same scale that is later encoded into
# histograms. Unreadable images (cv2.imread -> None) are dropped here.
train_images = []
for path in tqdm(train_paths, desc="Building vocab"):
    image = cv2.imread(path)
    if image is not None:
        train_images.append(resize_with_aspect_ratio(image))

vocab_model = build_vocab(train_images)

In [None]:
# Encode both splits as bag-of-words histograms with the shared vocabulary.
# x / y are the training features and labels, tx / ty the test ones.
x, y = load_images_and_labels(file_path='train.txt', vocab_model=vocab_model)
tx, ty = load_images_and_labels(file_path='test.txt', vocab_model=vocab_model)

In [None]:
# Sanity check: print the shape of every feature / label array.
for shape_caption, dataset in (
    ("訓練數據的圖像數據形狀:", x),
    ("訓練數據的標籤數據形狀:", y),
    ("測試數據的圖像數據形狀:", tx),
    ("測試數據的標籤數據形狀:", ty),
):
    print(shape_caption, dataset.shape)

In [None]:
%%time
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

knn_classifier = KNeighborsClassifier(n_neighbors=5, algorithm='auto')
knn_classifier.fit(x, y)

y_pred = knn_classifier.predict(tx)

accuracy = accuracy_score(ty, y_pred)
f1 = f1_score(ty, y_pred, average='weighted')

print(f"SIFT + KNN準確率: {accuracy:.4f}")
print(f"F1分数: {f1:.4f}")

In [None]:
%%time
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(random_state=42, n_jobs=-1)

rf_model.fit(x, y)

y_pred = rf_model.predict(tx)

accuracy = accuracy_score(ty, y_pred)
f1 = f1_score(ty, y_pred, average='weighted')

print(f"SIFT + RF準確率: {accuracy:.4f}")
print(f"F1分數: {f1:.4f}")

In [None]:
%%time
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

svm_classifier = SVC(C=1.0, kernel='rbf', verbose=False)
svm_classifier.fit(x, y)

y_pred = svm_classifier.predict(tx)

accuracy = accuracy_score(ty, y_pred)
f1 = f1_score(ty, y_pred, average='weighted')

print(f"SIFT + svm準確率: {accuracy:.4f}")
print(f"F1分數: {f1:.4f}")