In [None]:
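"""Write the train/test txt listing files (disabled: this cell is kept inside a string literal, so it does not run)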

import os

# Specify the path to the directory containing subdirectories
directory_path = '/Users/chenmaige/Downloads/TinyImageNet/TIN'

# Specify the paths for the output text files
train_txt_path = '/Users/chenmaige/Downloads/TinyImageNet/train.txt'
test_txt_path = '/Users/chenmaige/Downloads/TinyImageNet/test.txt'

# Open text files for writing
f1 = open(train_txt_path, 'w')
f2 = open(test_txt_path, 'w')

# Iterate through subdirectories in the specified directory
for i, subdirectory in enumerate(os.listdir(directory_path)):
    subdirectory_path = os.path.join(directory_path, subdirectory)

    # Check if the item in the directory is a subdirectory
    if os.path.isdir(subdirectory_path):
        # List contents of the images directory in the subdirectory
        d2 = os.listdir(os.path.join(subdirectory_path, 'images'))

        # Write file paths and labels to the appropriate text files
        for j in range(len(d2) - 1):
            image_path = os.path.join(subdirectory_path, 'images', d2[j])
            f1.write("%s %d\n" % (image_path, i))

        # Write the last image path and label to the test text file
        image_path = os.path.join(subdirectory_path, 'images', d2[-1])
        f2.write("%s %d\n" % (image_path, i))

# Close text files
f1.close()
f2.close()

"""

In [2]:
import numpy as np
import cv2
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score
from skimage.feature import hog
from sklearn.cluster import KMeans
import xgboost as xgb


## Load images and perform feature extraction
# Maps a keypoint-based `func` name to the cv2 factory that builds its detector.
# Stored by attribute name so that a cv2 build lacking one detector (e.g. SIFT)
# only fails when that detector is actually requested.
_DETECTOR_FACTORIES = {
    "orb": "ORB_create",
    "brisk": "BRISK_create",
    "sift": "SIFT_create",
}


def _read_gray(fn):
    """Read the image at `fn`, resize it to 256x256 and convert it to grayscale."""
    img = cv2.imread(fn)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None instead
        # of raising; fail here with the offending path rather than inside resize.
        raise FileNotFoundError(f"cannot read image: {fn}")
    img = cv2.resize(img, (256, 256))
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)


def _describe(gray, func, detector):
    """Return the 2-D descriptor array of one grayscale image, or None if it has none."""
    if func == "hog":
        # Only the feature vector is used, so the visualisation image is not requested.
        vec = hog(gray, orientations=8, pixels_per_cell=(16, 16),
                  cells_per_block=(1, 1), block_norm='L2-Hys')
        # Each HOG value is treated as its own 1-D sample for clustering.
        return vec.reshape(-1, 1)
    _, des = detector.detectAndCompute(gray, None)
    if des is None or len(des) == 0:
        return None
    return des


def load_img(f, num_clusters=50, func="orb", classes=(0, 1, 2, 3, 4),
             kmeans=None, return_kmeans=False):
    """Load the images listed in `f` and encode each as a bag-of-visual-words histogram.

    Each non-blank line of `f` is "<image path> <integer label>". Images whose
    label is in `classes` are read, resized to 256x256, converted to grayscale
    and described with `func`. The descriptors are clustered with KMeans and
    every image becomes a histogram of cluster assignments.

    Parameters
    ----------
    f : str
        Path to the listing text file.
    num_clusters : int
        Number of KMeans clusters, i.e. the histogram length. Ignored when a
        fitted `kmeans` is supplied (its `n_clusters` is used instead).
    func : {"orb", "brisk", "sift", "hog"}
        Feature extractor to use.
    classes : collection of int
        Labels to keep; lines with any other label are skipped.
    kmeans : fitted KMeans, optional
        Vocabulary to reuse. When None (default) a new vocabulary is fit on the
        descriptors of this listing, so two separate calls (e.g. train and test)
        produce histograms over unrelated clusters unless the model returned
        from the first call is passed to the second.
    return_kmeans : bool
        When True, also return the KMeans model that was used.

    Returns
    -------
    (imgs, lab) or (imgs, lab, kmeans)
        `imgs` has one histogram row per kept image, `lab` is an int32 array of
        the matching labels.

    Raises
    ------
    ValueError
        If `func` is unsupported, no line matches `classes`, or no descriptor
        could be extracted to fit the vocabulary.
    FileNotFoundError
        If the listing or one of the listed images cannot be read.
    """
    if func != "hog" and func not in _DETECTOR_FACTORIES:
        raise ValueError(
            f"unsupported func {func!r}; expected one of "
            f"{sorted(_DETECTOR_FACTORIES) + ['hog']}"
        )

    # Parse the listing once; the context manager closes the file handle.
    entries = []
    with open(f) as listing:
        for raw in listing:
            line = raw.strip()
            if not line:
                continue
            # Split on the last whitespace run so paths containing spaces still parse.
            fn, label = line.rsplit(None, 1)
            label = int(label)
            if label in classes:
                entries.append((fn, label))
    if not entries:
        raise ValueError(f"no samples with a label in {tuple(classes)} found in {f!r}")

    detector = None
    if func != "hog":
        detector = getattr(cv2, _DETECTOR_FACTORIES[func])()

    # Describe every image exactly once; the results feed both the clustering
    # step and the histogram step.
    per_image = [_describe(_read_gray(fn), func, detector) for fn, _ in entries]

    if kmeans is None:
        pool = [des for des in per_image if des is not None]
        if not pool:
            raise ValueError(f"no descriptors could be extracted from the images in {f!r}")
        kmeans = KMeans(n_clusters=num_clusters, random_state=0).fit(np.vstack(pool))
    else:
        # Histograms must line up with the supplied vocabulary.
        num_clusters = kmeans.n_clusters

    imgs = []
    for des in per_image:
        if des is None:
            # No keypoints found: the image contributes an all-zero histogram.
            imgs.append(np.zeros(num_clusters))
        else:
            # minlength gives every image a histogram of the same length.
            imgs.append(np.bincount(kmeans.predict(des), minlength=num_clusters))

    imgs = np.asarray(imgs)
    lab = np.asarray([label for _, label in entries], np.int32)
    if return_kmeans:
        return imgs, lab, kmeans
    return imgs, lab
# Classifiers to compare, keyed by the name shown in the printed report.
classifiers = {
        'XGBoost': xgb.XGBClassifier(),
        'K-Nearest Neighbors': KNeighborsClassifier(),
        'Support Vector Machine': SVC()
    }

# Listing files of "<image path> <label>" lines; change these to your own paths.
TRAIN_LIST = '/Users/chenmaige/Downloads/TinyImageNet/train.txt'
TEST_LIST = '/Users/chenmaige/Downloads/TinyImageNet/test.txt'
FEATURE_EXTRACTORS = ['brisk', 'orb', 'sift', 'hog']

# Features depend only on the extractor, not on the classifier, so each
# (train, test) pair is computed once and reused across all classifiers.
# NOTE(review): each load_img call fits its own KMeans vocabulary, so the train
# and test histograms are built over independently fitted clusters.
features_by_extractor = {}

# Train and evaluate every classifier on every feature type.
for name, clf in classifiers.items():
    for extract_func in FEATURE_EXTRACTORS:
        if extract_func not in features_by_extractor:
            features_by_extractor[extract_func] = (
                load_img(TRAIN_LIST, func=extract_func),
                load_img(TEST_LIST, func=extract_func),
            )
        (x, y), (tx, ty) = features_by_extractor[extract_func]
        clf.fit(x, y)
        pred = clf.predict(tx)
        acc = accuracy_score(ty, pred)
        f1 = f1_score(ty, pred, average='weighted')
        print(f"{name} {extract_func} - Accuracy: {acc}, F1-Score: {f1}")

XGBoost brisk - Accuracy: 0.2, F1-Score: 0.13333333333333333
XGBoost orb - Accuracy: 0.6, F1-Score: 0.5333333333333333
XGBoost sift - Accuracy: 0.4, F1-Score: 0.26666666666666666
XGBoost hog - Accuracy: 0.0, F1-Score: 0.0
K-Nearest Neighbors brisk - Accuracy: 0.6, F1-Score: 0.5
K-Nearest Neighbors orb - Accuracy: 0.6, F1-Score: 0.5333333333333333
K-Nearest Neighbors sift - Accuracy: 0.4, F1-Score: 0.3
K-Nearest Neighbors hog - Accuracy: 0.6, F1-Score: 0.4666666666666666
Support Vector Machine brisk - Accuracy: 0.2, F1-Score: 0.13333333333333333
Support Vector Machine orb - Accuracy: 0.6, F1-Score: 0.5333333333333333
Support Vector Machine sift - Accuracy: 0.8, F1-Score: 0.7333333333333333
Support Vector Machine hog - Accuracy: 0.2, F1-Score: 0.06666666666666667
