In [1]:
import numpy as np, cv2, joblib, json
from PIL import Image
from skimage.feature import hog, local_binary_pattern
from torchvision import transforms
from sklearn.metrics import classification_report
from google.colab import drive, files

In [2]:
# Mount Google Drive into the Colab filesystem; the model files loaded
# later in this notebook are read from below this mount point.
_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
# All serialized artefacts live in one Drive folder; edit MODELS_DIR to
# relocate every path below at once.
MODELS_DIR = '/content/drive/MyDrive/COMP6721/models'

RF_MODEL_PATH         = f'{MODELS_DIR}/best_rf.joblib'
LINEAR_SVM_MODEL_PATH = f'{MODELS_DIR}/best_svm_linear.joblib'
RBF_SVM_MODEL_PATH    = f'{MODELS_DIR}/best_rbf_svm.joblib'
DT_MODEL_PATH         = f'{MODELS_DIR}/semisup_tree.joblib'
VOCAB_PATH            = f'{MODELS_DIR}/vocab_kmeans_600.joblib'

# NOTE(security): joblib.load unpickles its input, which can execute
# arbitrary code. Only load files from a source you trust.
rf_model         = joblib.load(RF_MODEL_PATH)
linear_svm_model = joblib.load(LINEAR_SVM_MODEL_PATH)
rbf_svm_model    = joblib.load(RBF_SVM_MODEL_PATH)
dt_model         = joblib.load(DT_MODEL_PATH)
kmeans           = joblib.load(VOCAB_PATH)

# Size of the visual vocabulary, read from the loaded clustering object
# rather than hard-coded.
VOCAB_SIZE = kmeans.n_clusters

In [4]:
# constants from training phase
HOG_SIZE   = 128
HOG_CFG    = dict(orientations=9,
                  pixels_per_cell=(16, 16),
                  cells_per_block=(2, 2),
                  visualize=False,
                  channel_axis=None)

# colour histogram (64 bins per channel, normalised)
N_COLOR_BINS_CH = 64                       # per channel
N_COLOR_BINS    = 3 * N_COLOR_BINS_CH      # 64 × 3 = 192

# ORB settings (same as training notebook)
ORB = cv2.ORB_create(nfeatures=200)

# ---- LBP ------------------------------------------------------
# Radii of the multi-scale LBP. Each radius R uses P = 8·R sampling points
# and the 'uniform' method, which yields P + 2 histogram bins.
LBP_RADII = [1, 2]

# ---- expected feature length ----------------------------------
# Everything below is derived from the configuration above, so changing
# HOG_CFG or LBP_RADII cannot leave FEAT_LEN silently out of date.
_cell_h, _cell_w   = HOG_CFG['pixels_per_cell']
_block_h, _block_w = HOG_CFG['cells_per_block']
# Blocks slide one cell at a time: n_blocks = n_cells - cells_per_block + 1
_blocks_y = HOG_SIZE // _cell_h - _block_h + 1     # 7 with the settings above
_blocks_x = HOG_SIZE // _cell_w - _block_w + 1
N_HOG = _blocks_y * _blocks_x * _block_h * _block_w * HOG_CFG['orientations']

# One (8·R + 2)-bin histogram per LBP radius: 10 + 18 for radii [1, 2].
N_LBP = sum(8 * radius + 2 for radius in LBP_RADII)

# total feature length = HOG + Colour + BoVW + LBP
FEAT_LEN = N_HOG + N_COLOR_BINS + VOCAB_SIZE + N_LBP
print("Expected feature dim:", FEAT_LEN)

Expected feature dim: 2584


In [5]:
# Preprocessing used for the HOG descriptor: reduce the image to a single
# grey channel, then resize it to HOG_SIZE × HOG_SIZE.
_gray_resize_steps = [
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((HOG_SIZE, HOG_SIZE)),
]
to_gray_resize = transforms.Compose(_gray_resize_steps)

def hog_colour(image_rgb: Image.Image) -> np.ndarray:
    """Build the HOG descriptor followed by the RGB colour histogram.

    The HOG part is computed on the grey, resized copy produced by
    ``to_gray_resize``. The colour part holds one ``N_COLOR_BINS_CH``-bin
    histogram per channel of the un-resized image; the three histograms are
    concatenated first and ℓ₁-normalised together as a single vector.
    With this notebook's constants the result has 1 764 + 192 entries.
    """
    # --- HOG on the greyscale, resized image ---
    gray_pixels = np.array(to_gray_resize(image_rgb))
    hog_part = hog(gray_pixels, **HOG_CFG).astype(np.float32)

    # --- colour histograms: R, G and B counted separately ---
    pixels = np.array(image_rgb)
    channel_hists = []
    for channel in range(3):
        counts = np.histogram(pixels[..., channel],
                              bins=N_COLOR_BINS_CH, range=(0, 255))[0]
        channel_hists.append(counts.astype(np.float32))
    colour_part = np.concatenate(channel_hists)
    colour_part /= (colour_part.sum() + 1e-7)      # ℓ₁-norm; epsilon avoids 0/0

    return np.concatenate([hog_part, colour_part])


In [6]:
def orb_bovw_lbp(gray):
    """Return the ORB bag-of-visual-words histogram followed by LBP histograms.

    ``gray`` is the greyscale image array handed to both ``ORB`` and
    ``local_binary_pattern``. The first ``VOCAB_SIZE`` entries count how often
    each ``kmeans`` cluster is hit by the image's ORB descriptors (all zeros
    when no descriptor is found); the remaining entries are one normalised
    uniform-LBP histogram per radius in ``LBP_RADII``.
    """
    # ---- ORB descriptors → visual-word histogram ----
    _, descriptors = ORB.detectAndCompute(gray, None)
    if descriptors is None or not descriptors.size:
        # Nothing detected: keep an all-zero histogram.
        bovw = np.zeros(VOCAB_SIZE, dtype=np.float32)
    else:
        # Never use more descriptors than ORB's configured maximum.
        descriptors = descriptors[:ORB.getMaxFeatures()]
        visual_words = kmeans.predict(descriptors.astype(np.float32))
        bovw = np.bincount(visual_words, minlength=VOCAB_SIZE).astype(np.float32)
        bovw /= (bovw.sum() + 1e-7)

    # ---- LBP at several radii ----
    lbp_parts = []
    for radius in LBP_RADII:
        n_points = 8 * radius
        codes = local_binary_pattern(gray, P=n_points, R=radius, method='uniform')
        n_bins = n_points + 2          # 'uniform' codes take values 0 .. P+1
        counts, _ = np.histogram(codes.ravel(), bins=n_bins, range=(0, n_bins))
        lbp_parts.append(counts.astype(np.float32)/(counts.sum()+1e-7))

    return np.concatenate([bovw, np.concatenate(lbp_parts)])


In [7]:
def hybrid_feature(path):
    """Compute the full feature row for the image stored at ``path``.

    The image is opened, converted to RGB, and described by the
    concatenation of ``hog_colour`` (on the RGB image) and ``orb_bovw_lbp``
    (on its greyscale conversion).

    Returns
    -------
    np.ndarray of shape (1, FEAT_LEN), ready to pass to ``model.predict``.

    Raises
    ------
    AssertionError
        If the concatenated vector does not have ``FEAT_LEN`` entries.
    """
    # Use a context manager so the underlying file handle is released
    # deterministically; convert() returns an independent in-memory image.
    with Image.open(path) as src:
        img_rgb = src.convert('RGB')

    hog_col = hog_colour(img_rgb)
    gray    = cv2.cvtColor(np.array(img_rgb), cv2.COLOR_RGB2GRAY)
    orb_lbp = orb_bovw_lbp(gray)
    full    = np.concatenate([hog_col, orb_lbp])
    # Guard against a mismatch between these extractors and the constants
    # used to compute FEAT_LEN; report both sizes to make it debuggable.
    assert full.shape[0] == FEAT_LEN, (
        f"Dim mismatch! got {full.shape[0]}, expected {FEAT_LEN}")
    return full.reshape(1, -1)

# Human-readable class labels; the integer label a model predicts is used
# as an index into this list.
class_names = [
    'library-indoor',
    'museum-indoor',
    'shopping_mall-indoor',
]

def predict_single(path, model, top_k=3):
    """Classify one image and print the result.

    Parameters
    ----------
    path
        Location of the image file; forwarded to ``hybrid_feature``.
    model
        Fitted estimator exposing ``predict``. If it also exposes
        ``predict_proba``, the ``top_k`` most probable classes are listed.
    top_k : int, default 3
        Maximum number of class probabilities to print.

    Returns
    -------
    None. All results are printed.
    """
    X = hybrid_feature(path)
    pred = int(model.predict(X)[0])
    print(f"\nImage: {path}\nPrediction: **{class_names[pred]}**")

    # Models without predict_proba (e.g. a scaler + LinearSVC pipeline)
    # only get the prediction line above.
    if not hasattr(model, "predict_proba"):
        return

    probs = model.predict_proba(X)[0]

    # predict_proba columns follow model.classes_, so translate each column
    # through that attribute instead of assuming it equals 0, 1, 2, ...
    classes = getattr(model, "classes_", None)
    if classes is None:
        labels = class_names
    else:
        labels = [class_names[int(c)] for c in classes]

    # Size the label column to the longest name so the colons line up.
    width = max((len(name) for name in labels), default=0)
    ranked = sorted(zip(labels, probs), key=lambda pair: pair[1], reverse=True)
    for cls, p in ranked[:top_k]:
        print(f"{cls:<{width}s}: {p:>7.2%}")

In [8]:
uploaded = files.upload()           # choose an image
# `uploaded` is iterated for filenames below; if the dialog was dismissed
# without a file it is presumably empty, so fail with a clear message
# instead of a bare StopIteration.
if not uploaded:
    raise RuntimeError("No file was uploaded - run this cell again and choose an image.")
img_path = next(iter(uploaded))     # first filename

Saving Places365_val_00000962.jpg to Places365_val_00000962.jpg


In [9]:
# Classify the uploaded image with the model loaded from RF_MODEL_PATH.
predict_single(path=img_path, model=rf_model)


Image: Places365_val_00000962.jpg
Prediction: **shopping_mall-indoor**
shopping_mall-indoor: 63.53%
museum-indoor     : 21.64%
library-indoor    : 14.83%


In [10]:
# Classify the uploaded image with the model loaded from LINEAR_SVM_MODEL_PATH.
predict_single(path=img_path, model=linear_svm_model)


Image: Places365_val_00000962.jpg
Prediction: **library-indoor**


In [11]:
# Classify the uploaded image with the model loaded from RBF_SVM_MODEL_PATH.
predict_single(path=img_path, model=rbf_svm_model)


Image: Places365_val_00000962.jpg
Prediction: **shopping_mall-indoor**


In [12]:
# Classify the uploaded image with the model loaded from DT_MODEL_PATH.
predict_single(path=img_path, model=dt_model)


Image: Places365_val_00000962.jpg
Prediction: **shopping_mall-indoor**
shopping_mall-indoor: 98.95%
library-indoor    : 0.61%
museum-indoor     : 0.44%
