In [2]:
# Check what's already installed
!pip list | grep -E "faiss|cuml|cupy"
!nvidia-smi

cuml-cu12                          25.2.1
cupy-cuda11x                       13.4.1
cupy-cuda12x                       13.4.1
faiss-cpu                          1.10.0
libcuml-cu12                       25.2.1
Sat Apr 26 16:55:11 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   34C    P8              9W /   70W |       3MiB /  15360MiB |      0%      Default |
|                     

In [6]:
"""
Hieroglyph Recognition – paper‑faithful implementation with optional GPU (cuML + FAISS)

* Follows Franken & van Gemert, "Automatic Egyptian Hieroglyph Recognition
  by Retrieving Images as Texts", ACM MM 2013.
* Uses cuML MiniBatchKMeans and FAISS‑GPU when a CUDA device is present
  (Kaggle "GPU (T4 ×2)" runner). Falls back to scikit‑learn / CPU otherwise.
* Expects a directory tree like::

        data/
            A1/
                img_0001.png
                img_0002.png
            D36/
                ...
  e.g. the Kaggle dataset mounted under
        /kaggle/input/egyptian-hieroglyphics-datasets
"""
import os
import warnings
from pathlib import Path
from typing import List, Tuple

import cv2
import numpy as np
from tqdm.auto import tqdm
from skimage.feature import hog
from sklearn.preprocessing import normalize


# -------------------------------------------------------------------------
# 0 · optional GPU back‑ends
# -------------------------------------------------------------------------
GPU_AVAILABLE = False
try:
    # Try to use pre-installed GPU libraries
    import cupy as cp
    
    # Try to access cuML's KMeans
    try:
        from cuml.cluster import KMeans as cuKMeans
    except ImportError:
        # Alternative import path
        try:
            from cuml.cluster import MiniBatchKMeans as cuKMeans
        except ImportError:
            print("Using sklearn KMeans instead of cuML")
            from sklearn.cluster import MiniBatchKMeans as cuKMeans
    
    # Use CPU version of FAISS
    import faiss
    print("Using FAISS CPU version with cupy")
    
    # We'll simulate GPU acceleration with CPU FAISS + cupy
    GPU_AVAILABLE = True
    
except Exception as e:
    warnings.warn(f"GPU libraries not found – falling back to CPU. Error: {str(e)}")
    # Always import these for CPU fallback
    from sklearn.cluster import MiniBatchKMeans
    from sklearn.neighbors import NearestNeighbors

# fallback CPU k‑means
def _cpu_kmeans(n_clusters=200):
    return MiniBatchKMeans(n_clusters=n_clusters,
                          batch_size=16384,
                          max_iter=100,
                          random_state=0)

# -------------------------------------------------------------------------
# 1 · Localisation + pre‑processing   (Sec 3·1)
# -------------------------------------------------------------------------
class Localiser:
    def __init__(self, min_size: int = 10, y_thresh: int = 20):
        self.min_size = min_size
        self.y_thresh = y_thresh

    def _bboxes(self, img: np.ndarray) -> List[Tuple[int, int, int, int]]:
        if img.ndim == 3:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        else:
            gray = img
        bin_img = cv2.adaptiveThreshold(gray, 255,
                                        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY_INV, 11, 2)
        bin_img = cv2.morphologyEx(bin_img, cv2.MORPH_CLOSE,
                                   np.ones((3, 3), np.uint8))
        cnts, _ = cv2.findContours(bin_img, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
        b = []
        for c in cnts:
            x, y, w, h = cv2.boundingRect(c)
            if w > self.min_size and h > self.min_size:
                b.append((x, y, x + w, y + h))
        return b

    def reading_order(self, bboxes):
        if not bboxes:
            return []
        # sort vertically then right‑to‑left per line
        bboxes.sort(key=lambda b: b[1])
        lines, cur = [], []
        for bb in bboxes:
            if not cur:
                cur.append(bb)
            elif abs(bb[1] - cur[-1][1]) < self.y_thresh:
                cur.append(bb)
            else:
                lines.append(cur); cur = [bb]
        if cur:
            lines.append(cur)
        ordered = []
        for ln in lines:
            ln.sort(key=lambda b: -b[0])
            ordered.extend(ln)
        return ordered

    def cut(self, img: np.ndarray, bb, size=(50, 75)):
        x1, y1, x2, y2 = bb
        patch = img[y1:y2, x1:x2].copy()
        if patch.ndim == 2:
            patch = cv2.cvtColor(patch, cv2.COLOR_GRAY2BGR)
        h, w = patch.shape[:2]
        tgt_h, tgt_w = size[1], size[0]
        scale = tgt_h / h
        patch = cv2.resize(patch, (int(w * scale), tgt_h))
        pad = tgt_w - patch.shape[1]
        if pad > 0:
            left = pad // 2
            patch = cv2.copyMakeBorder(patch, 0, 0, left, pad - left,
                                       cv2.BORDER_REPLICATE)
        else:
            patch = patch[:, (-pad)//2:(-pad)//2 + tgt_w]
        return patch

# -------------------------------------------------------------------------
# 2 · Descriptors   (Sec 3·2)
# -------------------------------------------------------------------------
class Descriptors:
    def __init__(self, hog_bins=8, hog_cells=(4, 4), rings=3, segs=8):
        self.h_bins = hog_bins
        self.h_cells = hog_cells
        self.sc_rings = rings
        self.sc_segs = segs
        # Fixed dimensions for outputs
        self.hog_dim = self.h_bins * self.h_cells[0] * self.h_cells[1]
        self.sc_dim = self.sc_rings * self.sc_segs

    # ---- HOG
    def _hog(self, img):
        if img.ndim == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # First resize to make consistent dimensions
        img = cv2.resize(img, (64, 64))  # Standardize to 64x64
        h, w = img.shape
        fd = hog(img,
                 orientations=self.h_bins,
                 pixels_per_cell=(h // self.h_cells[0],
                                  w // self.h_cells[1]),
                 cells_per_block=(1, 1),
                 feature_vector=True)
        return normalize(fd.reshape(1, -1))[0]

    # ---- Shape‑Context   (edge histogram)
    def _sc(self, img):
        if img.ndim == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # First resize to make consistent dimensions
        img = cv2.resize(img, (64, 64))  # Standardize to 64x64
        edges = cv2.Canny(img, 100, 200)
        pts = np.column_stack(np.where(edges > 0))
        if len(pts) < 5:
            return np.zeros(self.sc_rings * self.sc_segs)
        centre = np.array([img.shape[0] / 2, img.shape[1] / 2])
        max_r = np.linalg.norm(centre)
        r_bins = np.logspace(0, np.log10(max_r), self.sc_rings + 1)
        a_bins = np.linspace(0, 2*np.pi, self.sc_segs + 1)
        hist = np.zeros((self.sc_rings, self.sc_segs))
        for p in pts:
            v = p - centre
            r = np.linalg.norm(v)
            t = np.arctan2(v[0], v[1]) % (2*np.pi)
            r_i = np.searchsorted(r_bins, r) - 1
            a_i = np.searchsorted(a_bins, t) - 1
            if 0 <= r_i < self.sc_rings and 0 <= a_i < self.sc_segs:
                hist[r_i, a_i] += 1
        return (hist / (hist.sum() + 1e-9)).ravel()

    # ---- HOOSC = HOG + SC
    def vector(self, img, mode="hoosc"):
        # Ensure consistent output dimensions by standardizing the image first
        if img is None or img.size == 0:
            # Handle empty images
            if mode == "hog":
                return np.zeros(self.hog_dim)
            elif mode == "sc":
                return np.zeros(self.sc_dim)
            else:  # hoosc
                return np.zeros(self.hog_dim + self.sc_dim)
        
        if mode == "hog":   
            return self._hog(img)
        if mode == "sc":    
            return self._sc(img)
        if mode == "hoosc":
            hog_feat = self._hog(img)
            sc_feat = self._sc(img)
            return normalize(np.hstack([hog_feat, sc_feat]).reshape(1, -1))[0]
        raise ValueError(mode)


# -------------------------------------------------------------------------
# 3 · Matcher   (Sec 3·3)  – single/HOG or BoW+χ² (GPU)
# -------------------------------------------------------------------------
class Matcher:
    def __init__(self, desc_type="hoosc", scheme="single", vocab=200):
        self.desc_type = desc_type
        self.scheme = scheme
        self.vocab = vocab
        self.desc = Descriptors()

    # ------------------------------------------------------------------
    def fit(self, imgs: List[np.ndarray], labels: List[str]):
        self.labels = np.asarray(labels)
        # global descriptors
        self.glob = np.vstack([self.desc.vector(im, self.desc_type)
                               for im in tqdm(imgs, desc="global desc")]).astype("float32")
    
        # Use CPU FAISS but still benefit from cupy for other operations
        if 'faiss' in globals():
            d = self.glob.shape[1]
            self.idx_glob = faiss.IndexFlatL2(d)
            self.idx_glob.add(self.glob)
            print("Using CPU FAISS for indexing")
        else:
            self.nn = NearestNeighbors(n_neighbors=5, metric="euclidean")
            self.nn.fit(self.glob)
    
        # Build BoW if requested
        if self.scheme == "bow":
            patches = self._collect_patches(imgs)
            if GPU_AVAILABLE and 'cp' in globals():
                # Use cupy to accelerate KMeans calculation even with CPU FAISS
                try:
                    km = cuKMeans(n_clusters=self.vocab, random_state=0)
                    patches_gpu = cp.asarray(patches)
                    km.fit(patches_gpu)
                    self.centers = cp.asnumpy(km.cluster_centers_)
                except Exception as e:
                    print(f"Error with cuML KMeans: {str(e)}, falling back to CPU")
                    # Make sure we're using the properly imported MiniBatchKMeans
                    from sklearn.cluster import MiniBatchKMeans
                    km = MiniBatchKMeans(n_clusters=self.vocab, 
                                        batch_size=16384,
                                        max_iter=100,
                                        random_state=0).fit(patches)
                    self.centers = km.cluster_centers_
            else:
                km = _cpu_kmeans(self.vocab).fit(patches)
                self.centers = km.cluster_centers_
            
            # Generate BOW histograms for all images
            self.bows = np.vstack([self._bow_hist(im) for im in tqdm(imgs, desc="building BoW")]).astype("float32")
            
            # Apply Hellinger kernel (square root) for chi-square similarity
            sqrt_bow = np.sqrt(self.bows)  # Define sqrt_bow before using it!
            
            if 'faiss' in globals():
                d_b = sqrt_bow.shape[1]
                self.idx_bow = faiss.IndexFlatL2(d_b)
                self.idx_bow.add(sqrt_bow)
            else:
                self.nn_bow = NearestNeighbors(n_neighbors=5, metric="euclidean")
                self.nn_bow.fit(sqrt_bow)

    # ------------------------------------------------------------------
    def _collect_patches(self, imgs):
        all_desc = []
        for im in imgs:
            h, w = im.shape[:2]; ps = max(8, min(h, w)//4)
            for y in range(0, h-ps, ps//2):
                for x in range(0, w-ps, ps//2):
                    all_desc.append(self.desc._hog(im[y:y+ps, x:x+ps]))
        return np.asarray(all_desc, dtype="float32")

    def _bow_hist(self, img):
        h, w = img.shape[:2]; ps = max(8, min(h, w)//4)
        hist = np.zeros(self.vocab, dtype="float32")
        for y in range(0, h-ps, ps//2):
            for x in range(0, w-ps, ps//2):
                d = self.desc._hog(img[y:y+ps, x:x+ps])
                c = np.argmin(((self.centers - d)**2).sum(1))
                hist[c] += 1
        s = hist.sum()
        if s > 0: hist /= s
        return hist

    # ------------------------------------------------------------------
    def query(self, img, topk=5):
        q = self.desc.vector(img, self.desc_type).astype("float32").reshape(1,-1)
        if 'faiss' in globals():
            D, I = self.idx_glob.search(q, topk)
            scores = 1/(1+D[0])
        else:
            D, I = self.nn.kneighbors(q, n_neighbors=topk)
            scores = 1/(1+D[0])
        return [(self.labels[i], float(scores[k])) for k,i in enumerate(I[0])]


# -------------------------------------------------------------------------
# 4 · Simple driver
# -------------------------------------------------------------------------
def load_dataset(root: Path):
    exts = {".png", ".jpg", ".jpeg", ".bmp"}
    imgs, labels = [], []
    for p in root.rglob("*"):
        if p.suffix.lower() not in exts:
            continue
        # label = last directory name ('A1' etc.)
        cls = p.parent.name
        img = cv2.imread(str(p))
        if img is not None:
            imgs.append(img)
            labels.append(cls)
    return imgs, labels


if __name__ == "__main__":
    # --- Installation of dependencies ---
    # First cell in Kaggle notebook should include:
    # !pip install -q opencv-python scikit-image tqdm cuml faiss-gpu cupy
    
    # --- Check for GPU ---
    print(f"GPU Available: {GPU_AVAILABLE}")
    
    # --- Set paths for Kaggle ---
    DATA = Path("/kaggle/input/glyphdataset/Dataset")
    TRAIN = DATA / "Manual"  # Actual training root
    
    # If the path doesn't exist, print available directories to help debugging
    if not TRAIN.exists():
        print(f"Warning: {TRAIN} doesn't exist")
        print("Available directories:")
        for path in Path("/kaggle/input").glob("**/*"):
            if path.is_dir() and not path.name.startswith('.'):
                print(f"  {path}")
        
        # Try to find an alternative path
        alt_paths = list(Path("/kaggle/input").glob("**/images"))
        if alt_paths:
            TRAIN = alt_paths[0]
            print(f"Using alternative path: {TRAIN}")

    # Load the dataset
    print(f"Looking for data in: {TRAIN}")
    imgs, lbls = load_dataset(TRAIN)
    if not imgs:
        raise RuntimeError(f"No PNG/JPG files found under {TRAIN}")

    print(f"Loaded {len(imgs)} glyph crops from {len(set(lbls))} classes")

    # Create matcher and fit
    print("Creating and fitting matcher...")
    matcher = Matcher(desc_type="hoosc", scheme="bow", vocab=200)
    matcher.fit(imgs, lbls)

    # Test with a few sample images
    print("\nTesting recognition with a few samples:")
    for i in range(min(5, len(imgs))):
        res = matcher.query(imgs[i], 3)
        print(f"Sample {i} ({lbls[i]}) → {res}")

Using FAISS CPU version with cupy
GPU Available: True
Looking for data in: /kaggle/input/glyphdataset/Dataset/Manual
Loaded 8420 glyph crops from 10 classes
Creating and fitting matcher...


global desc:   0%|          | 0/8420 [00:00<?, ?it/s]

Using CPU FAISS for indexing
Error with cuML KMeans: module 'cuml' has no attribute 'global_settings', falling back to CPU




building BoW:   0%|          | 0/8420 [00:00<?, ?it/s]


Testing recognition with a few samples:
Sample 0 (7) → [('7', 1.0), ('5', 0.9705153703689575), ('5', 0.9685636758804321)]
Sample 1 (7) → [('7', 1.0), ('22', 0.9656131863594055), ('22', 0.9626771807670593)]
Sample 2 (7) → [('7', 1.0), ('21', 0.9664347171783447), ('7', 0.9643725156784058)]
Sample 3 (7) → [('7', 1.0), ('21', 0.9759568572044373), ('23', 0.9694253206253052)]
Sample 4 (7) → [('7', 1.0), ('9', 0.9667506217956543), ('22', 0.9655954837799072)]
