## Load splits.json into a dictionary

In [14]:
import json
import math
from pathlib import Path

import cv2
import numpy as np
import pandas as pd

from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from skimage.filters import gabor

### Load & inspect splits.json

In [18]:
# Path to splits.json
SPLITS_FILE = Path('splits.json')

# Base directory that contains "geo fossil I"
# (so BASE_DIR / "geo fossil I\\Corals\\Coral172.jpg" exists)
BASE_DIR = SPLITS_FILE.parent

print("SPLITS_FILE:", SPLITS_FILE)
print("BASE_DIR:", BASE_DIR)
print("Exists?", SPLITS_FILE.exists(), BASE_DIR.exists())
with open(SPLITS_FILE, "r") as f:
    splits = json.load(f)

print("Keys in splits:", list(splits.keys()))
for k in splits:
    print(k, ":", len(splits[k]), "images")

print("\nExample paths from 'train':")
print(splits["train"][:5])

SPLITS_FILE: splits.json
BASE_DIR: .
Exists? True True
Keys in splits: ['train', 'val', 'test']
train : 839 images
val : 180 images
test : 181 images

Example paths from 'train':
['geo fossil I\\Corals\\Coral172.jpg', 'geo fossil I\\Ammonites\\Ammonite104.jpg', 'geo fossil I\\Belemnites\\Belemnite113.jpg', 'geo fossil I\\Crinoids\\Crinoid137.jpg', 'geo fossil I\\Corals\\Coral16.jpg']


In [27]:
cv2.imread('geo fossil\Corals\Coral1.jpg').shape

[ WARN:0@170.976] global loadsave.cpp:248 findDecoder imread_('geo fossil\Corals\Coral1.jpg'): can't open/read file: check file path/integrity


AttributeError: 'NoneType' object has no attribute 'shape'

### Basic image + mask helpers

In [12]:
def load_gray_image(path: Path):
    """
    Load an image as 8-bit grayscale (0â€“255).
    """
    img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return img  # uint8, shape (H, W)


def make_mask(gray: np.ndarray, min_area: int = 200):
    """
    Simple fossil mask using Otsu threshold + basic morphology.
    Returns uint8 mask with values {0,1}.
    """
    # Otsu threshold
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Two candidates: foreground white vs foreground black
    mask_white = (thresh == 255)
    mask_black = (thresh == 0)

    # Fossil usually occupies smaller area than background -> pick smaller
    if mask_white.sum() < mask_black.sum():
        mask = mask_white
    else:
        mask = mask_black

    # Convert to uint8
    mask = mask.astype(np.uint8)

    # Morphological opening + closing to clean edges
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=1)

    # Remove tiny blobs
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
    if num_labels > 1:
        # stats[0] is background; keep components >= min_area
        kept = np.zeros_like(mask)
        for i in range(1, num_labels):
            if stats[i, cv2.CC_STAT_AREA] >= min_area:
                kept[labels == i] = 1
        if kept.sum() > 0:
            mask = kept

    if mask.sum() == 0:
        # Fallback: full image
        mask = np.ones_like(mask, dtype=np.uint8)

    return mask


In [11]:
test_rel = splits["train"][0]
test_path = BASE_DIR / test_rel
gray_test = load_gray_image(test_path)
mask_test = make_mask(gray_test)

gray_test.shape, mask_test.shape, gray_test.dtype, mask_test.dtype, mask_test.sum()


FileNotFoundError: No such file: '/home/s.swapnil/Myfiles/Fossil-Patern-centric-AI-detection/geo fossil I\Corals\Coral172.jpg'

### Feature functions (intensity, GLCM, LBP)

In [8]:
def intensity_features(gray: np.ndarray, mask: np.ndarray):
    vals = gray[mask]
    mean = vals.mean()
    std = vals.std()
    eps = 1e-8
    centered = vals - mean
    skew = (centered**3).mean() / (std**3 + eps)
    kurt = (centered**4).mean() / (std**4 + eps)

    feats = np.array([mean, std, skew, kurt], dtype=np.float32)
    names = ["int_mean", "int_std", "int_skew", "int_kurt"]
    return feats, names


def glcm_features(img_uint8: np.ndarray, mask: np.ndarray):
    """
    GLCM features over whole image, with background = 0.
    """
    img_masked = img_uint8.copy()
    img_masked[~mask] = 0

    distances = [1, 2]
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    props = ["contrast", "dissimilarity", "homogeneity",
             "energy", "correlation", "ASM"]

    glcm = greycomatrix(
        img_masked,
        distances=distances,
        angles=angles,
        levels=256,
        symmetric=True,
        normed=True,
    )

    feats = []
    names = []
    for p in props:
        vals = greycoprops(glcm, p)  # shape (len(distances), len(angles))
        for di, d in enumerate(distances):
            for ai, a in enumerate(angles):
                feats.append(vals[di, ai])
                names.append(f"glcm_{p}_d{d}_a{ai}")
    return np.array(feats, dtype=np.float32), names


def lbp_features(gray: np.ndarray, mask: np.ndarray,
                 P: int = 8, R: int = 1):
    """
    LBP histogram with 2^P = 256 bins, normalized.
    """
    lbp = local_binary_pattern(gray, P=P, R=R, method="default")
    lbp_masked = lbp[mask]
    n_bins = 2**P
    hist, _ = np.histogram(
        lbp_masked,
        bins=n_bins,
        range=(0, n_bins),
        density=True
    )
    names = [f"lbp_bin_{i:03d}" for i in range(n_bins)]
    return hist.astype(np.float32), names


### FFT radial spectrum + Gabor + shape

In [None]:
def fft_radial_features(gray: np.ndarray, mask: np.ndarray, n_bins: int = 16):
    """Radial power spectrum from 2D FFT."""
    g = gray.copy()
    g[~mask] = 0.0

    F = np.fft.fftshift(np.fft.fft2(g))
    mag = np.abs(F)

    h, w = mag.shape
    y, x = np.indices((h, w))
    cy, cx = (h - 1) / 2.0, (w - 1) / 2.0
    r = np.sqrt((x - cx)**2 + (y - cy)**2)

    r_max = r.max()
    bin_edges = np.linspace(0, r_max, n_bins + 1)

    radial = np.zeros(n_bins, dtype=np.float64)
    for i in range(n_bins):
        m = (r >= bin_edges[i]) & (r < bin_edges[i+1])
        if np.any(m):
            radial[i] = mag[m].mean()

    radial /= (radial.sum() + 1e-8)
    names = [f"fft_radial_{i:02d}" for i in range(n_bins)]
    return radial.astype(np.float32), names


def gabor_features(gray: np.ndarray, mask: np.ndarray):
    """Gabor filter bank: mean & std of magnitude inside mask."""
    freqs = [0.1, 0.2, 0.3, 0.4]
    thetas = [0, np.pi/4, np.pi/2, 3*np.pi/4]

    feats = []
    names = []
    for f in freqs:
        for j, theta in enumerate(thetas):
            real, imag = gabor(gray, frequency=f, theta=theta)
            mag = np.sqrt(real**2 + imag**2)
            vals = mag[mask]
            feats.append(vals.mean())
            feats.append(vals.std())
            names.append(f"gabor_f{f}_t{j}_mean")
            names.append(f"gabor_f{f}_t{j}_std")
    return np.array(feats, dtype=np.float32), names


def shape_features(mask: np.ndarray):
    """Basic shape stats from largest connected component."""
    labelled = morphology.label(mask)
    props = measure.regionprops(labelled)

    if len(props) == 0:
        feats = np.zeros(4, dtype=np.float32)
        names = ["shape_area", "shape_perimeter", "shape_ecc", "shape_solidity"]
        return feats, names

    region = max(props, key=lambda r: r.area)

    area = region.area
    perimeter = region.perimeter
    ecc = region.eccentricity
    solidity = region.solidity

    feats = np.array([area, perimeter, ecc, solidity], dtype=np.float32)
    names = ["shape_area", "shape_perimeter", "shape_ecc", "shape_solidity"]
    return feats, names


### Combine all features into one vector

In [9]:
def extract_all_features(gray: np.ndarray, mask: np.ndarray):
    """
    Compute ALL classical features for one image.
    Returns (feature_vector, feature_names_list).
    """
    img_uint8 = (gray * 255).astype("uint8")

    f_int, n_int = intensity_features(gray, mask)
    f_glcm, n_glcm = glcm_features(img_uint8, mask)
    f_lbp, n_lbp = lbp_features(gray, mask)
    f_fft, n_fft = fft_radial_features(gray, mask)
    f_gab, n_gab = gabor_features(gray, mask)
    f_shape, n_shape = shape_features(mask)

    feats = np.concatenate([f_int, f_glcm, f_lbp, f_fft, f_gab, f_shape])
    names = n_int + n_glcm + n_lbp + n_fft + n_gab + n_shape
    return feats, names
