In [1]:
import pandas as pd
import numpy as np
import cv2
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# === LOAD CSV DATA ===
def load_csv_images(csv_path, img_size=(28, 28), max_samples=None):
    df = pd.read_csv(csv_path)
    y = df['label'].values
    X = df.drop('label', axis=1).values.astype(np.float32)
    if max_samples:
        X = X[:max_samples]
        y = y[:max_samples]
    X = X.reshape((-1, img_size[0], img_size[1]))
    return X, y

In [3]:
# === PATCH EXTRACTION ===
def extract_random_patches(images, patch_size=7, stride=1, max_patches=50000):
    patches = []
    for img in images:
        for i in range(0, img.shape[0] - patch_size + 1, stride):
            for j in range(0, img.shape[1] - patch_size + 1, stride):
                patch = img[i:i+patch_size, j:j+patch_size]
                patch = patch - np.mean(patch)
                patches.append(patch.flatten())
                if len(patches) >= max_patches:
                    return np.array(patches)
    return np.array(patches)

In [4]:
# === PCA FILTER LEARNING ===
def learn_pca_filters(patches, num_filters, patch_size):
    pca = PCA(n_components=num_filters)
    pca.fit(patches)
    filters = pca.components_.reshape((num_filters, patch_size, patch_size))
    return filters

In [5]:
# === CONVOLVE IMAGES ===
def convolve_images(images, filters):
    fmap_all = []
    for img in images:
        maps = [cv2.filter2D(img, -1, f) for f in filters]
        fmap_all.append(np.stack(maps))
    return np.array(fmap_all)  # shape: (N, num_filters, H, W)

In [6]:
# === BINARY HASHING + HISTOGRAM ===
def binary_hashing(feature_maps):
    bin_stack = (feature_maps > 0).astype(np.uint8)
    powers = 2 ** np.arange(bin_stack.shape[1])[::-1].reshape((-1, 1, 1))
    hashed = np.sum(bin_stack * powers, axis=1)
    return hashed

def block_histogram(images, block_size=(7, 7), num_bins=256, overlap=0.5):
    n, h, w = images.shape
    bh, bw = block_size
    step_h = int(bh * (1 - overlap))
    step_w = int(bw * (1 - overlap))
    features = []
    for img in images:
        blocks = []
        for i in range(0, h - bh + 1, step_h):
            for j in range(0, w - bw + 1, step_w):
                block = img[i:i + bh, j:j + bw]
                hist, _ = np.histogram(block, bins=num_bins, range=(0, num_bins))
                blocks.extend(hist)
        features.append(np.array(blocks))
    return np.stack(features)

In [None]:
# === RUN PCN ON CSV ===
def run_pcn_from_csv(csv_path):
    print("🔍 Loading CSV data...")
    X, y = load_csv_images(csv_path, max_samples=1000)
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, stratify=y, random_state=42)

    print("🧩 Extracting stage-1 patches...")
    patches1 = extract_random_patches(X_train, patch_size=7, stride=1, max_patches=50000)

    print("🔧 Training stage-1 PCA filters...")
    filters1 = learn_pca_filters(patches1, num_filters=6, patch_size=7)

    print("🎛 Convolution stage-1...")
    fmap1_train = convolve_images(X_train, filters1)
    fmap1_test = convolve_images(X_test, filters1)

    print("🧠 Extracting stage-2 patches...")
    combined_maps = np.array([np.sum(fm, axis=0) for fm in fmap1_train])
    patches2 = extract_random_patches(combined_maps, patch_size=7, stride=1, max_patches=50000)

    print("🔧 Training stage-2 PCA filters...")
    filters2 = learn_pca_filters(patches2, num_filters=11, patch_size=7)

    print("🎛 Convolution stage-2...")
    fmap2_train = np.array([np.stack([cv2.filter2D(np.sum(fm, axis=0), -1, f) for f in filters2]) for fm in fmap1_train])
    fmap2_test = np.array([np.stack([cv2.filter2D(np.sum(fm, axis=0), -1, f) for f in filters2]) for fm in fmap1_test])

    print("🔐 Binary hashing + histogram (train)...")
    hashed_train = binary_hashing(fmap2_train)
    features_train = block_histogram(hashed_train, block_size=(7, 7), overlap=0.5)

    print("🔐 Binary hashing + histogram (test)...")
    hashed_test = binary_hashing(fmap2_test)
    features_test = block_histogram(hashed_test, block_size=(7, 7), overlap=0.5)

    print("🏷 Training Linear SVM...")
    clf = LinearSVC(max_iter=3000)
    clf.fit(features_train, y_train)
    y_pred = clf.predict(features_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"✅ Final Test Accuracy: {acc * 100:.2f}%")

# Jalankan pipeline pada file mnist_test.csv
run_pcn_from_csv("mnist_test.csv")

🔍 Loading CSV data...
🧩 Extracting stage-1 patches...
🔧 Training stage-1 PCA filters...
🎛 Convolution stage-1...
🧠 Extracting stage-2 patches...
🔧 Training stage-2 PCA filters...
🎛 Convolution stage-2...
🔐 Binary hashing + histogram (train)...
🔐 Binary hashing + histogram (test)...
🏷 Training Linear SVM...
✅ Final Test Accuracy: 89.50%


