In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

def load_images(folder_path, label):
    """
    Read every decodable entry of a folder as a 128x128 grayscale image.

    Entries for which ``cv2.imread`` returns ``None`` (it could not produce
    an image) are skipped silently.

    :param folder_path: Directory whose entries are handed to ``cv2.imread``.
    :param label: Value repeated once for every image that was loaded.
    :return: Tuple ``(images, labels)`` of two lists with equal length.
    """
    target_size = (128, 128)  # fixed size so every image yields comparable statistics
    images = []
    for entry in os.listdir(folder_path):
        gray = cv2.imread(os.path.join(folder_path, entry), cv2.IMREAD_GRAYSCALE)
        if gray is None:
            continue
        images.append(cv2.resize(gray, target_size))

    # One label per successfully loaded image
    labels = [label] * len(images)
    return images, labels

def compute_features(image):
    """
    Summarise a grayscale QR image with four scalar statistics.

    The values are returned as a tuple in this order:
      - Laplacian variance: variance of the 64-bit float Laplacian response
        (edge sharpness).
      - Mean intensity: average pixel value (overall brightness).
      - Standard deviation: spread of the pixel values (contrast).
      - Dark pixel ratio: fraction of pixels that become 0 after a fixed
        binary threshold at 128.
    """
    # Edge sharpness
    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    sharpness = laplacian.var()

    # Brightness and contrast
    brightness = np.mean(image)
    contrast = np.std(image)

    # Binarise (QR codes are assumed to be mostly black & white), then count
    # the pixels that ended up at 0
    _, binary = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY)
    dark_fraction = np.sum(binary == 0) / binary.size

    return sharpness, brightness, contrast, dark_fraction

def analyze_dataset(original_folder, counterfeit_folder):
    """
    Load both image classes and build the per-image feature table.

    :param original_folder: Folder of original prints (labelled 0).
    :param counterfeit_folder: Folder of counterfeit prints (labelled 1).
    :return: ``(features, labels, orig_imgs, counterfeit_imgs)`` where
        ``features`` maps "lap_var", "mean", "std" and "dark_ratio" to NumPy
        arrays with one entry per image (originals first), ``labels`` is the
        matching NumPy array of 0/1 values, and the last two items are the
        loaded image lists of each class.
    """
    orig_imgs, orig_labels = load_images(original_folder, 0)
    counterfeit_imgs, counterfeit_labels = load_images(counterfeit_folder, 1)

    # One tuple of statistics per image, originals followed by counterfeits
    feature_names = ("lap_var", "mean", "std", "dark_ratio")
    rows = [compute_features(img) for img in orig_imgs + counterfeit_imgs]

    # Turn the row-wise tuples into one array per feature
    features = {
        name: np.array([row[col] for row in rows])
        for col, name in enumerate(feature_names)
    }
    labels = np.array(orig_labels + counterfeit_labels)  # 0 = original, 1 = counterfeit

    return features, labels, orig_imgs, counterfeit_imgs

def plot_feature_distributions(features, labels, feature_name, class_names=["Original", "Counterfeit"]):
    """
    Overlay the density histograms of one feature for both classes.

    :param features: Mapping from feature name to a NumPy array of values.
    :param labels: NumPy array of 0/1 class ids aligned with the feature arrays.
    :param feature_name: Key of the feature to plot; also used as axis label.
    :param class_names: Legend entries for class 0 and class 1, in that order.
    """
    plt.figure(figsize=(8, 6))
    values = features[feature_name]
    # Class 0 is drawn in blue, class 1 in red
    for class_id, colour in enumerate(('blue', 'red')):
        sns.histplot(values[labels == class_id], color=colour, label=class_names[class_id],
                     kde=True, stat="density", bins=20)
    plt.xlabel(feature_name)
    plt.title(f"Distribution of {feature_name}")
    plt.legend()
    plt.show()

# Dataset locations -- point these at your own folders
original_folder = "/content/drive/MyDrive/Datasets/QR_Classifier/First Print"      # e.g., "./data/original"
counterfeit_folder = "/content/drive/MyDrive/Datasets/QR_Classifier/Second Print"  # e.g., "./data/counterfeit"


# Load both classes and compute the hand-crafted features
features, labels, orig_imgs, counterfeit_imgs = analyze_dataset(original_folder, counterfeit_folder)

# Per-class mean and standard deviation of every feature
print("Feature Statistics:")
for key in features:
    vals = features[key]
    print(f"{key} (Original): mean={np.mean(vals[labels==0]):.2f}, std={np.std(vals[labels==0]):.2f}")
    print(f"{key} (Counterfeit): mean={np.mean(vals[labels==1]):.2f}, std={np.std(vals[labels==1]):.2f}")
    print("--------------------------------------------------")

# One overlaid histogram per feature, in a fixed order
for feature_name in ("lap_var", "mean", "std", "dark_ratio"):
    plot_feature_distributions(features, labels, feature_name)

# Optionally, visualize a few sample images from each class
def show_samples(images, title, num=5):
    """
    Display up to ``num`` images side by side in a single row.

    :param images: Sequence of images accepted by ``plt.imshow``.
    :param title: Title drawn above every displayed image.
    :param num: Maximum number of images to display (default 5).
    :return: None. No figure is created when there is nothing to display.
    """
    # Never index past the end of ``images``: a class with fewer than ``num``
    # loaded images (or none at all) previously raised IndexError.
    count = max(0, min(num, len(images)))
    if count == 0:
        return

    plt.figure(figsize=(15, 3))
    for position, image in enumerate(images[:count], start=1):
        # Use ``count`` columns so the shown images fill the whole row
        plt.subplot(1, count, position)
        plt.imshow(image, cmap='gray')
        plt.title(title)
        plt.axis('off')
    plt.show()

# Preview the first few images of each class, originals first
for sample_images, sample_title in ((orig_imgs, "Original"), (counterfeit_imgs, "Counterfeit")):
    show_samples(sample_images, sample_title)


In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler
from skimage.feature import graycomatrix, graycoprops

import tensorflow as tf
import tensorflow_hub as hub  # If needed for EfficientNet from TF Hub
from tensorflow.keras.layers import (Input, Dense, Dropout, BatchNormalization,
                                     GlobalAveragePooling2D, concatenate)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input

# -------------------------------
# 1. GLCM Feature Extraction
# -------------------------------
def extract_glcm_features(gray_img, distances=(1,), angles=(0, np.pi/4, np.pi/2, 3*np.pi/4)):
    """
    Compute GLCM features (contrast, dissimilarity, homogeneity, energy, correlation, ASM)
    averaged over multiple distances & angles.

    NOTE: a later definition with the same name in this file rebinds
    ``extract_glcm_features`` once the whole file has executed.

    :param gray_img: Grayscale image (2D NumPy array)
    :param distances: sequence of pixel distances
    :param angles: sequence of angles in radians
    :return: 1D float32 NumPy array with the six GLCM feature means, in the
        order listed above
    """
    # Ensure 8-bit grayscale (the matrix below is built with levels=256)
    gray_img = gray_img.astype(np.uint8)

    # Compute GLCM. Use the ``gray*`` spelling that this file imports from
    # skimage.feature; the ``grey*`` names were never imported here, so
    # calling them raised NameError.
    glcm = graycomatrix(gray_img, distances=distances, angles=angles,
                        levels=256, symmetric=True, normed=True)

    props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']
    # Each property is averaged over every value graycoprops returns for it
    features = [np.mean(graycoprops(glcm, prop)) for prop in props]

    return np.array(features, dtype=np.float32)

# -------------------------------
# 2. Data Loading
# -------------------------------
def load_images_and_labels(original_dir, counterfeit_dir):
    """
    Read the colour images of both classes together with their labels.

    Only files ending in .png, .jpg or .jpeg (case-insensitive) are
    considered; files that ``cv2.imread`` cannot decode are skipped.

    :param original_dir: Folder of original prints (label 0).
    :param counterfeit_dir: Folder of counterfeit prints (label 1).
    :return: ``(X_imgs, y_labels)`` -- a list of images (originals first)
        and a NumPy array holding the matching labels.
    """
    image_exts = ('.png', '.jpg', '.jpeg')

    def _list_image_paths(directory):
        return [os.path.join(directory, name)
                for name in os.listdir(directory)
                if name.lower().endswith(image_exts)]

    # Both folders are listed before any image is read
    labelled_paths = [(path, 0) for path in _list_image_paths(original_dir)]
    labelled_paths += [(path, 1) for path in _list_image_paths(counterfeit_dir)]

    X_imgs = []
    y_labels = []
    for path, label in labelled_paths:
        img = cv2.imread(path, cv2.IMREAD_COLOR)  # colour input for EfficientNet
        if img is None:
            continue
        X_imgs.append(img)
        y_labels.append(label)

    return X_imgs, np.array(y_labels)

# -------------------------------
# 3. EfficientNet Feature Extraction (Image Embeddings)
# -------------------------------
def create_efficientnet_encoder():
    """
    Build an image-embedding model from an ImageNet-pretrained EfficientNetB0.

    The classification head is left out (``include_top=False``) and the
    backbone output is reduced with global average pooling, so the returned
    model maps 224x224x3 inputs to one flat vector per image (the original
    author notes 1280 values for B0).
    """
    backbone = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224,224,3))
    pooled = GlobalAveragePooling2D()(backbone.output)
    encoder = tf.keras.Model(inputs=backbone.input, outputs=pooled)

    # Mark every backbone layer as non-trainable
    for layer in backbone.layers:
        layer.trainable = False

    return encoder

def preprocess_efficientnet(img):
    """
    Turn an image into a 224x224 float32 array ready for EfficientNet.

    The last axis is reversed (BGR -> RGB for images read with OpenCV)
    before the array is passed through Keras' EfficientNet
    ``preprocess_input``.
    """
    resized = cv2.resize(img, (224,224))
    rgb = resized[..., ::-1]  # reverse the channel axis
    return preprocess_input(rgb.astype(np.float32))

# -------------------------------
# 4. Hybrid Model (CNN Embeddings + GLCM Features)
# -------------------------------
def build_hybrid_model(effnet_dim, glcm_dim):
    """
    Build and compile the binary classifier that fuses both feature types.

    :param effnet_dim: Length of the EfficientNet embedding vector
        (e.g. 1280 for B0).
    :param glcm_dim: Length of the GLCM feature vector (6 in this file).
    :return: Compiled Keras model taking ``[img_embedding, glcm_features]``
        and producing a single sigmoid output.
    """
    # Two inputs: the image embedding and the GLCM texture features
    embedding_in = Input(shape=(effnet_dim,), name='img_embedding')
    texture_in = Input(shape=(glcm_dim,), name='glcm_features')

    # Concatenate them and pass the result through a small dense head
    hidden = concatenate([embedding_in, texture_in])
    hidden = Dense(256, activation='relu')(hidden)
    hidden = Dropout(0.5)(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    prediction = Dense(1, activation='sigmoid')(hidden)

    classifier = Model(inputs=[embedding_in, texture_in], outputs=prediction)
    classifier.compile(optimizer=Adam(learning_rate=1e-4),
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier

# -------------------------------
# 5. Putting It All Together
# -------------------------------
def _extract_hybrid_features(images, encoder, batch_size=32):
    """
    Compute GLCM texture features and encoder embeddings for BGR images.

    :param images: Non-empty list of BGR images as loaded by ``cv2.imread``.
    :param encoder: Keras model that maps preprocessed image batches to
        embedding vectors.
    :param batch_size: Number of images sent to the encoder per call.
    :return: ``(X_glcm, X_effnet)`` arrays with one row per image.
    """
    # GLCM extraction works on the grayscale version of each image
    glcm_rows = [extract_glcm_features(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
                 for img in images]

    # Run the encoder on chunks instead of one predict() call per image:
    # far fewer calls, while only one chunk of preprocessed images is held
    # in memory at a time.
    embedding_chunks = []
    for start in range(0, len(images), batch_size):
        batch = np.stack([preprocess_efficientnet(img)
                          for img in images[start:start + batch_size]])
        embedding_chunks.append(encoder.predict(batch, verbose=0))

    return np.array(glcm_rows), np.concatenate(embedding_chunks, axis=0)


def main_pipeline(original_dir, counterfeit_dir):
    """
    Train and evaluate the hybrid (EfficientNet embedding + GLCM) classifier.

    The data is split into train / validation / test parts. Only the
    validation part drives early stopping, learning-rate reduction and the
    decision-threshold search; the test part is used exclusively for the
    final report so the printed test metrics are not tuned on test data.

    :param original_dir: Folder with original prints (label 0).
    :param counterfeit_dir: Folder with counterfeit prints (label 1).
    :return: ``(hybrid_net, scaler, best_t)`` -- the trained Keras model, the
        ``StandardScaler`` fitted on the training GLCM features, and the
        decision threshold with the best F1 score on the validation split.
    :raises ValueError: If no image could be loaded from the two folders.
    """
    # 1. Load images and labels
    X_imgs, y = load_images_and_labels(original_dir, counterfeit_dir)
    if len(X_imgs) == 0:
        raise ValueError(
            f"No readable .png/.jpg/.jpeg images found in {original_dir!r} or {counterfeit_dir!r}"
        )

    # 2. Build embeddings & GLCM features for every image
    effnet_encoder = create_efficientnet_encoder()  # Pretrained EfficientNet

    print("Extracting features...")
    X_glcm, X_effnet = _extract_hybrid_features(X_imgs, effnet_encoder)
    print("Done. Shapes:", X_glcm.shape, X_effnet.shape)

    # 3. Hold out a test set, then carve a validation set out of the rest
    X_fit_glcm, X_test_glcm, X_fit_eff, X_test_eff, y_fit, y_test = train_test_split(
        X_glcm, X_effnet, y, test_size=0.2, stratify=y, random_state=42
    )
    X_train_glcm, X_val_glcm, X_train_eff, X_val_eff, y_train, y_val = train_test_split(
        X_fit_glcm, X_fit_eff, y_fit, test_size=0.2, stratify=y_fit, random_state=42
    )

    # Scale GLCM features with statistics from the training part only
    scaler = StandardScaler()
    X_train_glcm = scaler.fit_transform(X_train_glcm)
    X_val_glcm = scaler.transform(X_val_glcm)
    X_test_glcm = scaler.transform(X_test_glcm)

    # 4. Build Hybrid Model
    hybrid_net = build_hybrid_model(effnet_dim=X_train_eff.shape[1],
                                    glcm_dim=X_train_glcm.shape[1])

    # 5. Train -- callbacks monitor the validation split, never the test split
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)
    ]

    history = hybrid_net.fit(
        [X_train_eff, X_train_glcm], y_train,
        validation_data=([X_val_eff, X_val_glcm], y_val),
        epochs=30, batch_size=8, callbacks=callbacks
    )

    # 6. Evaluate on the untouched test split at the default 0.5 threshold
    test_prob = hybrid_net.predict([X_test_eff, X_test_glcm]).ravel()
    y_pred = (test_prob > 0.5).astype(int)

    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    # 7. Pick the decision threshold on the validation split
    val_prob = hybrid_net.predict([X_val_eff, X_val_glcm]).ravel()
    best_f1 = 0
    best_t = 0.5
    for t in np.linspace(0.1, 0.9, 50):
        f1 = f1_score(y_val, (val_prob > t).astype(int))
        if f1 > best_f1:
            best_f1 = f1
            best_t = t
    print(f"Best threshold={best_t:.2f} with validation F1={best_f1:.3f}")

    # Report how the selected threshold performs on the test split
    test_f1 = f1_score(y_test, (test_prob > best_t).astype(int))
    print(f"Test F1 at best threshold={test_f1:.3f}")

    # 8. Plot training curves
    plt.figure(figsize=(12,5))
    plt.subplot(1,2,1)
    plt.plot(history.history['accuracy'], label='Train Acc')
    plt.plot(history.history['val_accuracy'], label='Val Acc')
    plt.legend()
    plt.title("Accuracy")

    plt.subplot(1,2,2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.legend()
    plt.title("Loss")
    plt.show()

    return hybrid_net, scaler, best_t

# 6. Extra: GLCM extraction function (like we used in the pipeline)
def extract_glcm_features(gray_img):
    """
    Return six GLCM texture features of a grayscale image.

    The co-occurrence matrix is built for distance 1 and the four angles
    0, pi/4, pi/2 and 3*pi/4; each of contrast, dissimilarity, homogeneity,
    energy, correlation and ASM is then averaged over the values returned
    for it. The result is a float32 array in that order.
    """
    offsets = [1]
    directions = [0, np.pi/4, np.pi/2, 3*np.pi/4]
    texture_props = ('contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM')

    # The image is cast to 8 bits to match levels=256
    cooccurrence = graycomatrix(gray_img.astype(np.uint8), distances=offsets, angles=directions,
                                levels=256, symmetric=True, normed=True)

    return np.array([graycoprops(cooccurrence, name).mean() for name in texture_props],
                    dtype=np.float32)

# 7. Entry Point
if __name__ == "__main__":
    # Example usage: point both variables at your own folders of .png/.jpg files
    original_dir = "/content/drive/MyDrive/Datasets/QR_Classifier/First Print"
    counterfeit_dir = "/content/drive/MyDrive/Datasets/QR_Classifier/Second Print"

    # Train the hybrid classifier; keep the fitted scaler and chosen threshold
    model, glcm_scaler, best_threshold = main_pipeline(
        original_dir=original_dir,
        counterfeit_dir=counterfeit_dir,
    )

    # Persist the trained model to disk
    model.save("glcm_efficientnet_hybrid.h5")
