# In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
import torch
import torch.nn as nn
from torchvision.models import vgg19, VGG19_Weights

# Folder that is walked recursively for input images.
DATA_DIR = "/content/drive/MyDrive/test image"
# Folder that receives the per-image feature/metadata files and summary.csv.
OUTPUT_DIR = "./vgg19_encodings"
# Upper bound on the number of region proposals kept per image.
MAX_PROPOSALS = 50
# Proposals whose area (w * h) is below this value are discarded.
MIN_PROP_AREA = 5000
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Create the output folder up front; an already existing folder is accepted.
os.makedirs(OUTPUT_DIR, exist_ok=True)

def build_vgg19_fc2(device):
    """Build a pretrained VGG19 feature extractor and its preprocessing.

    The extractor chains the convolutional trunk, the adaptive average
    pool, a flatten, and the first six layers of the classifier head, so
    it stops before the remaining classifier layer(s). In torchvision's
    VGG that remaining layer is the final class-score ``Linear``.

    Args:
        device: Device (``torch.device`` or string) the extractor is moved to.

    Returns:
        A ``(feature_extractor, transform)`` tuple: the extractor in eval
        mode on ``device``, and the preprocessing callable that ships with
        the default pretrained weights.
    """
    weights = VGG19_Weights.DEFAULT
    vgg = vgg19(weights=weights)
    feature_extractor = nn.Sequential(
        vgg.features,
        # Keep the model's own adaptive pooling between the trunk and the
        # head. Without it the flatten only matches the first Linear layer
        # when the feature map already has the pooled size.
        vgg.avgpool,
        nn.Flatten(start_dim=1),
        *list(vgg.classifier.children())[:6],
    ).to(device)
    # eval() recurses into the shared sub-modules, which disables dropout
    # so repeated runs give the same features.
    feature_extractor.eval()
    transform = weights.transforms()
    return feature_extractor, transform

def selective_search_proposals(img_bgr):
    """Return de-duplicated Selective Search boxes for an image.

    Args:
        img_bgr: Image passed to OpenCV's selective search as the base image.

    Returns:
        A list of ``(x, y, w, h)`` tuples in the order OpenCV produced them,
        with exact repeats removed. If anything goes wrong (for example the
        ``cv2.ximgproc`` module is missing) a message is printed and an empty
        list is returned so the caller can fall back to another strategy.
    """
    try:
        searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
        searcher.setBaseImage(img_bgr)
        searcher.switchToSelectiveSearchFast()
        boxes = [(x, y, w, h) for (x, y, w, h) in searcher.process()]
        # dict keys keep first-insertion order, so this removes repeats
        # without disturbing the original ranking.
        return list(dict.fromkeys(boxes))
    except Exception as e:
        print("Selective Search not available, using sliding windows.", e)
        return []

def sliding_window_proposals(img_shape, step_ratio=0.2, window_sizes=((224,224),(256,256),(180,180))):
    """Generate a grid of fixed-size boxes covering an image.

    Args:
        img_shape: Image shape; only the first two entries (height, width)
            are used.
        step_ratio: Stride as a fraction of the window size. The stride is
            never smaller than 8.
        window_sizes: Iterable of ``(width, height)`` window sizes.

    Returns:
        A list of ``(x, y, w, h)`` tuples, grouped by window size and ordered
        row by row. Windows larger than the image contribute nothing.
    """
    img_h, img_w = img_shape[:2]
    boxes = []
    for box_w, box_h in window_sizes:
        stride_x = max(8, int(box_w * step_ratio))
        stride_y = max(8, int(box_h * step_ratio))
        tops = range(0, img_h - box_h + 1, stride_y)
        lefts = range(0, img_w - box_w + 1, stride_x)
        boxes.extend((left, top, box_w, box_h) for top in tops for left in lefts)
    return boxes

def iou(boxA, boxB):
    """Return the intersection-over-union of two ``(x, y, w, h)`` boxes.

    Args:
        boxA: First box; indices 0-3 are read as x, y, width, height.
        boxB: Second box, same layout.

    Returns:
        The overlap area divided by the union area, or ``0.0`` when the
        union is not positive.
    """
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[0] + boxA[2], boxB[0] + boxB[2])
    bottom = min(boxA[1] + boxA[3], boxB[1] + boxB[3])

    # Negative extents mean the boxes do not overlap on that axis.
    overlap = max(0, right - left) * max(0, bottom - top)
    total = boxA[2] * boxA[3] + boxB[2] * boxB[3] - overlap
    if total > 0:
        return overlap / total
    return 0.0

def filter_rects(rects, shape, max_props, min_area, iou_thresh=0.95):
    """Clip, size-filter and de-duplicate candidate boxes.

    Each box is first intersected with the image bounds. Boxes that end up
    empty or smaller than ``min_area`` are dropped. The survivors are
    visited from largest to smallest, and a box is kept only if its IoU with
    every already-kept box is below ``iou_thresh``.

    Args:
        rects: Iterable of ``(x, y, w, h)`` boxes. It is not modified.
        shape: Image shape; the first two entries are height and width.
        max_props: Maximum number of boxes to return. Values ``<= 0`` yield
            an empty list.
        min_area: Minimum area (after clipping) a box must have.
        iou_thresh: Boxes whose IoU with a kept box reaches this value are
            treated as duplicates.

    Returns:
        A list of at most ``max_props`` clipped ``(x, y, w, h)`` tuples,
        largest first.
    """
    if max_props <= 0:
        return []

    img_h, img_w = shape[:2]
    clipped = []
    for x, y, w, h in rects:
        # Intersect with the image. Moving a negative origin to 0 must also
        # shrink the extent, otherwise the box would grow.
        x0, y0 = max(0, x), max(0, y)
        x1, y1 = min(img_w, x + w), min(img_h, y + h)
        cw, ch = x1 - x0, y1 - y0
        if cw > 0 and ch > 0 and cw * ch >= min_area:
            clipped.append((x0, y0, cw, ch))

    # The sort is stable, so equally sized boxes keep their incoming order.
    clipped.sort(key=lambda r: r[2] * r[3], reverse=True)

    selected = []
    for r in clipped:
        if all(iou(r, s) < iou_thresh for s in selected):
            selected.append(r)
            if len(selected) >= max_props:
                break
    return selected

def encode_image_regions(image_path, model, transform, device, batch_size=16):
    """Encode the region proposals of one image with ``model``.

    Proposals come from selective search, falling back to sliding windows
    when that yields nothing. They are then filtered with the module-level
    ``MAX_PROPOSALS`` and ``MIN_PROP_AREA`` limits.

    Args:
        image_path: Path of the image to read with OpenCV.
        model: Callable mapping a batch of preprocessed crops to features.
        transform: Callable turning a PIL crop into a tensor.
        device: Device the batches are moved to before the forward pass.
        batch_size: Number of crops per forward pass (must be >= 1).

    Returns:
        ``(feats, df_meta)``: a 2-D NumPy array with one row per proposal,
        and a DataFrame with columns ``x, y, w, h`` describing each proposal
        in the same order. With no proposals the array has shape
        ``(0, 4096)``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    img_bgr = cv2.imread(image_path)
    if img_bgr is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    rects = selective_search_proposals(img_bgr)
    if not rects:
        rects = sliding_window_proposals(img_rgb.shape)
    rects = filter_rects(rects, img_rgb.shape, MAX_PROPOSALS, MIN_PROP_AREA)

    columns = ["x", "y", "w", "h"]
    if not rects:
        # float32 keeps the empty result's dtype in line with the features
        # produced below. NOTE(review): assumes the model emits float32
        # 4096-d vectors, as the original hard-coded width implies. Confirm
        # if the model is swapped.
        return np.zeros((0, 4096), dtype=np.float32), pd.DataFrame(columns=columns)

    crops = [
        transform(Image.fromarray(img_rgb[y:y + h, x:x + w]))
        for (x, y, w, h) in rects
    ]
    stacked = torch.stack(crops)

    feats = []
    with torch.no_grad():
        for start in range(0, len(stacked), batch_size):
            # Move one batch at a time so device memory use is bounded by
            # batch_size rather than by the number of proposals.
            out = model(stacked[start:start + batch_size].to(device))
            feats.append(out.cpu().numpy())

    return np.vstack(feats), pd.DataFrame(rects, columns=columns)

def find_images(folder):
    """Recursively collect image files below ``folder``.

    A file counts as an image when its lower-cased extension is one of
    ``.jpg``, ``.jpeg``, ``.png``, ``.bmp`` or ``.tiff``.

    Args:
        folder: Root directory to walk.

    Returns:
        A sorted list of file paths, each joined onto the directory it was
        found in.
    """
    image_exts = {".jpg",".jpeg",".png",".bmp",".tiff"}
    return sorted(
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(folder)
        for filename in filenames
        if os.path.splitext(filename.lower())[1] in image_exts
    )

def _unique_stem(stem, taken):
    """Return ``stem``, or ``stem`` with a numeric suffix, not yet in ``taken``.

    ``taken`` holds case-folded names and is updated with the returned one,
    so names differing only in case are also treated as clashes.
    """
    candidate, counter = stem, 1
    while candidate.casefold() in taken:
        candidate = f"{stem}_{counter}"
        counter += 1
    taken.add(candidate.casefold())
    return candidate


def main():
    """Encode every image under ``DATA_DIR`` and write results to ``OUTPUT_DIR``.

    For each image this writes ``<stem>_proposals_feats.npy`` and
    ``<stem>_proposals_meta.csv``. When all images are processed it writes
    ``summary.csv`` listing the outputs. A failure on one image is printed
    and the run continues with the next image.
    """
    device = torch.device(DEVICE)
    model, transform = build_vgg19_fc2(device)
    images = find_images(DATA_DIR)

    if not images:
        print(" No images found in folder:", DATA_DIR)
        return

    print(f"Found {len(images)} image(s). Device: {device}")
    summary_rows = []
    # Images are collected recursively, so different files can share a
    # basename (a/x.jpg, b/x.jpg, x.png). Track the names already used so
    # later files get a suffix instead of overwriting earlier outputs.
    used_stems = set()

    for im_path in tqdm(images, desc="Encoding images"):
        try:
            feats, df_meta = encode_image_regions(im_path, model, transform, device)
            base = _unique_stem(
                os.path.splitext(os.path.basename(im_path))[0], used_stems
            )
            feat_file = f"{base}_proposals_feats.npy"
            meta_file = f"{base}_proposals_meta.csv"
            np.save(os.path.join(OUTPUT_DIR, feat_file), feats)
            df_meta.to_csv(os.path.join(OUTPUT_DIR, meta_file), index=False)
            summary_rows.append({
                "image": im_path,
                "n_proposals": len(df_meta),
                "feat_file": feat_file,
                "meta_file": meta_file,
            })
        except Exception as e:
            # Best-effort batch run: report the failure and keep going.
            print(f"Error processing {im_path}: {e}")

    pd.DataFrame(summary_rows).to_csv(os.path.join(OUTPUT_DIR, "summary.csv"), index=False)
    print("\n Done. Encodings saved to:", OUTPUT_DIR)

# Run the encoding pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()