### Template matching (POC)

#### Template matching is a technique for finding matching features between two images

![alt text](examples/example.webp "")

In [1]:
import os 
# Set CUDA_VISIBLE_DEVICES to "-1" before any deep-learning library is imported.
# Presumably this is meant to hide every GPU so the notebook runs on CPU; the
# validation log further down does report a CPU device.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

##### In this use case we can use a CNN-based model to capture and extract features from an image, like this:

In [2]:
# tf.keras.Sequential([
#         tf.keras.layers.Conv2D(32, 3, activation='relu'),
#         tf.keras.layers.MaxPooling2D(),
#         tf.keras.layers.Conv2D(64, 3, activation='relu'),
#         ..., 
#         ...,
#     ])

##### An alternative and better approach to this problem: rather than training a model from scratch on limited data, we can use a pretrained CNN-based model such as ResNet-50 or VGGNet

### Final Decision

##### Using ResNet or VGG models works, but only with a static window: the scale of the query image relative to the background image cannot change, otherwise the product will not be detected. Since very few images of a single product are available, a 'one-shot learning' technique is better suited, so a YOLO model is selected with a simulated one-shot approach (training on synthetic images generated from the few product examples)

In [None]:
import os
import cv2
import random

def _list_image_files(directory):
    """Return the sorted names of the .jpg/.jpeg/.png files directly inside ``directory``."""
    return sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(('.jpg', '.png', '.jpeg'))
    )


def generate_multiclass_synthetic(
    query_root, bg_dir, output_dir, num_images=500, seed=None):
    """Generate a synthetic YOLO detection dataset by pasting product images onto backgrounds.

    Steps:
      1. Every sub-directory of ``query_root`` is treated as one class; class ids
         are assigned in alphabetical order of the directory names.
      2. For each of ``num_images`` iterations a random background image is loaded
         and between 1 and 5 random product images are pasted onto it.
      3. Each product image is resized to 40-60% of its own original size and
         pasted at a random position where it fits fully inside the background.
         Products that would not fit inside the background are skipped.
      4. The composited image is written to ``<output_dir>/images/train`` and the
         matching YOLO label file (``class x_center y_center width height``, all
         normalised by the background size) to ``<output_dir>/labels/train``.

    Pasted products may overlap: positions are drawn independently, and the label
    of an earlier product is not adjusted when a later one covers it.

    Args:
        query_root: Directory containing one sub-directory of example images per class.
        bg_dir: Directory containing the background images.
        output_dir: Root directory of the generated dataset.
        num_images: Number of synthetic images to attempt. Iterations whose
            background cannot be read or whose image cannot be written are skipped.
        seed: Optional seed for a private random generator, making the run
            reproducible. When ``None`` the global ``random`` module state is used.

    Returns:
        dict mapping class name -> integer class id.

    Raises:
        ValueError: If ``num_images`` is positive but ``query_root`` has no class
            sub-directories or ``bg_dir`` has no .jpg/.jpeg/.png images.
    """
    images_dir = os.path.join(output_dir, "images", "train")
    labels_dir = os.path.join(output_dir, "labels", "train")
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    class_folders = sorted(
        d for d in os.listdir(query_root)
        if os.path.isdir(os.path.join(query_root, d))
    )
    class_map = {class_name: idx for idx, class_name in enumerate(class_folders)}
    print(f"Detected {len(class_map)} classes: {class_map}")

    if num_images <= 0:
        # Nothing to generate; the inputs are not inspected any further.
        return class_map

    if not class_folders:
        raise ValueError(f"No class sub-directories found in {query_root!r}")

    # Directory listings do not change between iterations, so read them once.
    bg_files = _list_image_files(bg_dir)
    if not bg_files:
        raise ValueError(f"No background images (.jpg/.jpeg/.png) found in {bg_dir!r}")
    class_samples = {
        class_name: _list_image_files(os.path.join(query_root, class_name))
        for class_name in class_folders
    }

    # A private generator keeps seeded runs independent of the global random state.
    rng = random.Random(seed) if seed is not None else random

    for i in range(num_images):
        bg_name = rng.choice(bg_files)
        bg_img = cv2.imread(os.path.join(bg_dir, bg_name))

        if bg_img is None:
            print(f"error could not load background {bg_name}")
            continue

        h_bg, w_bg = bg_img.shape[:2]
        annotations = []

        num_objects = rng.randint(1, 5)
        for _ in range(num_objects):
            class_name = rng.choice(class_folders)
            class_id = class_map[class_name]
            samples = class_samples[class_name]
            if not samples:
                continue

            query_path = os.path.join(query_root, class_name, rng.choice(samples))
            query_img = cv2.imread(query_path, cv2.IMREAD_COLOR)

            if query_img is None:
                print(f"error could not load {query_path}")
                continue

            h_q, w_q = query_img.shape[:2]
            scale = rng.uniform(0.4, 0.6)
            new_w = int(w_q * scale)
            new_h = int(h_q * scale)

            # A very small product image can round down to zero pixels,
            # which cv2.resize rejects.
            if new_w < 1 or new_h < 1:
                continue

            # Skip products that would not fit inside the background.
            if new_w >= w_bg or new_h >= h_bg:
                continue

            query_resized = cv2.resize(query_img, (new_w, new_h))

            # Top-left corner chosen so the pasted product stays inside the background.
            x = rng.randint(0, w_bg - new_w)
            y = rng.randint(0, h_bg - new_h)
            bg_img[y:y+new_h, x:x+new_w] = query_resized

            # YOLO format: box centre and size, normalised by the background size.
            x_center = (x + new_w / 2) / w_bg
            y_center = (y + new_h / 2) / h_bg
            width = new_w / w_bg
            height = new_h / h_bg
            annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

        # Image and label share the same stem in parallel images/ and labels/ folders.
        img_path = os.path.join(images_dir, f"img_{i:05}.jpg")
        label_path = os.path.join(labels_dir, f"img_{i:05}.txt")

        # cv2.imwrite reports failure through its return value; do not leave a
        # label file behind without its image.
        if not cv2.imwrite(img_path, bg_img):
            print(f"error could not write {img_path}")
            continue

        with open(label_path, "w") as f:
            f.write("\n".join(annotations) + "\n")

    return class_map

In [4]:
# Generate the synthetic training set. The returned class-name -> class-id
# mapping is reused below when data.yaml is written.
class_map = generate_multiclass_synthetic(
    query_root="examples/classes/",
    bg_dir="examples/images/",
    output_dir="examples/yolo_data/",
    num_images=500
)


Detected 2 classes: {'cheerios': 0, 'mini_wheats': 1}


In [5]:
import yaml

def write_data_yaml(class_map, output_path="examples/yolo_data/data.yaml"):
    """Write the dataset description file (data.yaml) used for YOLO training.

    Args:
        class_map: dict mapping class name -> integer class id.
        output_path: Destination of the YAML file. Missing parent directories
            are created.

    The written mapping has four keys: ``train``, ``val``, ``nc`` (number of
    classes) and ``names`` (class names ordered by class id).
    """
    # Order the names by class id rather than by dict insertion order, so the
    # position of each name in the list always equals the id used in the labels.
    names = [name for name, _ in sorted(class_map.items(), key=lambda item: item[1])]

    data = {
        "train": "images/train",
        # NOTE: validation points at the training images, so validation metrics
        # computed with this file are measured on data the model has seen.
        "val": "images/train",
        "nc": len(names),
        "names": names,
    }

    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_path, "w") as f:
        yaml.dump(data, f)

# Write data.yaml to the default path, inside the generated dataset folder.
write_data_yaml(class_map)


In [None]:
# !pip install ultralytics

In [None]:
# !yolo detect train model=yolov8n.pt data=examples/yolo_data/data.yaml epochs=5 imgsz=640

#### Yolo documentation : https://docs.ultralytics.com/tasks/detect/#predict

In [6]:
from ultralytics import YOLO

model = YOLO("runs/detect/train/weights/best.pt")  # load the custom-trained weights

# Validate the model
# NOTE(review): data.yaml points "val" at images/train, so these metrics are
# measured on the training images.
metrics = model.val()  # no arguments needed, dataset and settings remembered
# Only the last expression of a cell is displayed; the three lines before it
# are evaluated but produce no output.
metrics.box.map  # map50-95
metrics.box.map50  # map50
metrics.box.map75  # map75
metrics.box.maps  # per-class mAP50-95 (the array shown in the cell output)

Ultralytics 8.3.175 🚀 Python-3.12.3 torch-2.5.1+cu124 CPU (12th Gen Intel Core(TM) i5-12450H)
Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 580.7±19.7 MB/s, size: 3747.2 KB)


[34m[1mval: [0mScanning /home/rijo/Documents/Cprism/experiment/examples/yolo_data/labels/train.cache... 500 images, 2 backgrounds, 0 corrupt: 100%|██████████| 500/500 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 32/32 [01:05<00:00,  2.05s/it]


                   all        500       1389      0.999      0.978      0.991      0.972
              cheerios        376        661      0.999      0.991      0.995      0.986
           mini_wheats        396        728      0.999      0.964      0.987      0.959
Speed: 1.1ms preprocess, 48.2ms inference, 0.0ms loss, 4.3ms postprocess per image
Results saved to [1mruns/detect/val2[0m


array([    0.98565,      0.9587])

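#### This model was trained for 5 epochs on 500 synthetic images. Note that `data.yaml` uses the same `images/train` folder for validation, so the metrics above are measured on the training images and are optimistic.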

In [28]:
# Run detection on the shelf photo.
results = model(
    "examples/shelves.jpg",
    imgsz=1280,         # inference image size
    iou=0.5,            # IoU threshold for non-maximum suppression
    conf=0.3,           # minimum confidence for a detection to be kept
    agnostic_nms=True,  # suppress overlapping boxes across classes
    max_det=100,        # upper bound on detections per image
)

for result in results:
    # A detection model only fills `boxes`. The masks / keypoints / probs / obb
    # attributes belong to other task types, so they are no longer read here.
    boxes = result.boxes  # Boxes object for bounding box output
    result.show()  # display to screen



image 1/1 /home/rijo/Documents/Cprism/experiment/examples/shelves.jpg: 736x1280 4 cheerioss, 13 mini_wheatss, 219.9ms
Speed: 31.1ms preprocess, 219.9ms inference, 9.8ms postprocess per image at shape (1, 3, 736, 1280)
Opening in existing browser session.


<img src="tmp9y3n4mx4.png" alt="alt text" width="60%">

### Log the model to DagsHub (for production, use MLflow on a dedicated instance)

In [7]:
# NOTE(review): this stores mAP@0.5 (metrics.box.map50) under the name
# `precision`, and it is later logged to MLflow as the "precision" metric.
# Confirm that mAP@0.5, not precision, is the value intended here.
precision = metrics.box.map50

In [9]:
# Prediction settings kept in one mapping so they can be logged with the model.
inference_params = dict(
    imgsz=1280,         # inference image size
    iou=0.5,            # IoU threshold for non-maximum suppression
    conf=0.3,           # minimum confidence for a detection to be kept
    agnostic_nms=True,  # class-agnostic non-maximum suppression
    max_det=100,        # upper bound on detections per image
)

In [13]:
import mlflow
import dagshub

# Initialise the DagsHub integration for this repository and point MLflow at
# the repository's tracking URI.
dagshub.init(repo_owner='slalrijo2005', repo_name='Cprism', mlflow=True)
URI = "https://dagshub.com/slalrijo2005/Cprism.mlflow"

mlflow.set_tracking_uri(URI)
mlflow.set_experiment("Product_detection")

# Same weights file that was loaded for validation earlier in the notebook.
model_path = "runs/detect/train/weights/best.pt"

# One run holds the weights file, two descriptive params, the prediction
# settings and a single metric.
with mlflow.start_run(run_name="first_iter"):
    mlflow.log_artifact(model_path, artifact_path="model_one")  # logs the .pt file
    mlflow.log_param("model_type", "YOLOv8n")
    mlflow.log_param("format", ".pt")
    mlflow.log_params(inference_params)
    # NOTE(review): `precision` was assigned from metrics.box.map50, so the value
    # stored under the key "precision" is mAP@0.5.
    mlflow.log_metric("precision", precision)



🏃 View run first_iter at: https://dagshub.com/slalrijo2005/Cprism.mlflow/#/experiments/0/runs/698e7a54ae104b9bad9d5ff90c126b35
🧪 View experiment at: https://dagshub.com/slalrijo2005/Cprism.mlflow/#/experiments/0


#### Register model (not useful in this case: because the model is stored as a plain artifact, it can only be loaded from the experiment run's artifacts, even after registering)

In [None]:
# NOTE(review): the run id is hard-coded and differs from the run id printed in
# the logging cell's output (698e7a54...). Confirm it points at the intended
# run, and consider reading it from configuration instead of the notebook.
model_run_id = "23f72a8af2fc4f68a1a5fc28d779ca20" #secret (revealed temp for machine task)
model_artifact_path = "model_one"  # artifact_path used when the weights were logged
model_uri = f"runs:/{model_run_id}/{model_artifact_path}"
# Register the run's "model_one" artifact folder under the name "Product_detection".
mlflow.register_model(
    model_uri=model_uri,
    name="Product_detection"
)


Successfully registered model 'Product_detection'.
2025/08/07 19:00:22 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Product_detection, version 1
Created version '1' of model 'Product_detection'.


<ModelVersion: aliases=[], creation_timestamp=1754573422529, current_stage='None', description='', last_updated_timestamp=1754573422529, name='Product_detection', run_id='23f72a8af2fc4f68a1a5fc28d779ca20', run_link='', source='mlflow-artifacts:/7b9e474cfffb45179c7d571c32ad3b15/23f72a8af2fc4f68a1a5fc28d779ca20/artifacts/model_one', status='READY', status_message=None, tags={}, user_id='', version='1'>

In [None]:
# Full artifact URI of best.pt; it embeds the same hard-coded run id used when
# registering the model.
artifact_uri = "mlflow-artifacts:/7b9e474cfffb45179c7d571c32ad3b15/23f72a8af2fc4f68a1a5fc28d779ca20/artifacts/model_one/best.pt" #secret (revealed temp for machine task)
# The value returned here is passed to YOLO(...) in the next cell, so at this
# point `loaded_model` is presumably the downloaded file's local path, not a
# model object.
loaded_model = mlflow.artifacts.download_artifacts(artifact_uri=artifact_uri)
 

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [21]:
# Rebinds `loaded_model`: before this line it holds the value returned by
# download_artifacts, afterwards the YOLO model built from it. Re-running this
# cell on its own would pass the YOLO object back into YOLO(...), so re-run the
# download cell first.
loaded_model = YOLO(loaded_model)

In [22]:
# Repeat the shelf prediction with the model restored from MLflow. The settings
# come from `inference_params`, the same mapping that was logged with the run,
# so the two cannot drift apart.
results = loaded_model("examples/shelves.jpg", **inference_params)

for result in results:
    # A detection model only fills `boxes`. The masks / keypoints / probs / obb
    # attributes belong to other task types, so they are no longer read here.
    boxes = result.boxes  # Boxes object for bounding box output
    result.show()  # display to screen





image 1/1 /home/rijo/Documents/Cprism/experiment/examples/shelves.jpg: 736x1280 4 cheerioss, 13 mini_wheatss, 327.6ms
Speed: 38.5ms preprocess, 327.6ms inference, 7.6ms postprocess per image at shape (1, 3, 736, 1280)
Opening in existing browser session.


### Better?

#### A better approach that is more stable and zero-shot would be an image-feature-embedding model with a sliding-window search at different scales, but it would need more than 50 hours to experiment with and build a production-level model, e.g. https://github.com/facebookresearch/dino