<a href="https://colab.research.google.com/github/Chamiln17/TLM-AUP/blob/main/AUP_Pretrained_YOLO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [54]:
!pip install ultralytics opencv-python-headless efficientnet-pytorch albumentations pandas scikit-learn openpyxl




In [55]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
import re
import yaml
from difflib import get_close_matches
from pathlib import Path
from PIL import Image
import albumentations as A
from albumentations import augmentations
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from efficientnet_pytorch import EfficientNet
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
import random

def standardize_product_name(name):
    """Standardize product names for matching"""
    # Convert to lowercase
    name = name.lower()

    # Remove special characters and standardize spaces
    name = re.sub(r'[_\-\(\)]', ' ', name)

    # Remove volume indicators
    name = re.sub(r'\d+\s*[cl]l?', '', name)
    name = re.sub(r'\d+\s*ml', '', name)
    name = re.sub(r'\d+\s*L', '', name)

    # Remove specific words that might vary
    name = re.sub(r'\b(pet|pack|cannete|ramy)\b', '', name)

    # Clean up extra spaces
    name = ' '.join(name.split())

    return name.strip()

In [56]:
from google.colab import drive

drive.mount('/content/drive', force_remount=True)
print('Drive mounted')

Mounted at /content/drive
Drive mounted


In [57]:
class DataAugmenter:
    def __init__(self, input_folder, output_folder, num_augmentations=5):
        self.input_folder = Path(input_folder)
        self.output_folder = Path(output_folder)
        self.num_augmentations = num_augmentations

        # Optimized augmentation pipeline for shelf products
        self.transform = A.Compose([
            # Lighting variations (most important for shelf products)
            A.OneOf([
                # Simulate different store lighting conditions
                A.RandomBrightnessContrast(
                    brightness_limit=(-0.3, 0.3),  # Increased range for store lighting
                    contrast_limit=(-0.2, 0.2),    # Moderate contrast variation
                    p=0.8
                ),
                # Simulate fluorescent lighting color variations
                A.ColorJitter(
                    brightness=0.3,
                    contrast=0.2,
                    saturation=0.1,  # Reduced saturation variation
                    hue=0.05,        # Minimal hue changes
                    p=0.8
                ),
            ], p=0.9),  # High probability of lighting augmentation

            # Shadows (common in shelf environments)
            A.RandomShadow(
                shadow_roi=(0, 0, 1, 0.5),  # Shadows mainly in upper half
                num_shadows_lower=1,
                num_shadows_upper=2,
                shadow_dimension=4,
                p=0.4
            ),

            # Minimal geometric transformations (products are usually upright)
            A.OneOf([
                A.HorizontalFlip(p=0.5),  # Products can be viewed from either side
                A.ShiftScaleRotate(
                    shift_limit=0.05,      # Minimal shift
                    scale_limit=0.1,       # Slight scale variation
                    rotate_limit=5,        # Very minimal rotation
                    p=0.5
                ),
            ], p=0.3),

            # Shelf-specific degradations
            A.OneOf([
                # Simulate slightly out-of-focus photos
                A.GaussianBlur(blur_limit=(3, 5), p=0.3),
                # Simulate lower light conditions
                A.MultiplicativeNoise(
                    multiplier=[0.8, 1.2],
                    per_channel=False,
                    p=0.2
                ),
            ], p=0.3),

            # Perspective variations (viewing angles on shelves)
            A.OneOf([
                A.Perspective(
                    scale=(0.02, 0.05),    # Reduced perspective change
                    p=0.3
                ),
                A.OpticalDistortion(
                    distort_limit=0.05,    # Minimal distortion
                    shift_limit=0.05,
                    p=0.3
                ),
            ], p=0.2),
        ])

    def augment_dataset(self):
        self.output_folder.mkdir(exist_ok=True, parents=True)
        total_images = len(list(self.input_folder.glob('*.png')))
        processed = 0

        for img_path in self.input_folder.glob('*.png'):
            try:
                image = cv2.imread(str(img_path))
                if image is None:
                    print(f"Warning: Could not read image {img_path}")
                    continue

                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Save original image
                original_output_path = self.output_folder / img_path.name
                Image.fromarray(image).save(original_output_path)

                # Generate augmented versions with progress tracking
                for i in range(self.num_augmentations):
                    augmented = self.transform(image=image)['image']
                    aug_filename = f"{img_path.stem}_aug_{i}{img_path.suffix}"
                    aug_output_path = self.output_folder / aug_filename
                    Image.fromarray(augmented).save(aug_output_path)

                processed += 1
                if processed % 10 == 0:  # Progress update every 10 images
                    print(f"Processed {processed}/{total_images} images ({(processed/total_images)*100:.1f}%)")

            except Exception as e:
                print(f"Error processing {img_path}: {str(e)}")
                continue

    def verify_augmentations(self):
        """Verify that augmented images are valid and readable"""
        print("\nVerifying augmented images...")
        invalid_images = []
        total_images = len(list(self.output_folder.glob('*.png')))
        verified = 0

        for img_path in self.output_folder.glob('*.png'):
            try:
                img = Image.open(img_path)
                img.verify()
                img.close()
                verified += 1
                if verified % 50 == 0:  # Progress update every 50 verifications
                    print(f"Verified {verified}/{total_images} images ({(verified/total_images)*100:.1f}%)")
            except Exception as e:
                invalid_images.append((img_path, str(e)))

        if invalid_images:
            print("\nFound invalid images:")
            for path, error in invalid_images:
                print(f"- {path}: {error}")
        else:
            print("\nAll augmented images are valid!")

In [58]:
class ProductClassifier(nn.Module):
    def __init__(self, num_classes):
        super(ProductClassifier, self).__init__()
        # Change to efficientnet-b7
        self.base_model = EfficientNet.from_pretrained('efficientnet-b2')
        self.features = self.base_model.extract_features
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(0.3)  # Increased 0.5 for B7 , 0.3 b2
        # Change the input features from 1280 b0 to 2560 (B7's feature size) 1408 b2
        self.classifier = nn.Linear(1408, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x).squeeze(-1).squeeze(-1)
        x = self.dropout(x)
        return self.classifier(x)
    def save_model(self, path, epoch, optimizer, scheduler, best_val_loss):
        """Save model checkpoint with additional training information"""
        torch.save({
            'epoch': epoch,
            'model_state_dict': self.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict() if scheduler else None,
            'best_val_loss': best_val_loss
        }, path)

    def load_model(self, path, optimizer=None, scheduler=None):
        """Load model checkpoint with additional training information"""
        checkpoint = torch.load(path)
        self.load_state_dict(checkpoint['model_state_dict'])
        if optimizer:
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if scheduler and checkpoint['scheduler_state_dict']:
            scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        return checkpoint['epoch'], checkpoint['best_val_loss']

In [59]:
class ProductDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, label = self.data[idx]
        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform:
            img = self.transform(img)

        return img, label

In [60]:
class RamyProductDetector:
    def __init__(self, augmented_folder, products_file, model_path=None):
        # Update the detection model initialization to use fine-tuned weights if available
        if model_path and os.path.exists(model_path):
            self.detection_model = YOLO(model_path)
        else:
            self.detection_model = YOLO('yolov8x.pt')
        self.classification_model = None
        self.augmented_folder = Path(augmented_folder)
        self.products_df = pd.read_excel(products_file)
        self.class_mapping = {}
        self.inv_class_mapping = {}
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((260,260)),  # Increased from 224x224 , b2 260
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def prepare_data(self, epochs=30, batch_size=16):
        print("Preparing training data...")
        data = []

        # Create standardized product to family mapping from Excel file
        product_family_map = {}
        for index, row in self.products_df.iterrows():
            std_name = standardize_product_name(row['Produit'])
            product_family_map[std_name] = row['Famille']
            product_family_map[row['Produit']] = row['Famille']

        # Create family to index mapping with Others class
        unique_families = sorted(self.products_df['Famille'].unique())
        unique_families.append('Others')  # Add Others class
        self.family_mapping = {family: idx for idx, family in enumerate(unique_families)}
        self.inv_family_mapping = {idx: family for family, idx in self.family_mapping.items()}

        print(f"Found {len(self.family_mapping)} unique families (including Others) in Excel file")


        # Keep track of matching results
        products_not_found = set()
        products_matched = {}
        families_found = set()
        image_extensions = ['*.png', '*.jpg']
        for ext in image_extensions:
          for img_path in self.augmented_folder.glob(ext):
              product_name = img_path.stem.split('_aug_')[0]
              std_product_name = standardize_product_name(product_name)

              family = None
              matched_name = None

              # Try exact match first
              if product_name in product_family_map:
                  family = product_family_map[product_name]
                  matched_name = product_name
              elif std_product_name in product_family_map:
                  family = product_family_map[std_product_name]
                  matched_name = std_product_name
              else:
                  # Try fuzzy matching
                  excel_names = [standardize_product_name(p) for p in self.products_df['Produit']]
                  matches = get_close_matches(std_product_name, excel_names, n=1, cutoff=0.8)

                  if matches:
                      matched_std_name = matches[0]
                      for orig_name in self.products_df['Produit']:
                          if standardize_product_name(orig_name) == matched_std_name:
                              family = product_family_map[orig_name]
                              matched_name = orig_name
                              break

              if family is not None:
                  families_found.add(family)
                  family_idx = self.family_mapping[family]
                  data.append((img_path, family_idx))
                  products_matched[product_name] = matched_name
              else:
                  # Add to Others class for training
                  others_idx = self.family_mapping['Others']
                  data.append((img_path, others_idx))
                  products_not_found.add(product_name)

        # Print detailed report
        print("\nMatching Report:")
        print("================")
        print(f"Total images processed: {len(data) + len(products_not_found)}")
        print(f"Successfully matched: {len(data)}")
        print(f"Unmatched: {len(products_not_found)}")
        print(f"\nUnique families found: {len(families_found)}")
        print("Families and their products:")

        family_products = {}
        for prod, matched in products_matched.items():
            fam = product_family_map[matched]
            if fam not in family_products:
                family_products[fam] = []
            family_products[fam].append(prod)

        for family, products in family_products.items():
            print(f"\n{family}:")
            for prod in sorted(products):
                print(f"  - {prod} -> {products_matched[prod]}")

        print("\nUnmatched products:")
        for prod in sorted(products_not_found):
            print(f"  - {prod}")

        if not data:
            raise ValueError("No valid data found for training! Check product names and Excel file.")

        # Create inverse mapping for later use
        self.inv_family_mapping = {v: k for k, v in self.family_mapping.items()}

        # Split data into train and validation sets
        train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

        print(f"\nTraining set size: {len(train_data)}")
        print(f"Validation set size: {len(val_data)}")

        # Create datasets and dataloaders
        train_dataset = ProductDataset(train_data, self.transform)
        val_dataset = ProductDataset(val_data, self.transform)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size)

        # Initialize model with number of families instead of products
        self.classification_model = ProductClassifier(len(self.family_mapping))

        # Train the model
        self._train_model(train_loader, val_loader, epochs)

    def _train_model(self, train_loader, val_loader, epochs):
        print("Training classification model...")
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.classification_model.to(device)

        # Calculate class weights for balanced training
        all_labels = []
        for _, labels in train_loader:
            all_labels.extend(labels.numpy())

        class_counts = np.bincount(all_labels)
        total_samples = len(all_labels)
        class_weights = torch.FloatTensor(total_samples / (len(class_counts) * class_counts))
        class_weights = class_weights.to(device)

        # Use weighted loss for imbalanced classes
        criterion = nn.CrossEntropyLoss(weight=class_weights)
        optimizer = optim.AdamW(self.classification_model.parameters(), lr=2e-4, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=3, factor=0.2)

        best_val_loss = float('inf')
        model_save_path = 'model_checkpoints'
        os.makedirs(model_save_path, exist_ok=True)

        # Add data augmentation for minority classes
        train_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation(10),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.Resize((260, 260)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        for epoch in range(epochs):
            # Training phase with weighted sampling
            self.classification_model.train()
            train_loss = 0
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                # Apply augmentation to minority classes
                if random.random() < 0.5:  # 50% chance to apply augmentation
                    inputs = torch.stack([train_transform(img) for img in inputs])

                optimizer.zero_grad()
                outputs = self.classification_model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()

            # Validation phase
            self.classification_model.eval()
            val_loss = 0
            correct = 0
            total = 0
            class_correct = torch.zeros(len(self.family_mapping))
            class_total = torch.zeros(len(self.family_mapping))

            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = self.classification_model(inputs)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    # Per-class accuracy
                    for i in range(len(self.family_mapping)):
                        mask = labels == i
                        class_correct[i] += (predicted[mask] == labels[mask]).sum().item()
                        class_total[i] += mask.sum().item()

            avg_val_loss = val_loss / len(val_loader)
            scheduler.step(avg_val_loss)

            # Save model checkpoint
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                self.classification_model.save_model(
                    os.path.join(model_save_path, f'best_model_epoch_{epoch+1}.pth'),
                    epoch + 1,
                    optimizer,
                    scheduler,
                    best_val_loss
                )

            # Print training statistics
            print(f'Epoch {epoch+1}/{epochs}')
            print(f'Train Loss: {train_loss/len(train_loader):.4f}')
            print(f'Val Loss: {avg_val_loss:.4f}')
            print(f'Overall Val Accuracy: {100*correct/total:.2f}%')

            # Print per-class accuracy
            print("\nPer-class validation accuracy:")
            for family, idx in self.family_mapping.items():
                if class_total[idx] > 0:
                    accuracy = 100 * class_correct[idx] / class_total[idx]
                    print(f'{family}: {accuracy:.2f}%')

        # Load best model at the end of training
        self.classification_model.load_model(
            os.path.join(model_save_path, f'best_model_epoch_{epoch+1}.pth'),
            optimizer,
            scheduler
        )

    def detect_products(self, image_folder, output_folder):
      print("Detecting and classifying products...")
      results_list = []
      Path(output_folder).mkdir(exist_ok=True, parents=True)

      # Handle multiple image formats
      image_paths = []
      image_paths.extend(Path(image_folder).glob('*.png'))
      image_paths.extend(Path(image_folder).glob('*.jpg'))
      image_paths.extend(Path(image_folder).glob('*.jpeg'))

      for img_path in image_paths:
          image = cv2.imread(str(img_path))
          if image is None:
              print(f"Warning: Could not read image {img_path}")
              continue

          detections = self.detection_model(image)[0]

          for det in detections.boxes.data:
              x1, y1, x2, y2, conf, _ = det
              if conf > 0.1:  # YOLO confidence threshold
                  roi = image[int(y1):int(y2), int(x1):int(x2)]
                  if roi.size == 0:
                      continue

                  try:
                      classification = self.classify_product(roi)

                      # Define color based on classification
                      color = (0, 255, 0) if classification['family'] != 'Others' else (0, 0, 255)

                      results_list.append({
                          'image': img_path.name,
                          'product': classification['product'],
                          'family': classification['family'],
                          'confidence': float(conf),
                          'classification_confidence': classification.get('confidence', 0.0),
                          'x1': int(x1),
                          'y1': int(y1),
                          'x2': int(x2),
                          'y2': int(y2)
                      })

                      # Draw bounding box and label
                      cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
                      label = f"{classification['family']} ({classification.get('confidence', 0.0):.2f})"
                      cv2.putText(image, label, (int(x1), int(y1)-10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                  except Exception as e:
                      print(f"Error classifying ROI: {e}")
                      continue

          output_path = os.path.join(output_folder, f"annotated_{img_path.name}")
          cv2.imwrite(output_path, image)

      return pd.DataFrame(results_list)

    def classify_product(self, roi_image):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.classification_model.eval()

        roi_image = cv2.cvtColor(roi_image, cv2.COLOR_BGR2RGB)
        roi_tensor = self.transform(roi_image).unsqueeze(0).to(device)

        with torch.no_grad():
            outputs = self.classification_model(roi_tensor)
            probabilities = torch.softmax(outputs, dim=1)
            confidence, predicted = torch.max(probabilities, 1)
            predicted_idx = predicted.item()
            confidence = confidence.item()

        if predicted_idx not in self.inv_family_mapping:
            print(f"Warning: Predicted index {predicted_idx} not found in mapping")
            return {'product': 'Unknown', 'family': 'Unknown', 'confidence': 0.0}

        family = self.inv_family_mapping[predicted_idx]

        try:
            # Get all products from this family
            family_products = self.products_df[self.products_df['Famille'] == family]['Produit']

            if family_products.empty:
                print(f"Warning: No products found for family {family}")
                return {'product': 'Unknown', 'family': family, 'confidence': confidence}

            # Select the first available product as representative
            product = family_products.iloc[0]

        except Exception as e:
            print(f"Error selecting product for family {family}: {e}")
            return {'product': 'Unknown', 'family': family, 'confidence': confidence}

        return {
            'product': product,
            'family': family,
            'confidence': confidence
        }

In [61]:
import json
from collections import defaultdict

class ProductAnalyzer:
    def __init__(self, detection_results_df):
        self.results_df = detection_results_df

    def analyze_product_appearances(self):
        """Analyze product appearances per image and generate comparison metrics"""
        analysis_results = {}

        # Group detections by image
        image_groups = self.results_df.groupby('image')

        for image_name, image_detections in image_groups:
            # Initialize counters for this image
            ramy_products = defaultdict(int)
            competitor_products = defaultdict(int)
            total_products = len(image_detections)

            # Count products
            for _, detection in image_detections.iterrows():
                family = detection['family']
                if family != 'Others':  # Ramy product
                    ramy_products[family] += 1
                else:
                    competitor_products['Others'] += 1

            # Calculate statistics
            total_ramy = sum(ramy_products.values())
            total_competitors = sum(competitor_products.values())

            # Prepare detailed analysis for this image
            image_analysis = {
                "total_products_detected": total_products,
                "ramy_products": {
                    "total": total_ramy,
                    "percentage": (total_ramy / total_products * 100) if total_products > 0 else 0,
                    "by_family": {
                        family: {
                            "count": count,
                            "percentage": (count / total_products * 100) if total_products > 0 else 0
                        }
                        for family, count in ramy_products.items()
                    }
                },
                "competitor_products": {
                    "total": total_competitors,
                    "percentage": (total_competitors / total_products * 100) if total_products > 0 else 0
                }
            }

            analysis_results[image_name] = image_analysis

        return analysis_results

    def generate_summary_stats(self, analysis_results):
        """Generate overall summary statistics"""
        total_detections = 0
        total_ramy = 0
        family_totals = defaultdict(int)
        total_competitors = 0

        for image_analysis in analysis_results.values():
            total_detections += image_analysis["total_products_detected"]
            total_ramy += image_analysis["ramy_products"]["total"]
            total_competitors += image_analysis["competitor_products"]["total"]

            for family, data in image_analysis["ramy_products"]["by_family"].items():
                family_totals[family] += data["count"]

        summary = {
            "overall_statistics": {
                "total_products_detected": total_detections,
                "total_ramy_products": total_ramy,
                "total_competitor_products": total_competitors,
                "ramy_market_share": (total_ramy / total_detections * 100) if total_detections > 0 else 0,
                "competitor_market_share": (total_competitors / total_detections * 100) if total_detections > 0 else 0
            },
            "ramy_family_distribution": {
                family: {
                    "total": count,
                    "percentage": (count / total_ramy * 100) if total_ramy > 0 else 0
                }
                for family, count in family_totals.items()
            }
        }

        return summary

    def export_to_json(self, output_path):
        """Generate and export the complete analysis to JSON"""
        analysis_results = self.analyze_product_appearances()
        summary_stats = self.generate_summary_stats(analysis_results)

        complete_analysis = {
            "summary": summary_stats,
            "per_image_analysis": analysis_results
        }

        # Export to JSON with proper formatting
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(complete_analysis, f, indent=2, ensure_ascii=False)

        return complete_analysis

def analyze_detections_and_export(detection_results_path, output_json_path):
    """Convenience function to analyze detections and export results"""
    # Read detection results
    detection_results = pd.read_excel(detection_results_path)

    # Create analyzer and export results
    analyzer = ProductAnalyzer(detection_results)
    analysis_results = analyzer.export_to_json(output_json_path)

    print(f"Analysis complete. Results saved to {output_json_path}")
    return analysis_results

In [62]:
# Setup paths
base_dir = '/content/drive/MyDrive/AUP'
reference_folder = os.path.join(base_dir, 'Ramy')
augmented_folder = os.path.join(base_dir, 'augmented')
rayonnage_folder = os.path.join(base_dir, 'Rayonnage')
output_folder = os.path.join(base_dir, 'output')
products_file = os.path.join(base_dir, 'Liste_Produit_Ramy1.xlsx')
yolo_model_path = os.path.join(base_dir, 'train/weights/best.pt')  # YOUR PRETRAINED MODEL PATH HERE

In [63]:
# Initialize augmenter
print("Starting data augmentation...")
augmenter = DataAugmenter(
    input_folder='/content/drive/MyDrive/AUP/Ramy',
    output_folder='/content/drive/MyDrive/AUP/augmented',
    num_augmentations=5
)

# Run augmentation
augmenter.augment_dataset()

# Verify results
augmenter.verify_augmentations()

Starting data augmentation...


  A.RandomShadow(
  original_init(self, **validated_kwargs)
  A.OpticalDistortion(


Processed 10/123 images (8.1%)
Processed 20/123 images (16.3%)
Processed 30/123 images (24.4%)
Processed 40/123 images (32.5%)
Processed 50/123 images (40.7%)
Processed 60/123 images (48.8%)
Processed 70/123 images (56.9%)
Processed 80/123 images (65.0%)
Processed 90/123 images (73.2%)
Processed 100/123 images (81.3%)
Processed 110/123 images (89.4%)
Processed 120/123 images (97.6%)

Verifying augmented images...
Verified 50/738 images (6.8%)
Verified 100/738 images (13.6%)
Verified 150/738 images (20.3%)
Verified 200/738 images (27.1%)
Verified 250/738 images (33.9%)
Verified 300/738 images (40.7%)
Verified 350/738 images (47.4%)
Verified 400/738 images (54.2%)
Verified 450/738 images (61.0%)
Verified 500/738 images (67.8%)
Verified 550/738 images (74.5%)
Verified 600/738 images (81.3%)
Verified 650/738 images (88.1%)
Verified 700/738 images (94.9%)

All augmented images are valid!


In [None]:
# Initialize and train detector with augmented dataset
print("Initializing product detector...")
detector = RamyProductDetector(augmented_folder, products_file, model_path=yolo_model_path)
detector.prepare_data(epochs=5)


Initializing product detector...
Preparing training data...
Found 46 unique families (including Others) in Excel file

Matching Report:
Total images processed: 778
Successfully matched: 738
Unmatched: 40

Unique families found: 19
Families and their products:

PET Boisson Energétique 33 CL:
  - ENERGY DRINK 33CL -> PET Energie drink 33 CL
  - ENERGY POWER FRUITD 33CL -> PET Energie Power Fruits 33 CL
  - PET_Energie_Classique_33cl -> energie classique
  - PET_Energie_Classique_33cl(1) -> PET Energie Classique 33CL
  - PET_Energie_Menthe_33cl -> energie menthe
  - PET_Energie_Miel_33cl -> energie miel
  - PET_Energie_Power_Fruits_33cl -> energie power fruits

PET Extra Jus 2 L:
  - PET_Extra_Ananas_2L -> extra ananas
  - extra_ananas30cl -> extra ananas

Ramy Kids 110 ML:
  - Cannete_Jus_Orange_Abricot_24cl -> Kids Orange Abricot 110 ML
  - Cannete_Jus_Orange_Peche_24cl -> Kids Orange Peche 110 ML
  - PET_Extra_Orange__Peche_Fraise_2L -> Kids Orange Peche Fraise 110 ML
  - PET_Ramy_Oran

  class_weights = torch.FloatTensor(total_samples / (len(class_counts) * class_counts))


Epoch 1/5
Train Loss: 3.3722
Val Loss: 2.9569
Overall Val Accuracy: 39.86%

Per-class validation accuracy:
Canette Jus 24 CL: 0.00%
PET Boisson Energétique 33 CL: 87.50%
PET Boisson Maltée 33 CL: 100.00%
PET Extra 30 CL: 75.00%
PET Extra Jus 2 L: 0.00%
PET Frutty 1.25L: 50.00%
PET Frutty Jus 2 L: 0.00%
PET Ramy 1,25 L: 64.71%
Pack Frutty 1 L: 100.00%
Pack Frutty 2 L: 85.71%
Pack Frutty Kids 20 CL: 0.00%
Pack Ramy 1 L: 0.00%
Pack Ramy 2 L: 40.00%
Ramy Kids 110 ML: 21.43%
Ramy Kids 125 ML: 100.00%
Ramy UP 20 CL: 57.14%
Water Fruits 1.25L: 100.00%
Water Fruits 33 CL: 0.00%
Others: 5.00%
Epoch 2/5
Train Loss: 2.0968
Val Loss: 1.4047
Overall Val Accuracy: 56.76%

Per-class validation accuracy:
Canette Jus 24 CL: 100.00%
PET Boisson Energétique 33 CL: 87.50%
PET Boisson Maltée 33 CL: 100.00%
PET Extra 30 CL: 100.00%
PET Extra Jus 2 L: 50.00%
PET Frutty 1.25L: 0.00%
PET Frutty Jus 2 L: 33.33%
PET Ramy 1,25 L: 100.00%
Pack Frutty 1 L: 100.00%
Pack Frutty 2 L: 57.14%
Pack Frutty Kids 20 CL: 100

In [None]:
# Detect products in rayonnage images
print("Processing rayonnage images...")
results_df = detector.detect_products(rayonnage_folder, output_folder)

# Save results
results_path = os.path.join(output_folder, "detections.xlsx")
results_df.to_excel(results_path, index=False)
print(f"Results saved to {results_path}")