In [1]:
"""
Product Matcher v4 - GPU Optimized (RTX 3050 Ready)
===================================================
- All deprecated parameters have been fixed
- Optimized for 4 GB of VRAM
- FAISS on CPU (the GPU version is optional)
- Encoder + Reranker on GPU
"""

import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, CrossEncoder
import faiss
import re
from tqdm import tqdm
from typing import List, Dict, Tuple, Optional
import warnings
import gc
import os
from pathlib import Path
import torch

warnings.filterwarnings('ignore')


def check_gpu_status():
    """Print a PyTorch/CUDA/FAISS diagnostic report.

    Lists the PyTorch version, whether CUDA is available and, when it is,
    the CUDA version, every visible GPU (name, memory, compute capability)
    and the current allocated/reserved memory. Finally reports whether the
    installed ``faiss`` build exposes ``StandardGpuResources``.

    Returns:
        The value of ``torch.cuda.is_available()``.
    """
    rule = "=" * 60
    has_cuda = torch.cuda.is_available()

    print(rule)
    print("üîç –ü–†–û–í–ï–†–ö–ê GPU")
    print(rule)

    print(f"\nüì¶ PyTorch version: {torch.__version__}")
    print(f"üîß CUDA available: {has_cuda}")

    if not has_cuda:
        print("‚ö†Ô∏è  GPU –Ω–µ–¥–æ—Å—Ç—É–ø–µ–Ω! –ë—É–¥–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω CPU.")
    else:
        gpu_count = torch.cuda.device_count()
        print(f"üéÆ CUDA version: {torch.version.cuda}")
        print(f"üìä GPU count: {gpu_count}")

        # One short section per visible device.
        for gpu_idx in range(gpu_count):
            info = torch.cuda.get_device_properties(gpu_idx)
            print(f"\n   GPU {gpu_idx}: {info.name}")
            print(f"   Memory: {info.total_memory / 1e9:.1f} GB")
            print(f"   Compute capability: {info.major}.{info.minor}")

        allocated_gb = torch.cuda.memory_allocated() / 1e9
        reserved_gb = torch.cuda.memory_reserved() / 1e9
        print(f"\nüíæ –¢–µ–∫—É—â–µ–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ VRAM:")
        print(f"   Allocated: {allocated_gb:.2f} GB")
        print(f"   Cached: {reserved_gb:.2f} GB")

    faiss_has_gpu = hasattr(faiss, 'StandardGpuResources')
    print(f"\nüì¶ FAISS GPU support: {faiss_has_gpu}")
    print(rule)

    return torch.cuda.is_available()


class ProductMatcherGPU:
    """
    GPU-optimized product matcher:
    1. Retrieval: BGE-M3 (GPU) + FAISS -> top-K candidates
    2. Reranking: BGE-reranker-v2-m3 (GPU) -> best candidate
    """
    
    _encoder_instance = None
    _reranker_instance = None
    
    def __init__(self, 
                 encoder_model: str = 'BAAI/bge-m3',
                 reranker_model: str = 'BAAI/bge-reranker-v2-m3',
                 cache_dir: str = './cache',
                 use_fp16: bool = True,
                 gpu_id: int = 0,
                 force_gpu: bool = False):
        """
        Args:
            encoder_model: –ú–æ–¥–µ–ª—å –¥–ª—è —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤
            reranker_model: –ú–æ–¥–µ–ª—å –¥–ª—è –ø–µ—Ä–µ—Ä–∞–Ω–∂–∏—Ä–æ–≤–∞–Ω–∏—è
            cache_dir: –î–∏—Ä–µ–∫—Ç–æ—Ä–∏—è –¥–ª—è –∫–µ—à–∞
            use_fp16: –ò—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å float16 –¥–ª—è —ç–∫–æ–Ω–æ–º–∏–∏ VRAM
            gpu_id: ID GPU –¥–ª—è –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è
            force_gpu: –ü—Ä–∏–Ω—É–¥–∏—Ç–µ–ª—å–Ω–æ —Ç—Ä–µ–±–æ–≤–∞—Ç—å GPU
        """
        
        self.gpu_available = torch.cuda.is_available()
        self.gpu_id = gpu_id
        
        if force_gpu and not self.gpu_available:
            raise RuntimeError("‚ùå GPU –Ω–µ –Ω–∞–π–¥–µ–Ω!")
        
        # –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è –∞—Ç—Ä–∏–±—É—Ç–æ–≤
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(exist_ok=True)
        self.faiss_index: Optional[faiss.Index] = None
        self.gpu_resources = None
        self.eva_texts_clean: Optional[List[str]] = None
        self.eva_df: Optional[pd.DataFrame] = None
        
        # –£—Å—Ç—Ä–æ–π—Å—Ç–≤–æ
        if self.gpu_available:
            self.device = f'cuda:{gpu_id}'
            torch.cuda.set_device(gpu_id)
        else:
            self.device = 'cpu'
        
        print("=" * 60)
        print("üöÄ –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è ProductMatcherGPU")
        print("=" * 60)
        print(f"üñ•Ô∏è  Device: {self.device}")
        
        if self.gpu_available:
            props = torch.cuda.get_device_properties(gpu_id)
            self.vram_total = props.total_memory / 1e9
            print(f"üéÆ GPU: {props.name}")
            print(f"üíæ VRAM: {self.vram_total:.1f} GB")
            
            # FAISS-GPU (–æ–ø—Ü–∏–æ–Ω–∞–ª—å–Ω–æ)
            self.faiss_gpu_available = hasattr(faiss, 'StandardGpuResources')
            if self.faiss_gpu_available:
                try:
                    self.gpu_resources = faiss.StandardGpuResources()
                    self.gpu_resources.setTempMemory(256 * 1024 * 1024)  # 256MB
                    print(f"üîß FAISS-GPU: ‚úÖ –î–æ—Å—Ç—É–ø–µ–Ω")
                except:
                    self.faiss_gpu_available = False
                    print(f"üîß FAISS-GPU: ‚ùå –û—à–∏–±–∫–∞ –∏–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏–∏")
            else:
                print(f"üîß FAISS-GPU: ‚ùå –ù–µ —É—Å—Ç–∞–Ω–æ–≤–ª–µ–Ω (–∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è CPU)")
        else:
            self.vram_total = 0
            self.faiss_gpu_available = False
        
        # Dtype
        if use_fp16 and self.gpu_available:
            self.model_dtype = torch.float16
            print("‚ö° Precision: FP16")
        else:
            self.model_dtype = torch.float32
            print("üìä Precision: FP32")
        
        # –ó–∞–≥—Ä—É–∑–∫–∞ –º–æ–¥–µ–ª–µ–π
        self._load_encoder(encoder_model)
        self._load_reranker(reranker_model)
        
        if self.gpu_available:
            self._print_gpu_memory()
        
        print("\n‚úÖ –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è –∑–∞–≤–µ—Ä—à–µ–Ω–∞!")
        print("=" * 60)
    
    def _load_encoder(self, model_name: str):
        """–ó–∞–≥—Ä—É–∑–∫–∞ encoder –Ω–∞ GPU"""
        if ProductMatcherGPU._encoder_instance is None:
            print(f"\nüì• –ó–∞–≥—Ä—É–∑–∫–∞ encoder: {model_name}")
            
            try:
                # –°–æ–≤—Ä–µ–º–µ–Ω–Ω—ã–π —Å–ø–æ—Å–æ–± —Å dtype
                ProductMatcherGPU._encoder_instance = SentenceTransformer(
                    model_name,
                    device=self.device,
                    model_kwargs={
                        'torch_dtype': self.model_dtype,
                    }
                )
            except Exception as e1:
                print(f"   ‚ö†Ô∏è –ü–æ–ø—ã—Ç–∫–∞ 1 –Ω–µ —É–¥–∞–ª–∞—Å—å: {e1}")
                try:
                    # Fallback –±–µ–∑ kwargs
                    ProductMatcherGPU._encoder_instance = SentenceTransformer(
                        model_name,
                        device=self.device
                    )
                    if self.model_dtype == torch.float16 and self.gpu_available:
                        ProductMatcherGPU._encoder_instance.half()
                except Exception as e2:
                    print(f"   ‚ùå –û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏: {e2}")
                    raise
            
            if self.gpu_available:
                try:
                    dev = next(ProductMatcherGPU._encoder_instance.parameters()).device
                    print(f"   ‚úÖ Encoder –Ω–∞: {dev}")
                except:
                    pass
        else:
            print("‚úÖ Encoder —É–∂–µ –∑–∞–≥—Ä—É–∂–µ–Ω")
        
        self.encoder = ProductMatcherGPU._encoder_instance
    
    def _load_reranker(self, model_name: str):
        """–ó–∞–≥—Ä—É–∑–∫–∞ reranker –Ω–∞ GPU"""
        if ProductMatcherGPU._reranker_instance is None:
            print(f"\nüì• –ó–∞–≥—Ä—É–∑–∫–∞ reranker: {model_name}")
            
            try:
                # –°–æ–≤—Ä–µ–º–µ–Ω–Ω—ã–π —Å–ø–æ—Å–æ–± —Å model_kwargs
                ProductMatcherGPU._reranker_instance = CrossEncoder(
                    model_name,
                    max_length=512,
                    device=self.device,
                    model_kwargs={'torch_dtype': self.model_dtype}
                )
            except Exception as e1:
                print(f"   ‚ö†Ô∏è –ü–æ–ø—ã—Ç–∫–∞ 1 –Ω–µ —É–¥–∞–ª–∞—Å—å: {e1}")
                try:
                    # Fallback
                    ProductMatcherGPU._reranker_instance = CrossEncoder(
                        model_name,
                        max_length=512,
                        device=self.device
                    )
                    if self.model_dtype == torch.float16 and self.gpu_available:
                        ProductMatcherGPU._reranker_instance.model.half()
                except Exception as e2:
                    print(f"   ‚ùå –û—à–∏–±–∫–∞ –∑–∞–≥—Ä—É–∑–∫–∏: {e2}")
                    raise
            
            if self.gpu_available:
                try:
                    dev = next(ProductMatcherGPU._reranker_instance.model.parameters()).device
                    print(f"   ‚úÖ Reranker –Ω–∞: {dev}")
                except:
                    pass
        else:
            print("‚úÖ Reranker —É–∂–µ –∑–∞–≥—Ä—É–∂–µ–Ω")
        
        self.reranker = ProductMatcherGPU._reranker_instance
    
    def _print_gpu_memory(self):
        """–í—ã–≤–æ–¥ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è GPU"""
        if self.gpu_available:
            allocated = torch.cuda.memory_allocated(self.gpu_id) / 1e9
            reserved = torch.cuda.memory_reserved(self.gpu_id) / 1e9
            print(f"\nüíæ GPU Memory: {allocated:.2f} / {self.vram_total:.1f} GB (reserved: {reserved:.2f} GB)")
    
    def _get_free_vram(self) -> float:
        """–ü–æ–ª—É—á–∏—Ç—å —Å–≤–æ–±–æ–¥–Ω—É—é VRAM –≤ GB"""
        if not self.gpu_available:
            return 0
        allocated = torch.cuda.memory_allocated(self.gpu_id) / 1e9
        return self.vram_total - allocated
    
    @classmethod
    def clear_models(cls):
        """–û—á–∏—Å—Ç–∫–∞ –º–æ–¥–µ–ª–µ–π –∏ GPU –ø–∞–º—è—Ç–∏"""
        print("üßπ –û—á–∏—Å—Ç–∫–∞ –º–æ–¥–µ–ª–µ–π...")
        cls._encoder_instance = None
        cls._reranker_instance = None
        gc.collect()
        
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
            print(f"   GPU memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
        
        print("‚úÖ –û—á–∏—â–µ–Ω–æ")
    
    # =========================================================================
    # Text cleaning
    # =========================================================================
    
    def clean_text(self, text: str) -> str:
        """–ù–æ—Ä–º–∞–ª–∏–∑–∞—Ü–∏—è —Ç–µ–∫—Å—Ç–∞"""
        if pd.isna(text):
            return ""
        
        text = str(text).lower().strip()
        
        # –ï–¥–∏–Ω–∏—Ü—ã –∏–∑–º–µ—Ä–µ–Ω–∏—è
        replacements = [
            (r'(\d+)\s*–º–ª\b', r'\1–º–ª'),
            (r'(\d+)\s*ml\b', r'\1–º–ª'),
            (r'(\d+)\s*–≥\b', r'\1–≥'),
            (r'(\d+)\s*g\b', r'\1–≥'),
            (r'(\d+)\s*–∫–≥\b', r'\1–∫–≥'),
            (r'(\d+)\s*kg\b', r'\1–∫–≥'),
            (r'(\d+)\s*–ª\b', r'\1–ª'),
            (r'(\d+)\s*l\b', r'\1–ª'),
            (r'(\d+)\s*—à—Ç\b', r'\1—à—Ç'),
        ]
        
        for pattern, repl in replacements:
            text = re.sub(pattern, repl, text)
        
        # –û—á–∏—Å—Ç–∫–∞
        text = re.sub(r'[^\w\s\-\.,/]', ' ', text)
        text = re.sub(r'\s+', ' ', text)
        
        return text.strip()
    
    def clean_texts_batch(self, texts: List[str], desc: str = "–û—á–∏—Å—Ç–∫–∞") -> List[str]:
        """Apply ``clean_text`` to every item, showing a tqdm bar labelled ``desc``."""
        cleaned = []
        for raw_text in tqdm(texts, desc=desc):
            cleaned.append(self.clean_text(raw_text))
        return cleaned
    
    # =========================================================================
    # Embeddings
    # =========================================================================
    
    def create_embeddings(self, 
                          texts: List[str], 
                          batch_size: int = 8,
                          show_progress: bool = True) -> np.ndarray:
        """–°–æ–∑–¥–∞–Ω–∏–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤ (GPU)"""
        
        # –ê–≤—Ç–æ–ø–æ–¥–±–æ—Ä batch_size –¥–ª—è RTX 3050
        if self.gpu_available:
            free_vram = self._get_free_vram()
            if free_vram < 1.5:
                batch_size = min(batch_size, 4)
            elif free_vram < 2.0:
                batch_size = min(batch_size, 8)
            elif free_vram < 3.0:
                batch_size = min(batch_size, 16)
            
            print(f"   Batch size: {batch_size} (free VRAM: {free_vram:.1f} GB)")
        
        embeddings = self.encoder.encode(
            texts,
            batch_size=batch_size,
            show_progress_bar=show_progress,
            normalize_embeddings=True,
            convert_to_numpy=True,
            device=self.device
        )
        
        return embeddings.astype('float32')
    
    # =========================================================================
    # FAISS
    # =========================================================================
    
    def create_faiss_index(self, embeddings: np.ndarray) -> faiss.Index:
        """
        Build an inner-product FAISS index over ``embeddings``.

        A CPU ``IndexFlatIP`` is always built first. If FAISS-GPU was
        initialised and more than 0.5 GB of VRAM is estimated to be free,
        the index is copied to the GPU; when that copy fails the CPU index
        is returned instead.
        """
        cpu_index = faiss.IndexFlatIP(embeddings.shape[1])
        cpu_index.add(embeddings)

        # Short-circuit order matters: VRAM is only queried when FAISS-GPU
        # is actually usable.
        gpu_possible = (
            self.faiss_gpu_available
            and self.gpu_resources
            and self._get_free_vram() > 0.5
        )
        if gpu_possible:
            try:
                on_gpu = faiss.index_cpu_to_gpu(self.gpu_resources, self.gpu_id, cpu_index)
            except Exception as e:
                print(f"   ‚ö†Ô∏è FAISS –Ω–∞ GPU –Ω–µ —É–¥–∞–ª–æ—Å—å: {e}")
            else:
                print(f"   ‚úÖ FAISS –Ω–∞ GPU")
                return on_gpu

        print(f"   üìä FAISS –Ω–∞ CPU")
        return cpu_index
    
    def _index_to_cpu(self, index: faiss.Index) -> faiss.Index:
        """
        Return a CPU version of ``index`` when a conversion is possible.

        If this faiss build has no ``index_gpu_to_cpu`` or the conversion
        raises, the index is returned unchanged.
        """
        to_cpu = getattr(faiss, 'index_gpu_to_cpu', None)
        if to_cpu is None:
            return index

        try:
            return to_cpu(index)
        except Exception:
            # Best-effort fallback kept from the original, but narrowed from
            # a bare ``except:`` so KeyboardInterrupt/SystemExit propagate.
            return index
    
    # =========================================================================
    # Caching
    # =========================================================================
    
    def _get_cache_path(self, name: str) -> Path:
        return self.cache_dir / name
    
    def save_index(self,
                   df: pd.DataFrame,
                   text_column: str,
                   cache_name: str = 'products'):
        """
        Build the search index for ``df`` and persist it to the cache.

        Steps: clean the texts of ``text_column``, embed them, build the
        FAISS index, then write four files named after ``cache_name``
        (embeddings ``.npy``, cleaned texts parquet, FAISS index, and the
        product frame parquet). The in-memory state (``faiss_index``,
        ``eva_texts_clean``, ``eva_df``) is set only after all writes.

        Returns:
            Tuple ``(index, texts_clean)``.
        """
        banner = "=" * 60
        print("\n" + banner)
        print(f"üíæ –°–æ–∑–¥–∞–Ω–∏–µ –∏–Ω–¥–µ–∫—Å–∞ ({len(df)} —Ç–æ–≤–∞—Ä–æ–≤)")
        print(banner)

        # Step 1: text cleaning.
        print("\n1Ô∏è‚É£ –û—á–∏—Å—Ç–∫–∞ —Ç–µ–∫—Å—Ç–æ–≤...")
        raw_texts = df[text_column].tolist()
        texts_clean = self.clean_texts_batch(raw_texts)

        # Step 2: embeddings.
        print("\n2Ô∏è‚É£ –°–æ–∑–¥–∞–Ω–∏–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤...")
        embeddings = self.create_embeddings(texts_clean)

        # Step 3: FAISS index (may live on the GPU).
        print("\n3Ô∏è‚É£ –°–æ–∑–¥–∞–Ω–∏–µ FAISS –∏–Ω–¥–µ–∫—Å–∞...")
        index = self.create_faiss_index(embeddings)

        # Step 4: persistence; the index is written from its CPU form.
        print("\n4Ô∏è‚É£ –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ...")
        cpu_index = self._index_to_cpu(index)

        embeddings_file = self._get_cache_path(f'{cache_name}_embeddings.npy')
        texts_file = self._get_cache_path(f'{cache_name}_texts.parquet')
        index_file = self._get_cache_path(f'{cache_name}_index.faiss')
        products_file = self._get_cache_path(f'{cache_name}_products.parquet')

        np.save(embeddings_file, embeddings)
        texts_frame = pd.DataFrame({'text_clean': texts_clean})
        texts_frame.to_parquet(texts_file)
        faiss.write_index(cpu_index, str(index_file))
        df.to_parquet(products_file)

        print(f"‚úÖ –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ –≤ {self.cache_dir}/")

        self.faiss_index = index
        self.eva_texts_clean = texts_clean
        self.eva_df = df

        if self.gpu_available:
            self._print_gpu_memory()

        return index, texts_clean
    
    def load_index(self, cache_name: str = 'products') -> bool:
        """–ó–∞–≥—Ä—É–∑–∫–∞ –∏–Ω–¥–µ–∫—Å–∞ –∏–∑ –∫–µ—à–∞"""
        paths = [
            self._get_cache_path(f'{cache_name}_embeddings.npy'),
            self._get_cache_path(f'{cache_name}_texts.parquet'),
            self._get_cache_path(f'{cache_name}_index.faiss'),
            self._get_cache_path(f'{cache_name}_products.parquet'),
        ]
        
        if not all(p.exists() for p in paths):
            print("‚ö†Ô∏è –ö–µ—à –Ω–µ –Ω–∞–π–¥–µ–Ω")
            return False
        
        print("\nüìÇ –ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ –∫–µ—à–∞...")
        
        self.faiss_index = faiss.read_index(str(paths[2]))
        self.eva_texts_clean = pd.read_parquet(paths[1])['text_clean'].tolist()
        self.eva_df = pd.read_parquet(paths[3])
        
        print(f"‚úÖ –ó–∞–≥—Ä—É–∂–µ–Ω–æ {self.faiss_index.ntotal} —Ç–æ–≤–∞—Ä–æ–≤")
        
        if self.gpu_available:
            self._print_gpu_memory()
        
        return True
    
    # =========================================================================
    # Search and reranking
    # =========================================================================
    
    def search_candidates(self, 
                          query_embeddings: np.ndarray,
                          top_k: int = 10) -> Tuple[np.ndarray, np.ndarray]:
        """–ü–æ–∏—Å–∫ –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤"""
        scores, indices = self.faiss_index.search(query_embeddings.astype('float32'), top_k)
        return indices, scores
    
    def rerank_batch(self,
                     query_texts: List[str],
                     candidates_indices: np.ndarray,
                     batch_size: int = 16) -> List[Tuple[int, float, int]]:
        """–ü–µ—Ä–µ—Ä–∞–Ω–∂–∏—Ä–æ–≤–∞–Ω–∏–µ –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤"""
        
        # –°–æ–±–∏—Ä–∞–µ–º –ø–∞—Ä—ã
        all_pairs = []
        pair_info = []
        
        for q_idx, (query, cand_indices) in enumerate(zip(query_texts, candidates_indices)):
            for pos, idx in enumerate(cand_indices):
                if idx >= 0:
                    all_pairs.append([query, self.eva_texts_clean[idx]])
                    pair_info.append((q_idx, pos, idx))
        
        # –ê–≤—Ç–æ–ø–æ–¥–±–æ—Ä batch_size
        if self.gpu_available:
            free_vram = self._get_free_vram()
            if free_vram < 1.0:
                batch_size = min(batch_size, 8)
            elif free_vram < 1.5:
                batch_size = min(batch_size, 16)
            elif free_vram < 2.0:
                batch_size = min(batch_size, 32)
        
        print(f"   Reranking {len(all_pairs)} –ø–∞—Ä (batch={batch_size})...")
        
        # Predict
        all_scores = self.reranker.predict(
            all_pairs, 
            batch_size=batch_size,
            show_progress_bar=True
        )
        
        # –ì—Ä—É–ø–ø–∏—Ä—É–µ–º
        query_results = {}
        for (q_idx, pos, idx), score in zip(pair_info, all_scores):
            if q_idx not in query_results:
                query_results[q_idx] = []
            query_results[q_idx].append((idx, float(score), pos))
        
        # –õ—É—á—à–∏–π –¥–ª—è –∫–∞–∂–¥–æ–≥–æ
        results = []
        for q_idx in range(len(query_texts)):
            if q_idx in query_results:
                best = max(query_results[q_idx], key=lambda x: x[1])
                results.append(best)
            else:
                results.append((-1, 0.0, -1))
        
        return results
    
    # =========================================================================
    # Main entry point
    # =========================================================================
    
    def match_products(self,
                       competitor_df: pd.DataFrame,
                       competitor_col: str = 'name',
                       our_df: Optional[pd.DataFrame] = None,
                       our_col: str = 'name',
                       top_k: int = 10,
                       threshold: float = 0.5,
                       encoder_batch: int = 8,
                       reranker_batch: int = 16,
                       use_cache: bool = True,
                       cache_name: str = 'products') -> pd.DataFrame:
        """
        –°–æ–ø–æ—Å—Ç–∞–≤–ª–µ–Ω–∏–µ —Ç–æ–≤–∞—Ä–æ–≤
        
        Args:
            competitor_df: DataFrame –∫–æ–Ω–∫—É—Ä–µ–Ω—Ç–∞
            competitor_col: –ö–æ–ª–æ–Ω–∫–∞ —Å –Ω–∞–∑–≤–∞–Ω–∏–µ–º —Ç–æ–≤–∞—Ä–∞
            our_df: –ù–∞—à DataFrame (–∏–ª–∏ None –µ—Å–ª–∏ –µ—Å—Ç—å –∫–µ—à)
            our_col: –ö–æ–ª–æ–Ω–∫–∞ —Å –Ω–∞–∑–≤–∞–Ω–∏–µ–º —Ç–æ–≤–∞—Ä–∞
            top_k: –ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤ –¥–ª—è reranking
            threshold: –ü–æ—Ä–æ–≥ —É–≤–µ—Ä–µ–Ω–Ω–æ—Å—Ç–∏
            encoder_batch: Batch size –¥–ª—è encoder
            reranker_batch: Batch size –¥–ª—è reranker
            use_cache: –ò—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –∫–µ—à
            cache_name: –ò–º—è –∫–µ—à–∞
        """
        print("\n" + "=" * 70)
        print("üöÄ –°–û–ü–û–°–¢–ê–í–õ–ï–ù–ò–ï –¢–û–í–ê–†–û–í")
        print("=" * 70)
        
        if self.gpu_available:
            self._print_gpu_memory()
        
        # 1. –ò–Ω–¥–µ–∫—Å –Ω–∞—à–∏—Ö —Ç–æ–≤–∞—Ä–æ–≤
        if self.faiss_index is None:
            if use_cache and self.load_index(cache_name):
                pass
            elif our_df is not None:
                self.save_index(our_df, our_col, cache_name)
            else:
                raise ValueError("–£–∫–∞–∂–∏—Ç–µ our_df –∏–ª–∏ –∏—Å–ø–æ–ª—å–∑—É–π—Ç–µ –∫–µ—à")
        
        print(f"\nüì¶ –ù–∞—à–∏ —Ç–æ–≤–∞—Ä—ã: {self.faiss_index.ntotal}")
        print(f"üì¶ –ö–æ–Ω–∫—É—Ä–µ–Ω—Ç: {len(competitor_df)}")
        
        # 2. –û–±—Ä–∞–±–æ—Ç–∫–∞ –∫–æ–Ω–∫—É—Ä–µ–Ω—Ç–∞
        print("\n" + "-" * 50)
        print("üìù –≠—Ç–∞–ø 1: –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ —Ç–æ–≤–∞—Ä–æ–≤ –∫–æ–Ω–∫—É—Ä–µ–Ω—Ç–∞")
        print("-" * 50)
        
        comp_texts = competitor_df[competitor_col].tolist()
        comp_texts_clean = self.clean_texts_batch(comp_texts, "–û—á–∏—Å—Ç–∫–∞")
        
        print("\n–°–æ–∑–¥–∞–Ω–∏–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤...")
        comp_embeddings = self.create_embeddings(comp_texts_clean, batch_size=encoder_batch)
        
        # 3. Retrieval
        print("\n" + "-" * 50)
        print(f"üîç –≠—Ç–∞–ø 2: Retrieval (—Ç–æ–ø-{top_k})")
        print("-" * 50)
        
        cand_indices, cand_scores = self.search_candidates(comp_embeddings, top_k)
        print(f"   ‚úÖ –ù–∞–π–¥–µ–Ω–æ –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤ –¥–ª—è {len(cand_indices)} —Ç–æ–≤–∞—Ä–æ–≤")
        
        # 4. Reranking
        print("\n" + "-" * 50)
        print("üéØ –≠—Ç–∞–ø 3: Reranking")
        print("-" * 50)
        
        rerank_results = self.rerank_batch(comp_texts_clean, cand_indices, batch_size=reranker_batch)
        
        # 5. –†–µ–∑—É–ª—å—Ç–∞—Ç—ã
        print("\n" + "-" * 50)
        print("üìä –≠—Ç–∞–ø 4: –§–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤")
        print("-" * 50)
        
        # –û–ø—Ä–µ–¥–µ–ª—è–µ–º –∫–æ–ª–æ–Ω–∫—É
        if our_col in self.eva_df.columns:
            eva_col = our_col
        else:
            eva_col = self.eva_df.columns[1]
        
        results = []
        for comp_idx, (eva_idx, score, rank) in enumerate(rerank_results):
            
            status = "matched" if score >= threshold else "low_confidence"
            
            results.append({
                'competitor_index': comp_idx,
                'competitor_product': competitor_df.iloc[comp_idx][competitor_col],
                'our_index': eva_idx if status == "matched" else None,
                'our_product': self.eva_df.iloc[eva_idx][eva_col] if eva_idx >= 0 else None,
                'retrieval_score': float(cand_scores[comp_idx][rank]) if rank >= 0 else 0.0,
                'rerank_score': score,
                'retrieval_rank': rank + 1 if rank >= 0 else -1,
                'match_status': status
            })
        
        result_df = pd.DataFrame(results)
        
        # –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞
        matched = len(result_df[result_df['match_status'] == 'matched'])
        low_conf = len(result_df[result_df['match_status'] == 'low_confidence'])
        
        print(f"\n‚úÖ –ì–æ—Ç–æ–≤–æ!")
        print(f"   –°–æ–ø–æ—Å—Ç–∞–≤–ª–µ–Ω–æ: {matched} ({matched/len(result_df)*100:.1f}%)")
        print(f"   –ù–∏–∑–∫–∞—è —É–≤–µ—Ä–µ–Ω–Ω–æ—Å—Ç—å: {low_conf} ({low_conf/len(result_df)*100:.1f}%)")
        
        if self.gpu_available:
            self._print_gpu_memory()
        
        return result_df
    
    def analyze_results(self, df: pd.DataFrame, show_examples: int = 5) -> Dict:
        """–ê–Ω–∞–ª–∏–∑ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤"""
        print("\n" + "=" * 70)
        print("üìä –ê–ù–ê–õ–ò–ó –†–ï–ó–£–õ–¨–¢–ê–¢–û–í")
        print("=" * 70)
        
        total = len(df)
        matched = df[df['match_status'] == 'matched']
        low_conf = df[df['match_status'] == 'low_confidence']
        
        print(f"\nüìà –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞:")
        print(f"   –í—Å–µ–≥–æ: {total}")
        print(f"   ‚úÖ –°–æ–ø–æ—Å—Ç–∞–≤–ª–µ–Ω–æ: {len(matched)} ({len(matched)/total*100:.1f}%)")
        print(f"   ‚ö†Ô∏è  –ù–∏–∑–∫–∞—è —É–≤–µ—Ä–µ–Ω–Ω–æ—Å—Ç—å: {len(low_conf)} ({len(low_conf)/total*100:.1f}%)")
        
        print(f"\nüìâ Rerank score:")
        print(f"   –°—Ä–µ–¥–Ω–µ–µ: {df['rerank_score'].mean():.4f}")
        print(f"   –ú–µ–¥–∏–∞–Ω–∞: {df['rerank_score'].median():.4f}")
        print(f"   –ú–∏–Ω/–ú–∞–∫—Å: {df['rerank_score'].min():.4f} / {df['rerank_score'].max():.4f}")
        
        print(f"\nüìä –†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ:")
        for t in [0.9, 0.7, 0.5, 0.3]:
            count = len(df[df['rerank_score'] >= t])
            print(f"   >= {t}: {count} ({count/total*100:.1f}%)")
        
        if show_examples > 0 and len(matched) > 0:
            print(f"\nüìã –¢–æ–ø-{show_examples} –ª—É—á—à–∏—Ö:")
            print("-" * 70)
            
            for i, (_, row) in enumerate(matched.nlargest(show_examples, 'rerank_score').iterrows(), 1):
                print(f"\n{i}. Score: {row['rerank_score']:.4f}")
                print(f"   –ö–æ–Ω–∫—É—Ä–µ–Ω—Ç: {row['competitor_product'][:65]}")
                print(f"   –ù–∞—à:       {str(row['our_product'])[:65]}")
        
        if show_examples > 0 and len(low_conf) > 0:
            print(f"\n‚ö†Ô∏è  –•—É–¥—à–∏–µ {min(3, len(low_conf))}:")
            print("-" * 70)
            
            for i, (_, row) in enumerate(low_conf.nsmallest(min(3, len(low_conf)), 'rerank_score').iterrows(), 1):
                print(f"\n{i}. Score: {row['rerank_score']:.4f}")
                print(f"   –ö–æ–Ω–∫—É—Ä–µ–Ω—Ç: {row['competitor_product'][:65]}")
                print(f"   –õ—É—á—à–∏–π:    {str(row['our_product'])[:65]}")
        
        return {
            'total': total,
            'matched': len(matched),
            'low_confidence': len(low_conf),
            'avg_score': df['rerank_score'].mean(),
            'median_score': df['rerank_score'].median()
        }


# =============================================================================
# QUICK START
# =============================================================================

def quick_match(competitor_csv: str,
                our_csv: str,
                output_csv: str = 'matches.csv',
                competitor_col: str = 'name',
                our_col: str = 'name',
                threshold: float = 0.5,
                top_k: int = 5,
                encoder_batch: int = 8,
                reranker_batch: int = 16) -> pd.DataFrame:
    """
    Match competitor products against our catalogue straight from CSV files.

    Reads both CSVs, checks that the requested name columns exist, builds a
    ``ProductMatcherGPU`` (FP16 enabled, GPU not forced), runs
    ``match_products``, prints the ``analyze_results`` report and writes the
    result table to ``output_csv``.

    Args:
        competitor_csv: Path to the competitor CSV file.
        our_csv: Path to our own product CSV file.
        output_csv: Where the result table is written (UTF-8 with BOM).
        competitor_col: Product-name column in ``competitor_csv``.
        our_col: Product-name column in ``our_csv``.
        threshold: Forwarded to ``match_products`` as ``threshold``.
        top_k: Forwarded to ``match_products`` as ``top_k``.
        encoder_batch: Forwarded to ``match_products`` as ``encoder_batch``.
            The default of 8 is the value this helper previously hard-coded.
        reranker_batch: Forwarded to ``match_products`` as ``reranker_batch``.
            The default of 16 is the value this helper previously hard-coded.

    Returns:
        The DataFrame returned by ``match_products``.

    Raises:
        KeyError: If ``competitor_col`` / ``our_col`` is missing from the
            corresponding file. Raised before any model is loaded.

    Example:
        results = quick_match('competitor.csv', 'our_products.csv')

    NOTE(review): this helper does not release the loaded models; the
    ``__main__`` script calls ``ProductMatcherGPU.clear_models()`` when it is
    done, and callers that need the VRAM back may want to do the same.
    """
    print("üìÇ –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö...")
    competitor_df = pd.read_csv(competitor_csv)
    our_df = pd.read_csv(our_csv)

    print(f"   –ö–æ–Ω–∫—É—Ä–µ–Ω—Ç: {len(competitor_df)} —Ç–æ–≤–∞—Ä–æ–≤")
    print(f"   –ù–∞—à–∏: {len(our_df)} —Ç–æ–≤–∞—Ä–æ–≤")

    # Fail fast: a wrong column name should surface here, not after the
    # encoder and reranker have already been loaded.
    for csv_path, frame, column in (
        (competitor_csv, competitor_df, competitor_col),
        (our_csv, our_df, our_col),
    ):
        if column not in frame.columns:
            raise KeyError(
                f"column {column!r} not found in {csv_path!r}; "
                f"available columns: {frame.columns.tolist()}"
            )

    # Build the matcher (FP16 on, GPU not forced)
    matcher = ProductMatcherGPU(use_fp16=True, force_gpu=False)

    # Run matching; the default batch sizes are the ones chosen for 4GB VRAM
    results = matcher.match_products(
        competitor_df,
        competitor_col=competitor_col,
        our_df=our_df,
        our_col=our_col,
        top_k=top_k,
        threshold=threshold,
        encoder_batch=encoder_batch,
        reranker_batch=reranker_batch,
        use_cache=True
    )

    # Print the summary report
    matcher.analyze_results(results)

    # Persist the results; utf-8-sig writes a BOM at the start of the file
    results.to_csv(output_csv, index=False, encoding='utf-8-sig')
    print(f"\nüíæ –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ: {output_csv}")

    return results


if __name__ == "__main__":
    # Script entry point: load two CSV files from the working directory,
    # match competitor products against ours and save the result table.

    # 1. Report the GPU status before doing any work
    check_gpu_status()

    # 2. Read the input files
    print("\nüìÇ –ó–ê–ì–†–£–ó–ö–ê –î–ê–ù–ù–´–•:")
    try:
        # If the files use a semicolon separator, add the argument sep=';'
        df_competitor = pd.read_csv("competitor_products.csv")
        df_our = pd.read_csv("our_products.csv")
    except FileNotFoundError:
        print("‚ùå –û—à–∏–±–∫–∞: –§–∞–π–ª—ã –Ω–µ –Ω–∞–π–¥–µ–Ω—ã.")
        print("   –£–±–µ–¥–∏—Ç–µ—Å—å, —á—Ç–æ 'competitor_products.csv' –∏ 'our_products.csv' –ª–µ–∂–∞—Ç —Ä—è–¥–æ–º —Å–æ —Å–∫—Ä–∏–ø—Ç–æ–º.")
        # The bare exit() helper is injected by the `site` module for
        # interactive use and is not guaranteed to exist; it also reported
        # success (status 0). Raise SystemExit with a non-zero status instead.
        raise SystemExit(1) from None

    # 3. Show the available columns (helps to fill in the settings below)
    print("\nüîç –ê–ù–ê–õ–ò–ó –ö–û–õ–û–ù–û–ö:")
    print(f"–§–∞–π–ª –∫–æ–Ω–∫—É—Ä–µ–Ω—Ç–∞ (df_competitor): {df_competitor.columns.tolist()}")
    print(f"–ù–∞—à —Ñ–∞–π–ª (df_our):               {df_our.columns.tolist()}")
    print("-" * 60)

    # =========================================================================
    # SETTINGS (EDIT THIS BLOCK)
    # =========================================================================

    # Put the exact names of the columns holding the product names here
    COMPETITOR_COL_NAME = 'name'   # Column name in competitor_products.csv
    OUR_COL_NAME = 'name'          # Column name in our_products.csv

    # Performance settings
    USE_FP16 = True                # True for RTX 3050 (saves memory)
    BATCH_SIZE_ENCODER = 8         # For 4GB VRAM prefer 4 or 8
    BATCH_SIZE_RERANKER = 16       # For 4GB VRAM prefer 16
    TOP_K_CANDIDATES = 5           # How many candidates the reranker re-checks
    MATCH_THRESHOLD = 0.5          # Confidence threshold (0.0 - 1.0)

    # =========================================================================

    # 4. Create the matcher
    # cache_dir lets a repeated run skip recomputing embeddings of our products
    matcher = ProductMatcherGPU(
        use_fp16=USE_FP16,
        cache_dir='./cache_products'
    )

    # 5. Run the matching
    # Note: the match_products signature changed in the new algorithm version
    try:
        results = matcher.match_products(
            competitor_df=df_competitor,
            competitor_col=COMPETITOR_COL_NAME,
            our_df=df_our,
            our_col=OUR_COL_NAME,
            top_k=TOP_K_CANDIDATES,
            threshold=MATCH_THRESHOLD,
            encoder_batch=BATCH_SIZE_ENCODER,
            reranker_batch=BATCH_SIZE_RERANKER,
            use_cache=True             # Use the cache to speed things up
        )

        # 6. Report and save
        matcher.analyze_results(results, show_examples=5)

        output_file = 'results_gpu_matched.csv'
        results.to_csv(output_file, index=False, encoding='utf-8-sig')
        print(f"\nüíæ –†–µ–∑—É–ª—å—Ç–∞—Ç—ã —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ —Ñ–∞–π–ª: {output_file}")

    except KeyError as e:
        # A KeyError is reported as a wrong column name in the settings block
        print(f"\n‚ùå –û–®–ò–ë–ö–ê –ö–û–õ–û–ù–û–ö: –ù–µ –Ω–∞–π–¥–µ–Ω–∞ –∫–æ–ª–æ–Ω–∫–∞ {e}")
        print("   –ü—Ä–æ–≤–µ—Ä—å—Ç–µ –±–ª–æ–∫ '–ù–ê–°–¢–†–û–ô–ö–ò' –∏ —Å—Ä–∞–≤–Ω–∏—Ç–µ –Ω–∞–∑–≤–∞–Ω–∏—è —Å –≤—ã–≤–æ–¥–æ–º '–ê–ù–ê–õ–ò–ó –ö–û–õ–û–ù–û–ö'.")
    except Exception as e:
        # Top-level boundary: report the failure; cleanup still runs below
        print(f"\n‚ùå –ü—Ä–æ–∏–∑–æ—à–ª–∞ –æ—à–∏–±–∫–∞ –ø—Ä–∏ –≤—ã–ø–æ–ª–Ω–µ–Ω–∏–∏: {e}")
    finally:
        # Free GPU memory at the end of the run
        ProductMatcherGPU.clear_models()


üîç –ü–†–û–í–ï–†–ö–ê GPU

üì¶ PyTorch version: 2.6.0+cu124
üîß CUDA available: True
üéÆ CUDA version: 12.4
üìä GPU count: 1

   GPU 0: NVIDIA GeForce RTX 3050 Laptop GPU
   Memory: 4.3 GB
   Compute capability: 8.6

üíæ –¢–µ–∫—É—â–µ–µ –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏–µ VRAM:
   Allocated: 0.00 GB
   Cached: 0.00 GB

üì¶ FAISS GPU support: False

üìÇ –ó–ê–ì–†–£–ó–ö–ê –î–ê–ù–ù–´–•:

üîç –ê–ù–ê–õ–ò–ó –ö–û–õ–û–ù–û–ö:
–§–∞–π–ª –∫–æ–Ω–∫—É—Ä–µ–Ω—Ç–∞ (df_competitor): ['id', 'name']
–ù–∞—à —Ñ–∞–π–ª (df_our):               ['id', 'name']
------------------------------------------------------------
üöÄ –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è ProductMatcherGPU
üñ•Ô∏è  Device: cuda:0
üéÆ GPU: NVIDIA GeForce RTX 3050 Laptop GPU
üíæ VRAM: 4.3 GB
üîß FAISS-GPU: ‚ùå –ù–µ —É—Å—Ç–∞–Ω–æ–≤–ª–µ–Ω (–∏—Å–ø–æ–ª—å–∑—É–µ—Ç—Å—è CPU)
‚ö° Precision: FP16

üì• –ó–∞–≥—Ä—É–∑–∫–∞ encoder: BAAI/bge-m3


`torch_dtype` is deprecated! Use `dtype` instead!


   ‚úÖ Encoder –Ω–∞: cuda:0

üì• –ó–∞–≥—Ä—É–∑–∫–∞ reranker: BAAI/bge-reranker-v2-m3
   ‚úÖ Reranker –Ω–∞: cuda:0

üíæ GPU Memory: 2.27 / 4.3 GB (reserved: 2.28 GB)

‚úÖ –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è –∑–∞–≤–µ—Ä—à–µ–Ω–∞!

üöÄ –°–û–ü–û–°–¢–ê–í–õ–ï–ù–ò–ï –¢–û–í–ê–†–û–í

üíæ GPU Memory: 2.27 / 4.3 GB (reserved: 2.28 GB)
‚ö†Ô∏è –ö–µ—à –Ω–µ –Ω–∞–π–¥–µ–Ω

üíæ –°–æ–∑–¥–∞–Ω–∏–µ –∏–Ω–¥–µ–∫—Å–∞ (500 —Ç–æ–≤–∞—Ä–æ–≤)

1Ô∏è‚É£ –û—á–∏—Å—Ç–∫–∞ —Ç–µ–∫—Å—Ç–æ–≤...


–û—á–∏—Å—Ç–∫–∞: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 500/500 [00:00<00:00, 61116.51it/s]


2Ô∏è‚É£ –°–æ–∑–¥–∞–Ω–∏–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤...
   Batch size: 8 (free VRAM: 2.0 GB)





Batches:   0%|          | 0/63 [00:00<?, ?it/s]


3Ô∏è‚É£ –°–æ–∑–¥–∞–Ω–∏–µ FAISS –∏–Ω–¥–µ–∫—Å–∞...
   üìä FAISS –Ω–∞ CPU

4Ô∏è‚É£ –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ...
‚úÖ –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ –≤ cache_products/

üíæ GPU Memory: 2.28 / 4.3 GB (reserved: 2.31 GB)

üì¶ –ù–∞—à–∏ —Ç–æ–≤–∞—Ä—ã: 500
üì¶ –ö–æ–Ω–∫—É—Ä–µ–Ω—Ç: 500

--------------------------------------------------
üìù –≠—Ç–∞–ø 1: –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ —Ç–æ–≤–∞—Ä–æ–≤ –∫–æ–Ω–∫—É—Ä–µ–Ω—Ç–∞
--------------------------------------------------


–û—á–∏—Å—Ç–∫–∞: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 500/500 [00:00<00:00, 50092.01it/s]


–°–æ–∑–¥–∞–Ω–∏–µ —ç–º–±–µ–¥–¥–∏–Ω–≥–æ–≤...
   Batch size: 8 (free VRAM: 2.0 GB)





Batches:   0%|          | 0/63 [00:00<?, ?it/s]


--------------------------------------------------
üîç –≠—Ç–∞–ø 2: Retrieval (—Ç–æ–ø-5)
--------------------------------------------------
   ‚úÖ –ù–∞–π–¥–µ–Ω–æ –∫–∞–Ω–¥–∏–¥–∞—Ç–æ–≤ –¥–ª—è 500 —Ç–æ–≤–∞—Ä–æ–≤

--------------------------------------------------
üéØ –≠—Ç–∞–ø 3: Reranking
--------------------------------------------------
   Reranking 2500 –ø–∞—Ä (batch=16)...


Batches:   0%|          | 0/157 [00:00<?, ?it/s]


--------------------------------------------------
üìä –≠—Ç–∞–ø 4: –§–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤
--------------------------------------------------

‚úÖ –ì–æ—Ç–æ–≤–æ!
   –°–æ–ø–æ—Å—Ç–∞–≤–ª–µ–Ω–æ: 500 (100.0%)
   –ù–∏–∑–∫–∞—è —É–≤–µ—Ä–µ–Ω–Ω–æ—Å—Ç—å: 0 (0.0%)

üíæ GPU Memory: 2.28 / 4.3 GB (reserved: 2.33 GB)

üìä –ê–ù–ê–õ–ò–ó –†–ï–ó–£–õ–¨–¢–ê–¢–û–í

üìà –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞:
   –í—Å–µ–≥–æ: 500
   ‚úÖ –°–æ–ø–æ—Å—Ç–∞–≤–ª–µ–Ω–æ: 500 (100.0%)
   ‚ö†Ô∏è  –ù–∏–∑–∫–∞—è —É–≤–µ—Ä–µ–Ω–Ω–æ—Å—Ç—å: 0 (0.0%)

üìâ Rerank score:
   –°—Ä–µ–¥–Ω–µ–µ: 0.9911
   –ú–µ–¥–∏–∞–Ω–∞: 0.9995
   –ú–∏–Ω/–ú–∞–∫—Å: 0.7734 / 1.0000

üìä –†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ:
   >= 0.9: 489 (97.8%)
   >= 0.7: 500 (100.0%)
   >= 0.5: 500 (100.0%)
   >= 0.3: 500 (100.0%)

üìã –¢–æ–ø-5 –ª—É—á—à–∏—Ö:
----------------------------------------------------------------------

1. Score: 1.0000
   –ö–æ–Ω–∫—É—Ä–µ–Ω—Ç: Pantene ProV –®–∞–º–ø—É–Ω—å –æ–±—ä–µ–º 250–º–ª
   –ù–∞—à:       Pantene Pro-V Shamp –æ–±—ä–µ–º 250–º