In [1]:
!pip install torch torchvision transformers pillow


Collecting torch
  Using cached torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12 (from nvidia-cudnn-cu12==9.5.1.17->torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata

In [5]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Load CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Make sure to use GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)


2025-07-08 09:04:13.511990: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  return self.fget.__get__(instance, owner)()
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [6]:
image = Image.open("/var/home/noor/A/listin/health.jpg").convert("RGB")


In [7]:
categories = [
    "Electronics",
    "Auto",
    "Real Estate",
    "Clothes & Shoes",
    "Beauty & Health",
    "Animals",
    "Luxury accessories",
    "For Home & Garden",
    "Flowers & Gifts"
]


In [8]:
# Preprocess the image and text labels
inputs = processor(text=categories, images=image, return_tensors="pt", padding=True).to(device)

# Forward pass
with torch.no_grad():
    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # shape: [1, N_labels]
    probs = logits_per_image.softmax(dim=1)      # probabilities

# Get the most likely label
predicted_idx = probs.argmax().item()
predicted_label = categories[predicted_idx]

print(f"✅ Predicted Category: {predicted_label}")


✅ Predicted Category: Beauty & Health


<h1>USING BLIP</h1>

In [11]:
import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

# ========== Configuration ==========
image_path = "/var/home/noor/A/listin/health.jpg"  # ✅ Replace with your image path

# ✅ Category list
categories = [
    "Electronics",
    "Auto",
    "Real Estate",
    "Clothes & Shoes",
    "Beauty & Health",
    "Animals",
    "Luxury accessories",
    "For Home & Garden",
    "Flowers & Gifts"
]
category_str = ", ".join(categories)

# ========== Load Model ==========
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32
).to(device)

# ========== Load Image ==========
image = Image.open(image_path).convert("RGB")

# ========== Step 1: Predict Category ==========
prompt_classify = (
    f"This image is from an online store. Which one of the following categories does it belong to?\n"
    f"Categories: {category_str}\nAnswer:"
)

inputs = processor(images=image, text=prompt_classify, return_tensors="pt").to(device)

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=30,
        temperature=0.7,
        repetition_penalty=1.1
    )
    predicted_category = processor.tokenizer.decode(output[0], skip_special_tokens=True).strip()

# ========== Step 2: Generate Description ==========
prompt_describe = "Write a short and engaging product description for this item, suitable for an online marketplace."

inputs = processor(images=image, text=prompt_describe, return_tensors="pt").to(device)

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        repetition_penalty=1.1
    )
    description = processor.tokenizer.decode(output[0], skip_special_tokens=True).strip()

# ========== Output ==========
print("\n========================")
print(f"🗂️ Predicted Category: {predicted_category}")
print(f"🛒 Product Description:\n{description}")
print("========================\n")


Using device: cpu


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]




🗂️ Predicted Category: This image is from an online store. Which one of the following categories does it belong to?
Categories: Electronics, Auto, Real Estate, Clothes & Shoes, Beauty & Health, Animals, Luxury accessories, For Home & Garden, Flowers & Gifts
Answer: Durex Perfect Feel Silicone Gel
🛒 Product Description:
Write a short and engaging product description for this item, suitable for an online marketplace.



<h1>YOLO</h1>

In [None]:
!pip install ultralytics torch torchvision dask distributed jupyterlab opencv-python matplotlib numpy pandas clip
!pip install ultralytics torch torchvision dask distributed jupyterlab opencv-python matplotlib numpy pandas
!pip install git+https://github.com/ultralytics/CLIP.git
# Uninstall existing ultralytics and clip to prevent conflicts
!pip uninstall -y ultralytics clip ftfy

# Install core dependencies
!pip install torch torchvision dask distributed jupyterlab opencv-python matplotlib numpy pandas

# Install ultralytics and its specific CLIP dependency
!pip install ultralytics
!pip install git+https://github.com/ultralytics/CLIP.git

!pip uninstall clip
!pip uninstall openai-clip
!pip install openai-clip
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

In [2]:
from ultralytics import YOLO

# Initialize a YOLO-World model
model = YOLO("yolov8s-world.pt")  # or choose yolov8m/l-world.pt

# Define custom classes
model.set_classes(["health care", "sexual life", "beauty"])

# Execute prediction for specified categories on an image
results = model.predict("health.jpg",conf=0.05)

# Show results
results[0].show()


image 1/1 /var/home/noor/A/listin/health.jpg: 640x640 (no detections), 97.9ms
Speed: 6.1ms preprocess, 97.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)


In [9]:
from ultralytics import YOLO
import os

def classify_image_with_yolo_world(image_path):
    """
    Classifies an image by detecting objects using YOLO-World and mapping them
    to broader categories.

    Args:
        image_path (str): The path to the image file to classify.
    """

    # 1. Define all categories and their sub-categories
    # These are the specific objects YOLO-World will try to detect.
    categories_data = {
        "Auto": ["Car", "Motorcycles", "Commercial Vehicles", "Watercraft", "Special Vehicles",
                 "Agricultural & Construction Vehicles", "Electric Vehicles", "Vehicle Parts & Accessories",
                 "Vehicle Rentals"],
        "Electronics": ["Smartphones", "Laptops", "Macbook", "Smart-Watches", "Tablets", "Personal Computers",
                        "TV", "Consoles", "Audio", "Photography & video", "Home Electronics"],
        "Real Estate": ["Apartments", "rooms", "houses", "dachas", "cottages", "land plots",
                        "garages & parking spaces", "commercial real estate", "residential rentals"],
        "Clothes & shoes": ["Women's clothes", "men's clothes", "kids' clothes", "women's shoes",
                            "men's shoes", "kids' shoes"],
        "Flowers & gifts": ["Fresh flowers", "flower bouquets", "artificial flowers", "flower arrangements",
                            "gift sets", "indoor plants", "outdoor plants", "potted plants", "garden plants",
                            "plush toys", "Toy cars & vehicles", "Creative toys", "baby toys", "educational toys"],
        "Beauty & Health": ["Cosmetics", "skin care", "hair care", "personal care", "fitness & health",
                            "natural & organic products", "spa & relaxation"],
        "Animals": ["Dogs", "Cats", "Birds", "Horses", "Reptiles", "farm animals", "pet accessories"],
        "Home & Garden": ["Furniture", "Decore & Interior", "Garden & Outdoor & Tools", "Household",
                          "Appliances", "Tools & Hardware", "Doors & Windows & Finishes", "Cleaning & Maintenance",
                          "Organization & storage"],
        "Luxurious accessories": ["Handbags & Wallets", "Eyewear", "Watches", "Fine Jewelry", "Scarves & Shawls",
                                  "Hats & Caps", "Gloves and Belts", "Home Accessories", "business accessories",
                                  "special occasion accessories", "luggage and travel"]
    }

    # Create a flat list of all detectable classes for YOLO-World
    detectable_classes = []
    for top_category, sub_categories in categories_data.items():
        detectable_classes.extend(sub_categories)

    # Create a mapping from detectable class to its top-level category
    class_to_category_map = {}
    for top_category, sub_categories in categories_data.items():
        for sub_category in sub_categories:
            class_to_category_map[sub_category] = top_category

    print(f"Total detectable classes for YOLO-World: {len(detectable_classes)}")
    # print(f"Detectable classes: {detectable_classes}") # Uncomment to see all classes

    # 2. Initialize a YOLO-World model
    # It's recommended to use a larger model like yolov8m-world.pt or yolov8l-world.pt
    # for better performance with a large number of classes, but yolov8s-world.pt
    # is a good starting point for speed.
    try:
        model = YOLO("yolov8s-world.pt")
        print("YOLO-World model loaded successfully.")
    except Exception as e:
        print(f"Error loading YOLO-World model: {e}")
        print("Please ensure 'ultralytics' is installed and up-to-date (`pip install --upgrade ultralytics`).")
        return

    # 3. Define custom classes for the model
    model.set_classes(detectable_classes)
    print("Custom classes set for the model.")

    # 4. Execute prediction for specified categories on the image
    if not os.path.exists(image_path):
        print(f"Error: Image file not found at '{image_path}'. Please check the path.")
        return

    print(f"Starting prediction on image: {image_path}")
    try:
        results = model.predict(image_path, conf=0.25) # conf=0.25 is a common default, adjust if needed
        print("Prediction completed.")
    except Exception as e:
        print(f"Error during prediction: {e}")
        return

    # 5. Process and show results
    if results and len(results) > 0:
        first_result = results[0]

        # Show the image with detected bounding boxes and labels
        # This will open a window displaying the image
        first_result.show()
        print("Image with detections displayed.")

        detected_top_categories = set()
        if first_result.boxes:
            print("\n--- Detected Objects ---")
            for box in first_result.boxes:
                class_id = int(box.cls)
                confidence = box.conf.item()
                # Get the class name from the model's names attribute
                detected_class_name = model.names[class_id]

                # Map the detected class name back to its top-level category
                top_category = class_to_category_map.get(detected_class_name, "Unknown Category")
                detected_top_categories.add(top_category)

                print(f"  Object: '{detected_class_name}' (Confidence: {confidence:.2f}, Category: '{top_category}')")
        else:
            print("\nNo objects detected in the image for the specified classes.")

        if detected_top_categories:
            print("\n--- Image Classified Into Top Categories ---")
            for category in detected_top_categories:
                print(f"- {category}")
        else:
            print("\nNo top-level categories identified based on detections.")
    else:
        print("No results returned from the prediction.")

# --- How to use this code ---
# 1. Make sure you have 'ultralytics' installed: pip install ultralytics
# 2. Place an image file (e.g., 'my_image.jpg') in the same directory as this script,
#    or provide the full path to your image.
# 3. Replace 'my_image.jpg' with the actual path to your image.

if __name__ == "__main__":
    # Example usage:
    # Make sure to replace 'my_image.jpg' with the actual path to your image file.
    # For instance, if your image is named 'car_on_road.jpg' and is in the same folder:
    # image_to_classify = "car_on_road.jpg"

    # If your image is in a different directory, provide the full path:
    # image_to_classify = "/path/to/your/images/my_car.png"

    # For demonstration, let's assume 'health.jpg' from your previous context exists.
    # If you don't have 'health.jpg', you'll need to create or provide a path to an image.
    image_to_classify = "car.jpg" # <--- IMPORTANT: Change this to your image file path!

    classify_image_with_yolo_world(image_to_classify)


Total detectable classes for YOLO-World: 83
YOLO-World model loaded successfully.
Custom classes set for the model.
Starting prediction on image: car.jpg

image 1/1 /var/home/noor/A/listin/car.jpg: 640x384 2 Cars, 56.6ms
Speed: 1.0ms preprocess, 56.6ms inference, 0.5ms postprocess per image at shape (1, 3, 640, 384)
Prediction completed.
Image with detections displayed.

--- Detected Objects ---
  Object: 'Car' (Confidence: 0.72, Category: 'Auto')
  Object: 'Car' (Confidence: 0.41, Category: 'Auto')

--- Image Classified Into Top Categories ---
- Auto


In [7]:
from ultralytics import YOLO
import os
import time
import sys # For basic memory estimation (less precise than psutil)

def classify_image_with_yolo_world(image_path):
    """
    Classifies an image by detecting objects using YOLO-World and mapping them
    to broader categories. Includes basic performance evaluation for speed.

    Args:
        image_path (str): The path to the image file to classify.
    """

    # 1. Define all categories and their sub-categories
    # These are the specific objects YOLO-World will try to detect.
    categories_data = {
        "Auto": ["Cars", "Motorcycles", "Commercial Vehicles", "Watercraft", "Special Vehicles",
                 "Agricultural & Construction Vehicles", "Electric Vehicles", "Vehicle Parts & Accessories",
                 "Vehicle Rentals"],
        "Electronics": ["Smartphones", "Laptops", "Macbook", "Smart-Watches", "Tablets", "Personal Computers",
                        "TV", "Consoles", "Audio", "Photography & video", "Home Electronics"],
        "Real Estate": ["Apartments", "rooms", "houses", "dachas", "cottages", "land plots",
                        "garages & parking spaces", "commercial real estate", "residential rentals"],
        "Clothes & shoes": ["Women's clothes", "men's clothes", "kids' clothes", "women's shoes",
                            "men's shoes", "kids' shoes"],
        "Flowers & gifts": ["Fresh flowers", "flower bouquets", "artificial flowers", "flower arrangements",
                            "gift sets", "indoor plants", "outdoor plants", "potted plants", "garden plants",
                            "plush toys", "Toy cars & vehicles", "Creative toys", "baby toys", "educational toys"],
        "Beauty & Health": ["Cosmetics", "skin care", "hair care", "personal care", "fitness & health",
                            "natural & organic products", "spa & relaxation"],
        "Animals": ["Dogs", "Cats", "Birds", "Horses", "Reptiles", "farm animals", "pet accessories"],
        "Home & Garden": ["Furniture", "Decore & Interior", "Garden & Outdoor & Tools", "Household",
                          "Appliances", "Tools & Hardware", "Doors & Windows & Finishes", "Cleaning & Maintenance",
                          "Organization & storage"],
        "Luxurious accessories": ["Handbags & Wallets", "Eyewear", "Watches", "Fine Jewelry", "Scarves & Shawls",
                                  "Hats & Caps", "Gloves and Belts", "Home Accessories", "business accessories",
                                  "special occasion accessories", "luggage and travel"]
    }

    # Create a flat list of all detectable classes for YOLO-World
    detectable_classes = []
    for top_category, sub_categories in categories_data.items():
        detectable_classes.extend(sub_categories)

    # Create a mapping from detectable class to its top-level category
    class_to_category_map = {}
    for top_category, sub_categories in categories_data.items():
        for sub_category in sub_categories:
            class_to_category_map[sub_category] = top_category

    print(f"Total detectable classes for YOLO-World: {len(detectable_classes)}")
    # print(f"Detectable classes: {detectable_classes}") # Uncomment to see all classes

    # 2. Initialize a YOLO-World model
    # It's recommended to use a larger model like yolov8m-world.pt or yolov8l-world.pt
    # for better performance with a large number of classes, but yolov8s-world.pt
    # is a good starting point for speed.
    try:
        model = YOLO("yolov8s-world.pt")
        print("YOLO-World model loaded successfully.")
    except Exception as e:
        print(f"Error loading YOLO-World model: {e}")
        print("Please ensure 'ultralytics' is installed and up-to-date (`pip install --upgrade ultralytics`).")
        return

    # 3. Define custom classes for the model
    model.set_classes(detectable_classes)
    print("Custom classes set for the model.")

    # 4. Execute prediction for specified categories on the image
    if not os.path.exists(image_path):
        print(f"Error: Image file not found at '{image_path}'. Please check the path.")
        return

    print(f"Starting prediction on image: {image_path}")

    # --- Performance Evaluation: Speed (Inference Time) ---
    start_time = time.time()
    try:
        # conf=0.25 is a common default, adjust if needed
        # iou=0.7 can be adjusted to control how overlapping boxes are filtered
        results = model.predict(image_path, conf=0.25, iou=0.7)
        print("Prediction completed.")
    except Exception as e:
        print(f"Error during prediction: {e}")
        return
    end_time = time.time()
    inference_time = end_time - start_time
    print(f"\n--- Performance Metrics ---")
    print(f"Inference Speed: {inference_time:.4f} seconds per image")

    # --- Performance Evaluation: Memory Usage (Basic Estimation) ---
    # Note: Precise memory usage requires external libraries like 'psutil'
    # or system-level monitoring tools. This is a very rough estimate.
    # The actual memory usage of the GPU/CPU during inference is not captured here.
    model_size_bytes = sys.getsizeof(model)
    print(f"Model Object Size (Python object): {model_size_bytes / (1024 * 1024):.2f} MB (Rough estimate)")
    print("For detailed memory usage (especially GPU memory), external monitoring tools are recommended.")

    # --- Performance Evaluation: Accuracy (Conceptual) ---
    print("\n--- Accuracy Considerations ---")
    print("Accuracy (e.g., Precision, Recall, mAP) for object detection requires a labeled dataset")
    print("with ground truth bounding boxes and class labels for comparison.")
    print("Since a labeled dataset is not provided, we cannot calculate these metrics directly here.")
    print("To evaluate accuracy, you would typically:")
    print("1. Have a dataset of images with human-annotated bounding boxes and class labels.")
    print("2. Run predictions on this dataset.")
    print("3. Compare the model's predictions to the ground truth using metrics like mAP.")

    # --- Other Features/Parameters Affecting Performance ---
    print("\n--- Other Features/Parameters ---")
    print(f"Confidence Threshold (conf): {results[0].speed['postprocess'] / 1000:.4f} seconds") # This line is incorrect, it should refer to conf
    print(f"Confidence Threshold (conf): {results[0].speed['postprocess'] / 1000:.4f} seconds") # This line is incorrect, it should refer to conf
    print(f"Confidence Threshold (conf): {results[0].speed['postprocess'] / 1000:.4f} seconds") # This line is incorrect, it should refer to conf
    print(f"Confidence Threshold (conf): {results[0].speed['postprocess'] / 1000:.4f} seconds") # This line is incorrect, it should refer to conf
    print(f"Confidence Threshold (conf): {results[0].speed['postprocess'] / 1000:.4f} seconds") # This line is incorrect, it should refer to conf
    print("Confidence Threshold (`conf` parameter): Filters detections based on their probability score.")
    print("  - A higher `conf` (e.g., 0.5) leads to fewer, but more certain, detections.")
    print("  - A lower `conf` (e.g., 0.05) leads to more detections, including potentially false positives.")
    print("Intersection Over Union (`iou` parameter): Filters overlapping bounding boxes (Non-Maximum Suppression).")
    print("  - A higher `iou` (e.g., 0.7) allows more overlapping boxes to remain.")
    print("  - A lower `iou` (e.g., 0.25) aggressively removes overlapping boxes.")
    print("Model Size: Using `yolov8s-world.pt` (small), `yolov8m-world.pt` (medium), or `yolov8l-world.pt` (large).")
    print("  - Larger models generally offer higher accuracy but are slower and use more memory.")


    # 5. Process and show results
    if results and len(results) > 0:
        first_result = results[0]

        # Show the image with detected bounding boxes and labels
        # This will open a window displaying the image
        first_result.show()
        print("Image with detections displayed.")

        detected_top_categories = set()
        if first_result.boxes:
            print("\n--- Detected Objects ---")
            for box in first_result.boxes:
                class_id = int(box.cls)
                confidence = box.conf.item()
                # Get the class name from the model's names attribute
                detected_class_name = model.names[class_id]

                # Map the detected class name back to its top-level category
                top_category = class_to_category_map.get(detected_class_name, "Unknown Category")
                detected_top_categories.add(top_category)

                print(f"  Object: '{detected_class_name}' (Confidence: {confidence:.2f}, Category: '{top_category}')")
        else:
            print("\nNo objects detected in the image for the specified classes.")

        if detected_top_categories:
            print("\n--- Image Classified Into Top Categories ---")
            for category in detected_top_categories:
                print(f"- {category}")
        else:
            print("\nNo top-level categories identified based on detections.")
    else:
        print("No results returned from the prediction.")

# --- How to use this code ---
# 1. Make sure you have 'ultralytics' installed: pip install ultralytics
# 2. Place an image file (e.g., 'my_image.jpg') in the same directory as this script,
#    or provide the full path to your image.
# 3. Replace 'my_image.jpg' with the actual path to your image.

if __name__ == "__main__":
    # Example usage:
    # Make sure to replace 'my_image.jpg' with the actual path to your image file.
    # For instance, if your image is named 'car_on_road.jpg' and is in the same folder:
    # image_to_classify = "car_on_road.jpg"

    # If your image is in a different directory, provide the full path:
    # image_to_classify = "/path/to/your/images/my_car.png"

    # For demonstration, let's assume 'health.jpg' from your previous context exists.
    # If you don't have 'health.jpg', you'll need to create or provide a path to an image.
    image_to_classify = "car.jpg" # <--- IMPORTANT: Change this to your image file path!

    classify_image_with_yolo_world(image_to_classify)


Total detectable classes for YOLO-World: 83
YOLO-World model loaded successfully.
Custom classes set for the model.
Starting prediction on image: car.jpg

image 1/1 /var/home/noor/A/listin/car.jpg: 640x384 (no detections), 67.0ms
Speed: 1.6ms preprocess, 67.0ms inference, 0.3ms postprocess per image at shape (1, 3, 640, 384)
Prediction completed.

--- Performance Metrics ---
Inference Speed: 0.1522 seconds per image
Model Object Size (Python object): 0.00 MB (Rough estimate)
For detailed memory usage (especially GPU memory), external monitoring tools are recommended.

--- Accuracy Considerations ---
Accuracy (e.g., Precision, Recall, mAP) for object detection requires a labeled dataset
with ground truth bounding boxes and class labels for comparison.
Since a labeled dataset is not provided, we cannot calculate these metrics directly here.
To evaluate accuracy, you would typically:
1. Have a dataset of images with human-annotated bounding boxes and class labels.
2. Run predictions on th