# AutoGluon: Zero-Shot Image-Text Matching

## Objective
This notebook demonstrates **zero-shot image-text matching** using AutoGluon with pre-trained vision-language models such as CLIP. "Zero-shot" means the model scores how well an image and a text match without being trained on any task-specific examples.

## Use Case
Zero-shot image-text matching is useful for:
- Open-domain image search with natural language
- Product discovery without predefined categories
- Content-based image retrieval
- Simple visual question answering (ranking candidate answers against an image) without training
- Cross-modal similarity search
- Rapid prototyping for new matching tasks

## Key Features
- No training required for new matching tasks
- Uses pre-trained vision-language models (e.g., CLIP)
- Works with arbitrary text queries
- Generalizes to unseen concepts
- Real-time inference on new pairs

In [None]:
# Install AutoGluon
!pip install -q autogluon

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from autogluon.tabular import TabularDataset, TabularPredictor
import os
import shutil

In [None]:
# Load dataset
# TODO: Upload your image-text pairs dataset or use URL
# For zero-shot, training data is optional (can use pre-trained models directly)
# Dataset format:
# - 'image' column (paths to images)
# - 'text' column (text descriptions/queries)
# - Optional: 'label' column for evaluation

# Example: train_data = TabularDataset('path/to/image_text_pairs.csv')

train_data = None  # Replace with your data (optional for zero-shot)
test_data = None   # Replace with your test data

# Report only what was actually loaded, so the output never claims success
# while both datasets are still the `None` placeholders.
if train_data is not None:
    print("Dataset loaded successfully!")
    print(f"Training data shape: {train_data.shape}")
    print(train_data.head())
else:
    print("No training data provided. Will use pre-trained models for zero-shot matching.")

if test_data is not None:
    print(f"Test data shape: {test_data.shape}")

In [None]:
# Name of the column that holds the match label.
# TODO: Replace with your label column name (if available)
LABEL = "label"

In [None]:
# Auto-detect problem type based on label
#
# Sets two notebook-level variables:
#   problem_type: 'regression' or 'classification'
#   eval_metric:  'rmse', 'roc_auc' or 'accuracy'

# Default to classification for zero-shot matching (also used when there is
# no training data or no label column).
problem_type = 'classification'
eval_metric = 'roc_auc'

if train_data is not None and LABEL in train_data.columns:
    label_values = train_data[LABEL].dropna()
    n_distinct = label_values.nunique()

    # A numeric label is treated as a regression target only when it has more
    # than two distinct values AND more than 5% of the labelled rows are
    # distinct. The explicit "more than two" check keeps small 0/1 match
    # datasets (e.g. 20 rows -> ratio 0.10) from being mistaken for
    # regression; it also guarantees the division below never sees an empty
    # label column.
    looks_continuous = (
        pd.api.types.is_numeric_dtype(label_values)
        and n_distinct > 2
        and n_distinct / len(label_values) > 0.05
    )

    if looks_continuous:
        problem_type = 'regression'
        eval_metric = 'rmse'
    elif n_distinct > 2:
        # roc_auc is a binary-classification metric; fall back to accuracy
        # when the label has more than two classes.
        eval_metric = 'accuracy'

print(f"Problem Type: {problem_type}")
print(f"Evaluation Metric: {eval_metric}")
print("\nNote: Zero-shot matching uses pre-trained models without task-specific training.")

In [None]:
# Train/Load the model
if train_data is not None:
    # If training data is available, fine-tune for better performance
    if LABEL not in train_data.columns:
        raise ValueError(
            f"Training data has no '{LABEL}' column; set LABEL to your label column name."
        )

    # TabularPredictor accepts 'binary', 'multiclass', 'regression' or
    # 'quantile' as problem_type, so the generic 'classification' value used
    # by this notebook is resolved from the number of distinct labels.
    ag_problem_type = problem_type
    ag_eval_metric = eval_metric
    if problem_type == 'classification':
        n_classes = train_data[LABEL].nunique()
        ag_problem_type = 'binary' if n_classes == 2 else 'multiclass'
        if ag_problem_type == 'multiclass' and eval_metric == 'roc_auc':
            # roc_auc is only defined for binary problems
            ag_eval_metric = 'accuracy'

    predictor = TabularPredictor(
        label=LABEL,
        problem_type=ag_problem_type,
        eval_metric=ag_eval_metric,
        path='./autogluon-zero-shot-matching-model'
    ).fit(
        train_data=train_data,
        presets='medium_quality',
        time_limit=900
    )
    print("Model training completed!")
else:
    # For pure zero-shot, use pre-trained models directly
    print("Using pre-trained CLIP for zero-shot image-text matching.")
    print("You can match any image with any text query without training!")
    # Nothing is trained in this mode; bind the name explicitly so it is
    # always defined after this cell runs.
    predictor = None

In [None]:
# Display and save leaderboard
# Scoring models on test_data needs the ground-truth label column, which is
# optional in this notebook's dataset format, so check for it up front
# instead of failing inside AutoGluon.
if train_data is None or test_data is None:
    print("Leaderboard requires both training and test data.")
elif LABEL not in test_data.columns:
    print(f"Leaderboard requires a '{LABEL}' column in the test data to score models.")
else:
    leaderboard = predictor.leaderboard(test_data, silent=True)
    print("\nModel Leaderboard:")
    print(leaderboard)

    # Save leaderboard to CSV
    leaderboard.to_csv('leaderboard.csv', index=False)
    print("\nLeaderboard saved to leaderboard.csv")

In [None]:
# Compute, show and persist feature importance.
# Best-effort step: any failure is reported and the notebook carries on.
try:
    if train_data is not None:
        feature_importance = predictor.feature_importance(test_data)
        print("\nFeature Importance:", feature_importance, sep="\n")

        # Persist the importance table next to the notebook
        feature_importance.to_csv('feature_importance.csv')
        print("\nFeature importance saved to feature_importance.csv")
except Exception as err:
    print(f"Could not compute feature importance: {err}")

In [None]:
# Make predictions
# Predictions come from the trained predictor, so both datasets are required.
if test_data is not None and train_data is not None:
    predictions = predictor.predict(test_data)
    print("\nPredictions (Match Scores):")
    print(predictions.head())

    # For classification, also show prediction probabilities
    if problem_type == 'classification':
        pred_probs = predictor.predict_proba(test_data)
        print("\nPrediction Probabilities:")
        print(pred_probs.head())

# Example: Zero-shot matching with arbitrary pairs
# The printed snippet builds its own pre-trained matcher, because no
# `predictor` is trained when the notebook runs without training data.
print("\nZero-Shot Image-Text Matching Example:")
print("You can match any image with any text without training!")
print("""\nExample usage:
from autogluon.multimodal import MultiModalPredictor

pairs = pd.DataFrame({
    'image': ['photo1.jpg', 'photo2.jpg'],
    'text': ['A cat sitting on a couch', 'A dog playing in the park']
})
# Load pre-trained CLIP; no fit() call is needed for zero-shot matching
matcher = MultiModalPredictor(
    query='image',
    response='text',
    problem_type='image_text_similarity'
)
# Get match probabilities for each image-text pair
similarities = matcher.predict_proba(pairs)
""")
print("\nApplications:")
print("- Image search: Given a text query, find matching images")
print("- Caption verification: Check if image matches its caption")
print("- Content discovery: Find images relevant to any description")

In [None]:
# Bundle the trained model directory into a zip archive, if one exists.
model_path = './autogluon-zero-shot-matching-model'
zip_filename = 'autogluon_zero_shot_matching_model'

if not os.path.exists(model_path):
    # Nothing was trained (pure zero-shot run), so there is nothing to archive
    print("Model path not found. Using pre-trained models for zero-shot inference.")
else:
    # make_archive appends the '.zip' extension to the base name itself
    shutil.make_archive(zip_filename, 'zip', model_path)
    print(f"\nModel artifacts saved to {zip_filename}.zip")