# AutoGluon: Zero-Shot Image Classification with CLIP

## Objective
This notebook demonstrates **zero-shot image classification** using CLIP (Contrastive Language-Image Pre-training). In zero-shot classification, images are assigned to categories for which the model has seen no labeled training examples; each candidate category is described in natural language instead.

## Use Case
Zero-shot image classification is useful for:
- Classifying images into new categories without retraining
- Rapid prototyping for new classification tasks
- Handling rare or emerging categories
- Category sets that change over time (labels can be added or removed without retraining)
- Custom image search with natural language queries

## Key Features
- No training data required for new categories
- Uses natural language descriptions as class labels
- Leverages CLIP's joint vision-language understanding
- Can classify into arbitrary categories on-the-fly

In [None]:
!pip install -q torch torchvision torchaudio
!pip install -q autogluon

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from autogluon.tabular import TabularDataset, TabularPredictor
import os
import shutil

In [None]:
# Load dataset
# TODO: Upload your image dataset or use URL
# For zero-shot classification, you need:
# - Images (file paths in a column)
# - Optional: labels for evaluation (not needed for inference)
#
# Example placeholders - replace with your actual data:
#   train_data = TabularDataset('https://your-image-dataset-url.csv')
#   train_data = TabularDataset('path/to/your/image_data.csv')
#   test_data = TabularDataset('path/to/your/test_data.csv')
#
# Note: For zero-shot, you may not need training data at all!
# You can directly classify new images with text labels

train_data = None  # Replace with your data (optional for zero-shot)
test_data = None   # Replace with your test data

# Report what was actually loaded. Both names stay None until the placeholders
# above are replaced, so an unconditional success message would be misleading.
if train_data is None and test_data is None:
    print("No dataset loaded yet. Replace the train_data / test_data placeholders above.")
else:
    print("Dataset loaded successfully!")

if train_data is not None:
    print(f"Training data shape: {train_data.shape}")
    print(train_data.head())
if test_data is not None:
    print(f"Test data shape: {test_data.shape}")

In [None]:
# Column that holds the ground-truth class of each row.
# TODO: Replace with your label column name (if available)
LABEL = 'label'

In [None]:
# Auto-detect problem type based on label
# Zero-shot classification is typically a classification task, so that is the
# default whenever no labelled training data is available.
problem_type = 'classification'
eval_metric = 'roc_auc'

if train_data is not None and LABEL in train_data.columns:
    label_values = train_data[LABEL]
    n_distinct = label_values.nunique()  # pandas excludes NaN from this count by default

    # A numeric label is treated as continuous only when it has more than two
    # distinct values AND more than 5% of the rows are distinct.
    # The "> 2" guard keeps a two-valued label (e.g. 0/1) from being called
    # regression on a small frame, where 2 / n_rows can exceed 5%. It also means
    # the division below never sees an empty frame (n_distinct > 2 implies rows).
    is_continuous = (
        pd.api.types.is_numeric_dtype(label_values)
        and n_distinct > 2
        and n_distinct / len(label_values) > 0.05
    )

    if is_continuous:
        problem_type = 'regression'
        eval_metric = 'rmse'
    elif n_distinct > 2:
        # ROC AUC is a two-class metric; fall back to accuracy when the label
        # has more than two classes.
        eval_metric = 'accuracy'

print(f"Problem Type: {problem_type}")
print(f"Evaluation Metric: {eval_metric}")
print("\nNote: Zero-shot classification doesn't require training on your specific classes!")

In [None]:
# Train/Load the model
# For zero-shot with CLIP, we may use a pre-trained model directly
# or fine-tune if training data is available
#
# NOTE(review): TabularPredictor fits tabular models on the columns of
# train_data; nothing in this cell loads CLIP. AutoGluon's zero-shot image path
# appears to live in autogluon.multimodal - confirm before relying on this cell
# for image inputs.

if train_data is not None:
    if LABEL not in train_data.columns:
        raise KeyError(f"Label column '{LABEL}' not found in train_data")

    # TabularPredictor documents 'binary', 'multiclass', 'regression' and
    # 'quantile' as problem types, so the notebook-level 'classification'
    # value is translated here from the number of distinct labels.
    if problem_type == 'classification':
        n_classes = train_data[LABEL].nunique()
        ag_problem_type = 'binary' if n_classes <= 2 else 'multiclass'
    else:
        ag_problem_type = problem_type

    # ROC AUC is a two-class metric; use accuracy for multiclass labels.
    ag_eval_metric = eval_metric
    if ag_problem_type == 'multiclass' and eval_metric == 'roc_auc':
        ag_eval_metric = 'accuracy'

    # If training data is available, fit on it for better performance
    predictor = TabularPredictor(
        label=LABEL,
        problem_type=ag_problem_type,
        eval_metric=ag_eval_metric,
        path='./autogluon-clip-model'
    ).fit(
        train_data=train_data,
        presets='medium_quality',
        time_limit=900
    )
    print("Model training completed!")
else:
    # No model is fitted in this branch. Bind the name anyway so later cells
    # can test `predictor is None` instead of hitting a NameError.
    predictor = None
    print("No training data provided. Using pre-trained CLIP for zero-shot classification.")
    print("You can classify images by providing text descriptions of classes.")

In [None]:
# Display and save leaderboard
# The leaderboard is scored on test_data with the predictor fitted from
# train_data, so both must be present.
if train_data is None or test_data is None:
    print("Leaderboard requires both training and test data.")
else:
    leaderboard = predictor.leaderboard(test_data, silent=True)
    print("\nModel Leaderboard:", leaderboard, sep="\n")

    # Persist the table without the DataFrame index column
    leaderboard.to_csv('leaderboard.csv', index=False)
    print("\nLeaderboard saved to leaderboard.csv")

In [None]:
# Display and save feature importance
# Best-effort step: any failure is reported and the notebook keeps going.
# Only train_data is checked here; test_data is forwarded as-is (possibly None).
try:
    if train_data is not None:
        feature_importance = predictor.feature_importance(test_data)
        print("\nFeature Importance:", feature_importance, sep="\n")

        # Write the table to disk (index kept, as it is not disabled here)
        feature_importance.to_csv('feature_importance.csv')
        print("\nFeature importance saved to feature_importance.csv")
except Exception as importance_error:
    print(f"Could not compute feature importance: {importance_error}")

In [None]:
# Make predictions
# Requires a fitted predictor (train_data) and rows to score (test_data).
if not (test_data is None or train_data is None):
    predictions = predictor.predict(test_data)
    print("\nPredictions:", predictions.head(), sep="\n")

    # Class probabilities are only requested for classification problems
    if problem_type == 'classification':
        pred_probs = predictor.predict_proba(test_data)
        print("\nPrediction Probabilities:", pred_probs.head(), sep="\n")

# Illustrative text only: shows how zero-shot categories could be phrased.
# Nothing below runs a model.
print(
    "\nZero-Shot Classification Example:",
    "You can classify images into any categories using text descriptions:",
    sep="\n",
)
print("""\nExample usage:
categories = ['a photo of a cat', 'a photo of a dog', 'a photo of a bird']
image_path = 'path/to/image.jpg'
# Use CLIP to classify image into these categories without training!\n""")

In [None]:
# Save model artifacts as zip file
model_path = './autogluon-clip-model'   # directory to archive
zip_filename = 'autogluon_clip_model'   # archive base name, without the extension

if not os.path.exists(model_path):
    print("Model path not found. Train the model first or use pre-trained CLIP.")
else:
    # The archive is written as <zip_filename>.zip with model_path as its root
    shutil.make_archive(base_name=zip_filename, format='zip', root_dir=model_path)
    print(f"\nModel artifacts saved to {zip_filename}.zip")