# AutoGluon Tabular: Multi-Label Classification

## Objective
This notebook demonstrates **multi-label classification** using AutoGluon Tabular. In multi-label problems, each instance can belong to multiple classes simultaneously (e.g., tagging articles with multiple topics, or predicting multiple disease diagnoses).

## Use Case
Multi-label classification is useful for:
- Document/article tagging with multiple categories
- Medical diagnosis with multiple conditions
- Product categorization with multiple attributes
- Image annotation with multiple objects

In [None]:
!pip install -q torch torchvision torchaudio
!pip install -q autogluon

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from autogluon.tabular import TabularDataset, TabularPredictor
import os
import shutil

In [None]:
# Load dataset
# TODO: Point these at your own multi-label dataset (local path or URL).
# The LABEL list defined later in this notebook expects one binary column per
# label in the table.
# Example:
#   train_data = TabularDataset('path/to/your/multilabel_data.csv')
#   test_data = TabularDataset('path/to/your/test_data.csv')

train_data = None  # Replace with your data
test_data = None   # Replace with your data

# Only report success when data is actually present; the placeholders above
# are None until the user replaces them.
if train_data is None:
    print("No dataset loaded yet: assign train_data (and test_data) above before continuing.")
else:
    print("Dataset loaded successfully!")
    print(f"Training data shape: {train_data.shape}")
    print(train_data.head())

In [None]:
# Choose the label column(s).
# Two layouts are sketched here:
#   1. several binary columns, one per label (active below)
#   2. a single column holding list-like values (commented out)

# Layout 1: one binary column per label.
# TODO: Replace with your label column names
LABEL = [
    'label1',
    'label2',
    'label3',
]

# Layout 2: a single multi-label column.
# LABEL = 'labels'  # Column containing lists or comma-separated values

In [None]:
# Problem type and evaluation metric are fixed explicitly here rather than
# auto-detected.
# NOTE(review): 'multilabel' does not appear among TabularPredictor's
# documented problem types - confirm the training step translates it.
eval_metric = 'roc_auc'  # Common metric for multi-label classification
problem_type = 'multilabel'

print(
    f"Problem Type: {problem_type}",
    f"Evaluation Metric: {eval_metric}",
    sep="\n",
)

In [None]:
# Train the model
# TabularPredictor predicts a single label column and has no 'multilabel'
# problem type, so multi-label training is done here with one predictor per
# label column, wrapped in a small class that keeps the predictor-style
# interface (leaderboard / feature_importance / predict / predict_proba).


class _MultiLabelPredictor:
    """Hold one TabularPredictor per label column behind a single interface.

    Parameters
    ----------
    labels : str or list of str
        Name(s) of the label column(s); each must be its own column.
    problem_type : str or None
        Passed to every per-label TabularPredictor. The notebook-level value
        'multilabel' is translated to 'binary' (one binary target per column).
    eval_metric : str or None
        Passed unchanged to every per-label TabularPredictor.
    path : str
        Root directory; each label's model is saved in its own sub-directory.
    """

    def __init__(self, labels, problem_type=None, eval_metric=None, path=None):
        self.labels = [labels] if isinstance(labels, str) else list(labels)
        if not self.labels:
            raise ValueError("At least one label column is required.")
        self.problem_type = 'binary' if problem_type == 'multilabel' else problem_type
        self.eval_metric = eval_metric
        self.path = path
        self.predictors = {}

    def _view(self, label, data):
        """Return `data` without the other label columns (None passes through)."""
        if data is None:
            return None
        others = [col for col in self.labels if col != label]
        return data.drop(columns=others, errors='ignore')

    def fit(self, train_data, **fit_kwargs):
        """Fit one predictor per label column and return self.

        Raises ValueError when `train_data` is None or lacks a label column.
        """
        if train_data is None:
            raise ValueError("train_data is None: load your dataset before training.")
        missing = [col for col in self.labels if col not in train_data.columns]
        if missing:
            raise ValueError(f"Label column(s) not found in train_data: {missing}")

        # Split the overall time budget evenly so the total stays as requested.
        time_limit = fit_kwargs.pop('time_limit', None)
        if time_limit is not None:
            fit_kwargs['time_limit'] = time_limit / len(self.labels)

        for label in self.labels:
            # The other label columns are dropped so they are not used as features.
            self.predictors[label] = TabularPredictor(
                label=label,
                problem_type=self.problem_type,
                eval_metric=self.eval_metric,
                path=os.path.join(self.path, f'predictor_{label}'),
            ).fit(train_data=self._view(label, train_data), **fit_kwargs)
        return self

    def leaderboard(self, data=None, **kwargs):
        """Return the per-label leaderboards stacked, with a leading 'label' column."""
        boards = []
        for label, model in self.predictors.items():
            board = model.leaderboard(self._view(label, data), **kwargs)
            board.insert(0, 'label', label)
            boards.append(board)
        return pd.concat(boards, ignore_index=True)

    def feature_importance(self, data=None, **kwargs):
        """Return per-label feature importance indexed by (label, feature)."""
        return pd.concat(
            {
                label: model.feature_importance(self._view(label, data), **kwargs)
                for label, model in self.predictors.items()
            },
            names=['label', 'feature'],
        )

    def predict(self, data, **kwargs):
        """Return a DataFrame with one predicted column per label."""
        return pd.DataFrame({
            label: model.predict(self._view(label, data), **kwargs)
            for label, model in self.predictors.items()
        })

    def predict_proba(self, data, **kwargs):
        """Return class probabilities with (label, class) column levels."""
        return pd.concat(
            {
                label: model.predict_proba(self._view(label, data), **kwargs)
                for label, model in self.predictors.items()
            },
            axis=1,
        )


predictor = _MultiLabelPredictor(
    labels=LABEL,
    problem_type=problem_type,
    eval_metric=eval_metric,
    path='./autogluon-multilabel-model',
).fit(
    train_data=train_data,
    presets='medium_quality',
    time_limit=900,
)

print("Model training completed!")

In [None]:
# Build the model leaderboard (scored against test_data), show it, then
# persist it next to the notebook.
leaderboard = predictor.leaderboard(test_data, silent=True)
print("\nModel Leaderboard:", leaderboard, sep="\n")

# Persist as CSV without the row index.
leaderboard.to_csv('leaderboard.csv', index=False)
print("\nLeaderboard saved to leaderboard.csv")

In [None]:
# Compute feature importance on test_data, show it, and write it to CSV.
# This step is best-effort: any failure is reported and the notebook continues.
try:
    feature_importance = predictor.feature_importance(test_data)
    print("\nFeature Importance:", feature_importance, sep="\n")

    # The index is kept in the CSV output.
    feature_importance.to_csv('feature_importance.csv')
    print("\nFeature importance saved to feature_importance.csv")
except Exception as e:
    print(f"Could not compute feature importance: {e}")

In [None]:
# Predict on the test set (skipped entirely when no test data was provided).
if test_data is not None:
    predictions = predictor.predict(test_data)
    print("\nPredictions:")
    print(predictions.head())

    # Probabilities are previewed too; fall back to slicing when the result
    # has no .head() method.
    pred_probs = predictor.predict_proba(test_data)
    print("\nPrediction Probabilities:")
    if hasattr(pred_probs, 'head'):
        print(pred_probs.head())
    else:
        print(pred_probs[:5])

In [None]:
# Bundle the saved model directory into a single zip archive.
model_path = './autogluon-multilabel-model'
zip_filename = 'autogluon_multilabel_model'

if not os.path.exists(model_path):
    # Nothing to archive until training has written the model directory.
    print("Model path not found. Train the model first.")
else:
    shutil.make_archive(base_name=zip_filename, format='zip', root_dir=model_path)
    print(f"\nModel artifacts saved to {zip_filename}.zip")