# Analysis of the computer vision models on the HuggingFace platform



In [None]:
import pandas as pd
import pandas as pd
import ast  # For safely evaluating string representations of lists

# Read the model table from its CSV export into a DataFrame
hf_data = pd.read_csv(filepath_or_buffer='huggingface_models_all.csv')

# Function to parse the 'labels' column safely
def parse_labels(label_str):
    """
    Safely parse the string representation of a label collection into a list.

    Args:
        label_str: One cell of the 'labels' column. Expected to be a string
            holding a Python literal such as "['cat', 'dog']". Anything that
            is not a string (NaN, None, numbers) is treated as "no labels".

    Returns:
        list: The parsed labels. A list literal is returned as parsed; tuple
        and set literals are converted to a list; a dict literal contributes
        its keys (the same items that iterating the dict yields). An empty
        list is returned for missing values, malformed strings, and literals
        that are not collections (e.g. "5" or "'abc'").
    """
    # Missing cells are not strings (NaN is a float), so one type check
    # covers NaN/None as well as any other unexpected cell type.
    if not isinstance(label_str, str):
        return []
    try:
        # literal_eval only evaluates literals, never arbitrary expressions.
        # Surrounding whitespace is stripped so an indented cell still parses.
        parsed = ast.literal_eval(label_str.strip())
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Every exception literal_eval documents for malformed input;
        # e.g. "{[1]: 2}" raises TypeError because of the unhashable key.
        return []
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, (tuple, set, dict)):
        return list(parsed)
    # A scalar literal (number, string, None, ...) is not a label collection;
    # returning it would make a later list.extend() crash or split a string
    # into single characters.
    return []

# Turn every raw 'labels' cell into a Python list, stored in a new column
hf_data['parsed_labels'] = hf_data['labels'].map(parse_labels)

# Map each task name to the set of distinct labels seen across its rows
task_labels_dict = {}

for task, group in hf_data.groupby('task'):
    # Walk every label of every row in this task's group; building a set
    # directly flattens the per-row lists and drops duplicates in one pass.
    task_labels_dict[task] = {
        label
        for row_labels in group['parsed_labels']
        for label in row_labels
    }

# Report, per task, the number of distinct labels and a short sample of them
for task, labels in task_labels_dict.items():
    sample = list(labels)[:10]  # at most 10 labels, in the set's iteration order
    print(
        f"\nTask: {task}",
        f"Number of unique labels: {len(labels)}",
        f"Sample labels: {sample}",
        sep="\n",
    )


Task: any-to-any
Number of unique labels: 0
Sample labels: []

Task: audio-text-to-text
Number of unique labels: 0
Sample labels: []

Task: automatic-speech-recognition
Number of unique labels: 2
Sample labels: ['LABEL_1', 'LABEL_0']

Task: document-question-answering
Number of unique labels: 2
Sample labels: ['LABEL_1', 'LABEL_0']

Task: feature-extraction
Number of unique labels: 2
Sample labels: ['LABEL_1', 'LABEL_0']

Task: image-classification
Number of unique labels: 261737
Sample labels: [0, 1, 2, 3, 'LABEL_7859', 'LABEL_15972', 4, 5, 6, 'LABEL_17863']

Task: image-feature-extraction
Number of unique labels: 1001
Sample labels: ['borzoi, Russian wolfhound', 'modem', 'mouse, computer mouse', 'sorrel', 'spoonbill', 'bolo tie, bolo, bola tie, bola', 'mobile home, manufactured home', 'Shih-Tzu', 'nipple', 'Dungeness crab, Cancer magister']

Task: image-segmentation
Number of unique labels: 2128
Sample labels: [0, 'giraffe', 1, 2, 'carport', 'pane, pane of glass, window glass', 3, 4