## Multilabel Classification of Frames of Futurama

Dataset Source: https://www.kaggle.com/datasets/gonzalorecioc/futurama-frames-with-characteronscreen-data

#### Import Necessary Libraries

In [1]:
import os, sys

import numpy as np
import pandas as pd

import PIL.Image
from PIL import ImageDraw, ImageFont

import transformers
from transformers import AutoTokenizer, ViTForImageClassification
from transformers import TrainingArguments, Trainer, set_seed, ViTImageProcessor

import datasets
from datasets import Dataset, DatasetDict, load_dataset

import torch

import evaluate

from sklearn.metrics import f1_score, roc_auc_score, accuracy_score

!git lfs install

NOTE: Redirects are currently not supported in Windows or MacOs.


Git LFS initialized.


##### Display Version of Relevant Libraries

In [2]:
print("Python:".rjust(15), sys.version[0:6])
print("Pandas:".rjust(15), pd.__version__)
print("NumPy:".rjust(15), np.__version__)
print("Torch:".rjust(15), torch.__version__)
print("Transformers:".rjust(15), transformers.__version__)
print("Datasets:".rjust(15), datasets.__version__)
print("Evaluate:".rjust(15), evaluate.__version__)

        Python: 3.9.12
        Pandas: 1.5.0
         NumPy: 1.23.3
         Torch: 1.12.1
  Transformers: 4.26.1
      Datasets: 2.8.0
      Evaluate: 0.2.2


#### Make Necessary Updates to Metadata File

In [3]:
parent_dir = "/Users/briandunn/Desktop/Vit_Image_Datasets/ML CLF/Futurama Frames"

df = pd.read_csv(os.path.join(parent_dir, "data.csv"))

df = df.rename(columns={"file" : "file_name"})

df.to_csv(os.path.join(parent_dir, "img", "metadata.csv"), index=False)

df

Unnamed: 0,file_name,isLeela,isFry,isBender
0,Futurama_1_04081.png,0,1,0
1,Futurama_2_17161.png,1,1,0
2,Futurama_7_14497.png,1,1,1
3,Futurama_6_23881.png,0,0,0
4,Futurama_5_02065.png,1,0,0
...,...,...,...,...
9151,Futurama_2_06193.png,0,1,0
9152,Futurama_1_10801.png,0,0,0
9153,Futurama_6_17593.png,1,1,1
9154,Futurama_6_00313.png,0,1,0


#### Ingest Dataset

In [4]:
dataset = load_dataset(os.path.join(parent_dir, "img"), split='train')
dataset
dataset[12]

A Jupyter Widget

Using custom data configuration img-00d4e2cc6bbffb36


Downloading and preparing dataset imagefolder/img to /Users/briandunn/.cache/huggingface/datasets/imagefolder/img-00d4e2cc6bbffb36/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f...


A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

Dataset imagefolder downloaded and prepared to /Users/briandunn/.cache/huggingface/datasets/imagefolder/img-00d4e2cc6bbffb36/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f. Subsequent calls will reuse this data.


{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=180x135>,
 'isLeela': 0,
 'isFry': 0,
 'isBender': 0}

#### Split Dataset into Training & Testing Datasets

In [5]:
train_test_ds = dataset.train_test_split(test_size=0.20)

ds = DatasetDict({
    'train': train_test_ds['train'], 
    'eval': train_test_ds['test']
    })

print('Training data shape:', ds['train'].shape)
print('Validation data shape:', ds['eval'].shape)

Training data shape: (7324, 4)
Validation data shape: (1832, 4)


#### Define List of Labels & Label Conversion Dictionaries

In [6]:
labels = [x for x in df.columns if x not in ['image', 'file_name']]

num_labels=len(labels)
id2label={str(i): c for i, c in enumerate(labels)}
label2id={c: str(i) for i, c in enumerate(labels)}

#### Basic Values/Constants

In [7]:
set_seed = 42
NUM_OF_EPOCHS = 8

BATCH_SIZE = 8
LEARNING_RATE = 2e-5

REPORTS_TO = "tensorboard"
STRATEGY = "epoch"

MODEL_CKPT = "google/vit-base-patch16-224"
MODEL_NAME = MODEL_CKPT.split("/")[-1] + "-Futurama_Image_multilabel_clf"

METRIC_NAME = "f1"
DEVICE = torch.device("mps")

#### Load ViT Feature Extractor

In [8]:
feature_extractor = ViTImageProcessor.from_pretrained(MODEL_CKPT)

#### Preprocess Dataset

In [9]:
def transform(sample_batch):
    # Convert list of PIL Images into pixel values
    inputs = feature_extractor([x for x in sample_batch['image']], return_tensors="pt")
    
    # Prep Labels
    labels_batch = {k: sample_batch[k] for k in sample_batch.keys() if k in labels}
    
    labels_matrix = np.zeros((len(sample_batch['image']), len(labels)))
    
    for idx, label in enumerate(labels):
        labels_matrix[:, idx] = labels_batch[label]
    
    inputs["labels"] = labels_matrix.tolist()
    
    return inputs

#### Apply Transform Function to Dataset

In [10]:
print(ds['train'][12])

prepped_ds = ds.with_transform(transform)
prepped_ds
prepped_ds['train'][12]

{'image': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=180x135 at 0x7FE688150F40>, 'isLeela': 0, 'isFry': 0, 'isBender': 0}


{'pixel_values': tensor([[[-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.]],
 
         [[-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.]],
 
         [[-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          ...,
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.],
          [-1., -1., -1.,  ..., -1., -1., -1.]]]),
 'labels': [0.0, 0.0, 0.0]}

#### Instantiate Model

In [11]:
model = ViTForImageClassification.from_pretrained(
    MODEL_CKPT, 
    problem_type="multi_label_classification",
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True
).to(DEVICE)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


#### Define Data Collator Function

In [12]:
def data_collator(batch):
    return {
        'pixel_values' : torch.stack([x['pixel_values'] for x in batch]),
        'labels' : torch.tensor([x['labels'] for x in batch]),
    }

#### Define Function to Compute Metrics 

In [13]:
def multi_label_metrics(predictions, 
                        labels, 
                        threshold=0.5):
    '''
    This function calculates & returns metrics 
    for a multilabel classification analysis.
    '''
    
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    
    preds = preds[0] if isinstance(preds, 
                    tuple) else preds
    results = multi_label_metrics(
                    predictions=preds, 
                    labels=labels)
    return results

#### Define Training Arguments

In [14]:
args = TrainingArguments(
    output_dir=MODEL_NAME,
    remove_unused_columns=False,
    num_train_epochs=NUM_OF_EPOCHS,
    evaluation_strategy=STRATEGY,
    save_strategy=STRATEGY,
    learning_rate=LEARNING_RATE,
    disable_tqdm=False,
    report_to=REPORTS_TO,
    load_best_model_at_end=True,
    metric_for_best_model=METRIC_NAME,
    logging_first_step=True,
    hub_private_repo=True,
    push_to_hub=True
)

#### Instantiate Trainer

In [15]:
trainer = Trainer(
    model=model,
    args=args,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=prepped_ds['train'],
    eval_dataset=prepped_ds['eval'],
    tokenizer=feature_extractor
)

Cloning https://huggingface.co/DunnBC22/vit-base-patch16-224-Futurama_Image_multilabel_clf into local empty directory.


#### Train Model

In [16]:
train_results = trainer.train()

***** Running training *****
  Num examples = 7324
  Num Epochs = 8
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 7328
  Number of trainable parameters = 85800963


A Jupyter Widget

{'loss': 0.6884, 'learning_rate': 1.999727074235808e-05, 'epoch': 0.0}
{'loss': 0.2456, 'learning_rate': 1.8635371179039304e-05, 'epoch': 0.55}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-916
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-916/config.json


{'eval_loss': 0.07229584455490112, 'eval_f1': 0.9710691823899372, 'eval_roc_auc': 0.9745990820070386, 'eval_accuracy': 0.9481441048034934, 'eval_runtime': 1680.1374, 'eval_samples_per_second': 1.09, 'eval_steps_per_second': 0.136, 'epoch': 1.0}


Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-916/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-916/preprocessor_config.json
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json
Adding files tracked by Git LFS: ['.DS_Store']. This may take a bit of time if the files are large.


{'loss': 0.0749, 'learning_rate': 1.7270742358078607e-05, 'epoch': 1.09}
{'loss': 0.0269, 'learning_rate': 1.5906113537117906e-05, 'epoch': 1.64}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-1832
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-1832/config.json


{'eval_loss': 0.05449194833636284, 'eval_f1': 0.9799095931692616, 'eval_roc_auc': 0.9818209165245149, 'eval_accuracy': 0.9639737991266376, 'eval_runtime': 1758.0937, 'eval_samples_per_second': 1.042, 'eval_steps_per_second': 0.13, 'epoch': 2.0}


Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-1832/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-1832/preprocessor_config.json
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


{'loss': 0.0234, 'learning_rate': 1.4541484716157206e-05, 'epoch': 2.18}
{'loss': 0.0086, 'learning_rate': 1.3176855895196507e-05, 'epoch': 2.73}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-2748
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-2748/config.json


{'eval_loss': 0.05799925699830055, 'eval_f1': 0.9794073329984931, 'eval_roc_auc': 0.9814291406807462, 'eval_accuracy': 0.9623362445414847, 'eval_runtime': 1649.1127, 'eval_samples_per_second': 1.111, 'eval_steps_per_second': 0.139, 'epoch': 3.0}


Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-2748/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-2748/preprocessor_config.json
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


{'loss': 0.0046, 'learning_rate': 1.181222707423581e-05, 'epoch': 3.28}
{'loss': 0.0044, 'learning_rate': 1.044759825327511e-05, 'epoch': 3.82}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-3664
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-3664/config.json


{'eval_loss': 0.06122291088104248, 'eval_f1': 0.9814350225790265, 'eval_roc_auc': 0.983205248199549, 'eval_accuracy': 0.9650655021834061, 'eval_runtime': 1658.3164, 'eval_samples_per_second': 1.105, 'eval_steps_per_second': 0.138, 'epoch': 4.0}


Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-3664/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-3664/preprocessor_config.json
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


{'loss': 0.0022, 'learning_rate': 9.082969432314411e-06, 'epoch': 4.37}
{'loss': 0.0027, 'learning_rate': 7.718340611353714e-06, 'epoch': 4.91}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-4580
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-4580/config.json


{'eval_loss': 0.05915337800979614, 'eval_f1': 0.98175456135966, 'eval_roc_auc': 0.984184901660414, 'eval_accuracy': 0.9672489082969432, 'eval_runtime': 1650.3185, 'eval_samples_per_second': 1.11, 'eval_steps_per_second': 0.139, 'epoch': 5.0}


Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-4580/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-4580/preprocessor_config.json
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


{'loss': 0.0017, 'learning_rate': 6.353711790393014e-06, 'epoch': 5.46}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-5496
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-5496/config.json


{'eval_loss': 0.06344735622406006, 'eval_f1': 0.9800399201596807, 'eval_roc_auc': 0.9831794434587476, 'eval_accuracy': 0.9645196506550219, 'eval_runtime': 1655.6208, 'eval_samples_per_second': 1.107, 'eval_steps_per_second': 0.138, 'epoch': 6.0}


Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-5496/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-5496/preprocessor_config.json
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


{'loss': 0.0013, 'learning_rate': 4.989082969432315e-06, 'epoch': 6.0}
{'loss': 0.0012, 'learning_rate': 3.624454148471616e-06, 'epoch': 6.55}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-6412
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-6412/config.json
Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-6412/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-6412/preprocessor_config.json


{'eval_loss': 0.06574680656194687, 'eval_f1': 0.98173630222667, 'eval_roc_auc': 0.9839758975166857, 'eval_accuracy': 0.9667030567685589, 'eval_runtime': 1588.6633, 'eval_samples_per_second': 1.153, 'eval_steps_per_second': 0.144, 'epoch': 7.0}


Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


{'loss': 0.0006, 'learning_rate': 2.259825327510917e-06, 'epoch': 7.1}
{'loss': 0.0005, 'learning_rate': 8.951965065502185e-07, 'epoch': 7.64}


***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-7328
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-7328/config.json
Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-7328/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-7328/preprocessor_config.json


{'eval_loss': 0.06680033355951309, 'eval_f1': 0.981235926945209, 'eval_roc_auc': 0.9835841216729171, 'eval_accuracy': 0.9667030567685589, 'eval_runtime': 1637.4794, 'eval_samples_per_second': 1.119, 'eval_steps_per_second': 0.14, 'epoch': 8.0}


Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from vit-base-patch16-224-Futurama_Image_multilabel_clf/checkpoint-4580 (score: 0.98175456135966).


{'train_runtime': 171163.2402, 'train_samples_per_second': 0.342, 'train_steps_per_second': 0.043, 'train_loss': 0.027292877175401912, 'epoch': 8.0}


#### Save & Log Model

In [17]:
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/config.json
Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json
Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/config.json
Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json
Several commits (2) will be pushed upstream.
The progress bars may be unreliable.


A Jupyter Widget

A Jupyter Widget

remote: Scanning LFS files for validity...        
remote: LFS file scan complete.        
To https://huggingface.co/DunnBC22/vit-base-patch16-224-Futurama_Image_multilabel_clf
   699db9b..46f164c  main -> main

To https://huggingface.co/DunnBC22/vit-base-patch16-224-Futurama_Image_multilabel_clf
   46f164c..0d54221  main -> main



***** train metrics *****
  epoch                    =                8.0
  train_loss               =             0.0273
  train_runtime            = 1 day, 23:32:43.24
  train_samples_per_second =              0.342
  train_steps_per_second   =              0.043


#### Evaluate, Log, & Save Metrics

In [18]:
metrics = trainer.evaluate(prepped_ds['eval'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

***** Running Evaluation *****
  Num examples = 1832
  Batch size = 8


A Jupyter Widget

***** eval metrics *****
  epoch                   =        8.0
  eval_accuracy           =     0.9672
  eval_f1                 =     0.9818
  eval_loss               =     0.0592
  eval_roc_auc            =     0.9842
  eval_runtime            = 0:27:32.60
  eval_samples_per_second =      1.109
  eval_steps_per_second   =      0.139


#### Push Model to Hub (My Profile!!!)

In [19]:
kwargs = {
    "finetuned_from": model.config._name_or_path,
    "tasks": "image-classification",
    "tags": ['image-classification'],
}

if args.push_to_hub:
    trainer.push_to_hub('All DUNN!!!', **kwargs)
else:
    trainer.create_model_card(**kwargs)

Saving model checkpoint to vit-base-patch16-224-Futurama_Image_multilabel_clf
Configuration saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/config.json
Model weights saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/pytorch_model.bin
Image processor saved in vit-base-patch16-224-Futurama_Image_multilabel_clf/preprocessor_config.json


A Jupyter Widget

remote: Scanning LFS files for validity...        
remote: LFS file scan complete.        
To https://huggingface.co/DunnBC22/vit-base-patch16-224-Futurama_Image_multilabel_clf
   0d54221..5606d1d  main -> main

To https://huggingface.co/DunnBC22/vit-base-patch16-224-Futurama_Image_multilabel_clf
   5606d1d..b08985e  main -> main



### Notes & Other Takeaways From This Project
****
- I am pleasantly surprised at how quickly this model was able to find the optimal solution.
****

### Citations
- Model Checkpoint
    - @misc{wu2020visual,
      title={Visual Transformers: Token-based Image Representation and Processing for Computer Vision}, 
      author={Bichen Wu and Chenfeng Xu and Xiaoliang Dai and Alvin Wan and Peizhao Zhang and Zhicheng Yan and Masayoshi Tomizuka and Joseph Gonzalez and Kurt Keutzer and Peter Vajda},
      year={2020},
      eprint={2006.03677},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}

    - @inproceedings{deng2009imagenet,
      title={Imagenet: A large-scale hierarchical image database},
      author={Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
      booktitle={2009 IEEE conference on computer vision and pattern recognition},
      pages={248--255},
      year={2009},
      organization={Ieee}
}