In [None]:
!pip install spark-nlp==4.1.0 pyspark==3.3.0

# Spark NLP Image Classification
## Vision Transformer (ViT) models

### Benchmarking image classification in Spark NLP on CPU, CPU with oneDNN enabled, and GPU

**Disclaimer**: This notebook is for benchmarking purposes. To use ViT image classification in Spark NLP, you can simply follow this code:

```python
imageAssembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

imageClassifier = ViTForImageClassification \
    .pretrained("image_classifier_vit_base_patch16_224") \
    .setInputCols("image_assembler") \
    .setOutputCol("class")

pipeline = Pipeline(stages=[
    imageAssembler,
    imageClassifier
])


pipelineModel = pipeline.fit(testDataset)
pipelineDF = pipelineModel.transform(testDataset)
```

If you need to download the datasets (sample/full), uncomment and run the matching command below:

In [1]:
# !wget -q https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/images/imagenet-mini-sample.zip && unzip ./imagenet-mini-sample.zip >/dev/null 2>&1
# !wget -q https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/resources/en/images/imagenet-mini.zip && unzip ./imagenet-mini.zip >/dev/null 2>&1

- Let's select the device we want to benchmark.
> Keep in mind that in a normal Spark NLP application, `spark = sparknlp.start()` is all you need; for GPU, use `spark = sparknlp.start(gpu=True)`.

In [None]:
# Build/device to benchmark. Keep exactly one assignment active: the value
# selects the branch taken in the session-setup cell and is used as the prefix
# of both the printed result lines and the results file name.
lib_device = "4.1.0-cpu"
# lib_device = "4.1.0-cpu-opt"
# lib_device = "4.1.0-gpu"

In [None]:
from sparknlp.annotator import *
from sparknlp.common import *
from sparknlp.base import *
import sparknlp

from pyspark.ml import Pipeline
from pyspark.sql import SparkSession

from timeit import default_timer as timer

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


# Per-device settings: (value for TF_ENABLE_ONEDNN_OPTS, start the GPU build?).
_DEVICE_SETTINGS = {
    "4.1.0-cpu": ("0", False),
    "4.1.0-cpu-opt": ("1", False),
    "4.1.0-gpu": ("0", True),
}

# Fail fast on a typo instead of leaving `spark` unusable and erroring in a
# later cell with a confusing message.
if lib_device not in _DEVICE_SETTINGS:
    raise ValueError(
        f"Unknown lib_device {lib_device!r}; expected one of {sorted(_DEVICE_SETTINGS)}"
    )

onednn_flag, use_gpu = _DEVICE_SETTINGS[lib_device]

# Set the flag in the kernel's own environment before the session is started.
# (A `!export ...` line would only change a throwaway subshell, so it is not used.)
os.environ["TF_ENABLE_ONEDNN_OPTS"] = onednn_flag

spark = sparknlp.start(gpu=use_gpu, memory="128g")

print("Spark NLP: ", sparknlp.version())
print("Apache Spark: ", spark.version)

In [2]:
# Which dataset to benchmark: "sample" (~3k images) or "full" (~34k images).
DATASET_SIZE = "sample"
# DATASET_SIZE = "full"

# Any value other than "sample" selects the full imagenet-mini directory.
DATASET_PATH = "./imagenet-mini-sample" if DATASET_SIZE == "sample" else "./imagenet-mini"

# Read the directory with Spark's image data source, dropping unreadable files.
testDataset = (
    spark.read
    .format("image")
    .option("dropInvalid", value=True)
    .load(DATASET_PATH)
)

# Sanity check: number of images loaded and a small preview.
print(testDataset.count())
testDataset.show(3)

                                                                                

34742


                                                                                

+--------------------+
|               image|
+--------------------+
|{file:///home/maz...|
|{file:///home/maz...|
|{file:///home/maz...|
+--------------------+
only showing top 3 rows



In [3]:
# Shared first pipeline stage: reads the "image" column and writes
# "image_assembler", which the classifier stage takes as its input column.
imageAssembler = (
    ImageAssembler()
    .setInputCol("image")
    .setOutputCol("image_assembler")
)

def RunBenchPipeline(annot, model, batch_size=8):
    """Benchmark one annotator/model/batch-size combination on ``testDataset``.

    Builds a two-stage pipeline (the shared ``imageAssembler`` plus the
    requested classifier), then times a ``count()`` over the classifier output.
    The result line is printed and appended to
    ``./{lib_device}_{annot}_{model}.txt``.

    Relies on notebook globals: ``imageAssembler``, ``testDataset``,
    ``lib_device``, ``Pipeline`` and ``timer``.

    Args:
        annot: Annotator class name. Only ``"ViTForImageClassification"`` is
            currently supported.
        model: Name of the pretrained model to load for the annotator; also
            used in the results file name.
        batch_size: Batch size configured on the classifier.

    Raises:
        ValueError: If ``annot`` is not a supported annotator name.
    """
    # Validate first so a misconfigured benchmark entry is reported instead of
    # being skipped silently.
    if annot != "ViTForImageClassification":
        raise ValueError(f"Unsupported annotator for benchmarking: {annot!r}")

    print("-" * 30)

    # Load the model that was actually requested, so the results file name
    # always matches what was benchmarked.
    imageClassifier = (
        ViTForImageClassification
        .pretrained(model)
        .setInputCols("image_assembler")
        .setOutputCol("class")
        .setBatchSize(batch_size)
    )

    pipeline = Pipeline(stages=[imageAssembler, imageClassifier])
    pipelineModel = pipeline.fit(testDataset)
    pipelineDF = pipelineModel.transform(testDataset)

    with open('./{}_{}_{}.txt'.format(lib_device, annot, model), 'a') as f:
        print("-" * 30, file=f)

        # Only the count() action is timed; it forces the classifier output
        # to be computed for every row.
        start = timer()
        total_count = pipelineDF.select("class.result").count()
        elapsed = timer() - start

        # Format once, emit to both the notebook and the results file.
        report = (
            f'{lib_device}: took {elapsed:.2f} seconds to finish computing '
            f'{total_count} images with batch size {imageClassifier.getBatchSize()}'
        )
        print(report)
        print(report, file=f)

In [None]:
# Benchmark driver: each entry maps a pretrained model name to the annotator
# class that loads it. Add entries here to benchmark other models/annotators.
benchmarks = {
    'image_classifier_vit_base_patch16_224': 'ViTForImageClassification'
}

# Batch sizes to run, in order; remove entries to skip individual runs.
batch_sizes = (2, 8, 16, 32, 64, 128, 256, 512, 1024)

for model, annot in benchmarks.items():
    for batch_size in batch_sizes:
        RunBenchPipeline(annot, model, batch_size)

## imagenet-mini-sample

### CPU
```
                                                                                
4.1.0-cpu: took 161.02 seconds to finish computing 3544 images with batch size 2
------------------------------
4.1.0-cpu: took 144.00 seconds to finish computing 3544 images with batch size 8
------------------------------
4.1.0-cpu: took 129.40 seconds to finish computing 3544 images with batch size 16
------------------------------
4.1.0-cpu: took 138.17 seconds to finish computing 3544 images with batch size 32
------------------------------
4.1.0-cpu: took 138.52 seconds to finish computing 3544 images with batch size 64
------------------------------
4.1.0-cpu: took 136.54 seconds to finish computing 3544 images with batch size 128
```

### CPU with oneDNN
```
------------------------------
4.1.0-cpu-opt: took 113.67 seconds to finish computing 3544 images with batch size 2
------------------------------
4.1.0-cpu-opt: took 116.34 seconds to finish computing 3544 images with batch size 8
------------------------------
4.1.0-cpu-opt: took 131.04 seconds to finish computing 3544 images with batch size 16
------------------------------
4.1.0-cpu-opt: took 144.99 seconds to finish computing 3544 images with batch size 32
------------------------------
4.1.0-cpu-opt: took 157.73 seconds to finish computing 3544 images with batch size 64
------------------------------
4.1.0-cpu-opt: took 152.32 seconds to finish computing 3544 images with batch size 128

```

### GPU
```
------------------------------
4.1.0-gpu: took 48.91 seconds to finish computing 3544 images with batch size 2
------------------------------
4.1.0-gpu: took 41.15 seconds to finish computing 3544 images with batch size 8
------------------------------
4.1.0-gpu: took 38.80 seconds to finish computing 3544 images with batch size 16
------------------------------
4.1.0-gpu: took 36.55 seconds to finish computing 3544 images with batch size 32
------------------------------
4.1.0-gpu: took 42.44 seconds to finish computing 3544 images with batch size 64
------------------------------
4.1.0-gpu: took 44.06 seconds to finish computing 3544 images with batch size 128
------------------------------
4.1.0-gpu: took 41.46 seconds to finish computing 3544 images with batch size 256
------------------------------
4.1.0-gpu: took 51.32 seconds to finish computing 3544 images with batch size 512
------------------------------
4.1.0-gpu: took 41.76 seconds to finish computing 3544 images with batch size 1024
```


## imagenet-mini

### CPU
```
4.1.0-cpu: took 1423.02 seconds to finish computing 34742 images with batch size 16
```

### CPU with oneDNN
```
4.1.0-cpu-opt: took 1277.69 seconds to finish computing 34742 images with batch size 2
```

### GPU
```
------------------------------
4.1.0-gpu: took 408.46 seconds to finish computing 34742 images with batch size 2
------------------------------
4.1.0-gpu: took 337.21 seconds to finish computing 34742 images with batch size 8
------------------------------
4.1.0-gpu: took 306.45 seconds to finish computing 34742 images with batch size 16
------------------------------
4.1.0-gpu: took 277.35 seconds to finish computing 34742 images with batch size 32
------------------------------
4.1.0-gpu: took 280.50 seconds to finish computing 34742 images with batch size 64
------------------------------
4.1.0-gpu: took 297.29 seconds to finish computing 34742 images with batch size 128
------------------------------
4.1.0-gpu: took 283.23 seconds to finish computing 34742 images with batch size 256
------------------------------
4.1.0-gpu: took 289.30 seconds to finish computing 34742 images with batch size 512
------------------------------
4.1.0-gpu: took 285.95 seconds to finish computing 34742 images with batch size 1024

```

### Hardware

In [5]:
# Print the CPU, memory and GPU details of the machine running this notebook,
# so the benchmark numbers above can be read in context.
!lscpu

!free -h

!nvidia-smi

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                80
On-line CPU(s) list:   0-79
Thread(s) per core:    2
Core(s) per socket:    20
Socket(s):             2
NUMA node(s):          2
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 79
Model name:            Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz
Stepping:              1
CPU MHz:               1200.718
CPU max MHz:           3600.0000
CPU min MHz:           1200.0000
BogoMIPS:              4401.48
Virtualization:        VT-x
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              51200K
NUMA node0 CPU(s):     0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,74,76,78
NUMA node1 CPU(s):     1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79
Flags:            