# Image Classification

## Load Sample Data (Image)

In [None]:
from PIL import Image

# Load the sample image that every prediction cell below classifies.
# Image.open() keeps the file's native mode (a JPEG may be grayscale or CMYK),
# so convert to 3-channel RGB up front; the manual processor/model path later
# in the notebook receives this object directly. convert() also forces the
# pixel data to be read now instead of lazily on first use.
img = Image.open("images/anime-image.jpg").convert("RGB")

In [None]:
# Hugging Face Hub model id shared by the pipeline cell and the manual
# processor/model cells below, so both routes load the same weights.
checkpoint = "prithivMLmods/vit-mini-explicit-content"

In [None]:
# Human-readable class names, listed in the model's class-index order.
# The final cell looks names up by position in the probability vector, so the
# order here must match the checkpoint's output order. Note that the checkpoint
# itself reports index 1 as 'Extincing & Sensual' (see the pipeline output).
labels = [
    "Anime Picture",       # class index 0
    "Enticing & Sensual",  # class index 1
    "Hentai",              # class index 2
    "Pornography",         # class index 3
    "Safe for Work",       # class index 4
]

## Predict Through Pipeline

In [None]:
from transformers import pipeline

# High-level route: build an image-classification pipeline for the shared
# checkpoint. The task is passed by keyword to make the call self-describing.
classifier = pipeline(task="image-classification", model=checkpoint)

Device set to use cpu


In [None]:
# Run the pipeline on the sample image. The cell's displayed value is a list of
# {'label': ..., 'score': ...} dicts, one per class (highest score first in the
# output shown below).
classifier(img)

[{'label': 'Anime Picture', 'score': 0.8580977320671082},
 {'label': 'Hentai', 'score': 0.05237792804837227},
 {'label': 'Safe for Work', 'score': 0.04437308758497238},
 {'label': 'Pornography', 'score': 0.02566503919661045},
 {'label': 'Extincing & Sensual', 'score': 0.01948624849319458}]

## Predict Through AutoFeatureExtractor and AutoModelForImageClassification

In [None]:
# AutoFeatureExtractor stays imported only so the notebook namespace is
# unchanged for any cell that still refers to it; it is no longer used here.
from transformers import (
    AutoFeatureExtractor,
    AutoImageProcessor,
    AutoModelForImageClassification,
)

# Low-level route: load the preprocessing config and the classifier weights
# separately. AutoImageProcessor is the current loader for vision checkpoints;
# the AutoFeatureExtractor path it replaces is deprecated for image models.
processor = AutoImageProcessor.from_pretrained(checkpoint)
transformer_model = AutoModelForImageClassification.from_pretrained(checkpoint)



In [None]:
import torch

# Turn the PIL image into the tensor batch the model expects.
inputs = processor(img, return_tensors="pt")
print(inputs.keys())
# Summarise each tensor by shape and dtype instead of echoing the raw pixel
# values, which floods the notebook output without telling the reader anything.
# (For an image batch the shape is presumably (batch, channels, height, width).)
{name: (tuple(tensor.shape), tensor.dtype) for name, tensor in inputs.items()}

dict_keys(['pixel_values'])


{'pixel_values': tensor([[[[-0.2863, -0.2784, -0.2627,  ...,  0.7961,  0.8039,  0.8118],
          [-0.2863, -0.2784, -0.2627,  ...,  0.7961,  0.8039,  0.8118],
          [-0.2863, -0.2784, -0.2627,  ...,  0.8039,  0.8118,  0.8118],
          ...,
          [-0.4510, -0.7961, -0.7804,  ..., -0.6314, -0.6157, -0.5529],
          [-0.3490, -0.7804, -0.7725,  ..., -0.6627, -0.6392, -0.5765],
          [-0.2863, -0.7725, -0.7725,  ..., -0.6863, -0.6549, -0.5922]],

         [[ 0.5765,  0.5765,  0.5765,  ...,  0.9843,  0.9765,  0.9608],
          [ 0.5765,  0.5765,  0.5765,  ...,  0.9843,  0.9765,  0.9608],
          [ 0.5765,  0.5765,  0.5765,  ...,  0.9922,  0.9843,  0.9608],
          ...,
          [-0.4353, -0.7490, -0.6941,  ...,  0.2706,  0.2471,  0.2392],
          [-0.3490, -0.7412, -0.6863,  ...,  0.2471,  0.2314,  0.2157],
          [-0.3020, -0.7412, -0.6863,  ...,  0.2235,  0.2235,  0.2000]],

         [[ 0.9765,  0.9765,  0.9843,  ...,  1.0000,  0.9922,  0.9922],
          [ 0

In [None]:
# Inference only: no_grad() disables autograd tracking for the forward pass.
with torch.no_grad():
    # Unpack the processor output as keyword arguments (here just pixel_values).
    outputs = transformer_model(**inputs)
# Display the raw model output object; its logits field holds the class scores.
outputs

ImageClassifierOutput(loss=None, logits=tensor([[ 2.5009, -1.2841, -0.2954, -1.0087, -0.4612]]), hidden_states=None, attentions=None)

In [None]:
# Pull the unnormalised class scores out of the model output (one row per
# input image, one column per class) and display them.
logits = outputs["logits"]
logits

tensor([[ 2.5009, -1.2841, -0.2954, -1.0087, -0.4612]])

In [None]:
# Normalise the logits across the class axis (dim 1) so each row sums to 1,
# then display the resulting probabilities.
probs = logits.softmax(dim=1)
probs

tensor([[0.8581, 0.0195, 0.0524, 0.0257, 0.0444]])

In [None]:
# Report the probability of every class for the single image in the batch.
# Take row 0 explicitly rather than squeeze(), which would also drop a class
# axis of size 1, and convert to plain Python floats for formatting.
class_probs = probs[0].tolist()

# `labels` is a hand-maintained list that must line up with the model's class
# indices; fail loudly on a mismatch instead of mislabelling or raising an
# IndexError partway through the printout.
if len(labels) != len(class_probs):
    raise ValueError(
        f"labels has {len(labels)} entries but the model produced "
        f"{len(class_probs)} class probabilities"
    )

for label, prob in zip(labels, class_probs):
    print(f"{label}: {prob * 100:.2f}%")

Anime Picture: 85.81%
Enticing & Sensual: 1.95%
Hentai: 5.24%
Pornography: 2.57%
Safe for Work: 4.44%
