In [None]:
import os
from PIL import Image
from io import BytesIO
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

In [None]:
img_dir = '/content/images'
# os.listdir() returns entries in arbitrary order, so sort them to make the
# notebook's output order reproducible between runs.
images = sorted(os.listdir(img_dir))
# Keep regular files only: a stray sub-directory inside img_dir (for example
# Jupyter's `.ipynb_checkpoints`) would otherwise fail later when opened as an
# image.
img_files = [
  pth
  for pth in (os.path.join(img_dir, img) for img in images)
  if os.path.isfile(pth)
]
len(img_files)

In [None]:
def load_image(img_pth):
  """Read an image file from disk and open it with PIL.

  Args:
    img_pth: Path of the image file to read.

  Returns:
    A ``(image, image_bytes)`` tuple: the PIL image opened from an in-memory
    copy of the file, and the raw bytes that were read.
  """
  with open(img_pth, 'rb') as fh:
    raw = fh.read()
  # Open from a BytesIO so the image does not depend on the file handle above.
  return Image.open(BytesIO(raw)), raw

def show_image(image, title=''):
  """Display ``image`` with matplotlib, axes visible.

  Args:
    image: Anything ``plt.imshow`` accepts (the notebook passes PIL images).
    title: Optional text placed above the plot; empty by default.
  """
  plt.imshow(image)
  plt.axis('on')  # keep the pixel-coordinate ticks visible
  plt.title(title)
  plt.show()

In [None]:
# Sanity check: decode the first listed image and display it.
first_image = load_image(img_files[0])[0]
show_image(first_image)

In [None]:
import tensorflow as tf
import tensorflow_hub as hub

### CLIP-IQA

In [None]:
!pip install --upgrade --force-reinstall -U torchmetrics[multimodal]

In [None]:
from torchmetrics.functional.multimodal import clip_image_quality_assessment
from torchvision.transforms.functional import pil_to_tensor

In [None]:
# Score every image with CLIP-IQA.
#
# Built-in prompt keywords listed as available options:
#   quality, brightness, noisiness, colorfullness, sharpness, contrast,
#   complexity, natural, happy, scary, new, warm, real, beautiful, lonely,
#   relaxing
# A (positive, negative) tuple of strings defines a custom prompt pair; the
# score of the pair below is read back under the key 'user_defined_0'.
iqa_prompts = ("quality", ("aesthetic photo", "unaesthetic photo"))

scores = []
for img_file in img_files:
  img, _ = load_image(img_file)
  show_image(img)
  # CLIP needs 3-channel input, so normalise grayscale / RGBA / palette images.
  # pil_to_tensor() keeps the uint8 dtype and the [0, 255] value range, so the
  # metric must be told data_range=255 (its default of 1.0 assumes inputs that
  # are already scaled to [0, 1]). unsqueeze(0) adds the batch dimension.
  img_tensor = pil_to_tensor(img.convert("RGB")).unsqueeze(0)
  score = clip_image_quality_assessment(img_tensor, data_range=255.0, prompts=iqa_prompts)

  aesthetic_score = score['user_defined_0'].item()

  scores.append({
      # basename() matches how the paths were built (os.path.join) instead of
      # assuming '/' is the separator.
      'img': os.path.basename(img_file),
      'aesthetic_score': aesthetic_score,
  })
  print('aesthetic_score:', aesthetic_score)

pd.DataFrame(scores).describe()

### CLIP: Classification

In [None]:
!pip install --upgrade --force-reinstall -U salesforce-lavis

In [None]:
import torch
from PIL import Image

from lavis.models import load_model_and_preprocess

In [None]:
# Pick the compute device: CUDA when PyTorch reports it as available, else CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

In [None]:
# Load the LAVIS "clip_feature_extractor" (ViT-B-16) with is_eval=True, along
# with the image and text processors returned next to it.
model, vis_processors, txt_processors = load_model_and_preprocess(
  "clip_feature_extractor",
  model_type="ViT-B-16",
  is_eval=True,
  device=device,
)

In [None]:
# Class prompts for scoring: entry 0 describes an appealing photo, entry 1 an
# unappealing one. Every string is passed through the "eval" text processor.
cls_names = list(map(txt_processors["eval"], [
  "Vibrant, sharp photo with inviting ambiance, unique perspective, excellent lighting, enticing food presentation, and captivating details",
  "Dull, blurry photo with poor composition, harsh lighting, unappealing food presentation, and lack of engaging details",
]))

In [None]:
def score_image(image, temperature=0.01):
  """Return the probability that ``image`` matches the first class prompt.

  The image is converted to RGB, preprocessed with ``vis_processors["eval"]``
  and embedded together with the module-level ``cls_names`` prompts. The
  image/text similarities are divided by ``temperature`` and soft-maxed over
  the prompts.

  Args:
    image: Object exposing ``convert("RGB")`` (the notebook passes PIL images).
    temperature: Divisor applied to the similarities before the softmax.
      Defaults to 0.01, the value previously hard-coded here.

  Returns:
    A float: the softmax probability of ``cls_names[0]``.
  """
  raw_image = image.convert("RGB")
  image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
  sample = {"image": image, "text_input": cls_names}

  # Pure inference: skip autograd bookkeeping for the forward pass.
  with torch.no_grad():
    clip_features = model.extract_features(sample)

    image_features = clip_features.image_embeds_proj
    text_features = clip_features.text_embeds_proj

    # Row 0 is the single image in the batch; one similarity per prompt.
    sims = (image_features @ text_features.t())[0] / temperature
    probs = torch.softmax(sims, dim=0).tolist()

  return probs[0]

In [None]:
# Score every image with score_image(), displaying each picture followed by
# its score, then summarise the scores.
scores = []
for img_file in img_files:
  img, _ = load_image(img_file)
  show_image(img)
  aesthetics_score = score_image(img)

  scores.append({
      # basename() matches how the paths were built (os.path.join) instead of
      # assuming '/' is the separator.
      'img': os.path.basename(img_file),
      'score': aesthetics_score
  })
  print("aesthetics_score:", aesthetics_score)

pd.DataFrame(scores).describe()

### BLIP: Classification

In [None]:
!pip install --upgrade --force-reinstall -U salesforce-lavis

In [None]:
import torch
from PIL import Image

from lavis.models import load_model_and_preprocess
from lavis.processors.blip_processors import BlipCaptionProcessor

In [None]:
# Use CUDA when PyTorch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Load the LAVIS "blip_feature_extractor" (base) with is_eval=True. Only the
# model and the image processors are kept; the third return value is discarded.
model, vis_processors, _ = load_model_and_preprocess(
  "blip_feature_extractor",
  model_type="base",
  is_eval=True,
  device=device,
)

In [None]:
# Raw class descriptions: entry 0 is the appealing photo, entry 1 the
# unappealing one.
cls_names = [
  "Vibrant, sharp photo with inviting ambiance, unique perspective, excellent lighting, enticing food presentation, and captivating details",
  "Dull, blurry photo with poor composition, harsh lighting, unappealing food presentation, and lack of engaging details",
]

# cls_prompt holds the same descriptions after BlipCaptionProcessor (default
# settings) has been applied to each of them.
text_processor = BlipCaptionProcessor()
cls_prompt = list(map(text_processor, cls_names))

In [None]:
def score_image(image):
  """Return the probability that ``image`` matches the first class prompt.

  The image is converted to RGB, preprocessed with ``vis_processors["eval"]``
  and embedded with the module-level ``model``. The text side uses
  ``cls_prompt``, i.e. the class descriptions after the caption processor has
  been applied; previously the raw ``cls_names`` were passed and ``cls_prompt``
  was computed but never used. Similarities are divided by ``model.temp`` and
  soft-maxed over the prompts.

  NOTE: the CLIP section of this notebook defines a function with the same
  name; whichever definition was executed last is the one in effect.

  Args:
    image: Object exposing ``convert("RGB")`` (the notebook passes PIL images).

  Returns:
    A float: the softmax probability of the first prompt.
  """
  raw_image = image.convert("RGB")
  image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)
  sample = {"image": image, "text_input": cls_prompt}

  # Pure inference: skip autograd bookkeeping for both forward passes.
  with torch.no_grad():
    # [:, 0] keeps the first embedding along dim 1 for every batch entry.
    image_features = model.extract_features(sample, mode="image").image_embeds_proj[:, 0]
    text_features = model.extract_features(sample, mode="text").text_embeds_proj[:, 0]

    # Row 0 is the single image in the batch; one similarity per prompt.
    sims = (image_features @ text_features.t())[0] / model.temp
    probs = torch.softmax(sims, dim=0).tolist()

  return probs[0]

In [None]:
# Score every image with score_image(), displaying each picture followed by
# its score, then summarise the scores.
scores = []
for img_file in img_files:
  img, _ = load_image(img_file)
  show_image(img)
  aesthetics_score = score_image(img)

  scores.append({
      # basename() matches how the paths were built (os.path.join) instead of
      # assuming '/' is the separator.
      'img': os.path.basename(img_file),
      'score': aesthetics_score
  })
  print("aesthetics_score:", aesthetics_score)

pd.DataFrame(scores).describe()