In [1]:
import daft
from daft import DataFrame
from PIL import Image
import io
# Select Daft's Ray runner for the rest of the notebook; the log output of the
# next cell reports "Using RayRunner" and a locally started Ray instance.
daft.context.set_runner_ray()

DaftContext(runner_config=_RayRunnerConfig(address=None, max_tasks_per_core=None, max_refs_per_core=None, batch_dispatch_coeff=None), disallow_set_runner=True)

In [2]:
# Read the COCO-2017 URL/caption table, spread it over 16 partitions and keep
# 500 rows (same call order as before: read -> repartition -> limit).
df = (
    DataFrame.read_parquet("s3://daft-public-data/coco-2017/mscoco.parquet")
    .repartition(16)
    .limit(500)
)

# Download every URL and decode the bytes into a PIL image resized to 512x512.
df = df.with_column(
    "image",
    df["URL"].url.download().apply(
        lambda raw_bytes: Image.open(io.BytesIO(raw_bytes)).resize((512, 512)),
        return_type=Image.Image,
    ),
)


2023-02-11 22:50:19.684 | INFO     | daft.context:runner:75 - Using RayRunner
2023-02-11 22:50:21,961	INFO worker.py:1529 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


In [3]:
# def load_safety_model(clip_model):
#     """load the safety model"""
#     import os
#     import numpy as np
#     import autokeras as ak  # pylint: disable=import-outside-toplevel
#     from tensorflow.keras.models import load_model  # pylint: disable=import-outside-toplevel

#     cache_folder = ".nsfw_cache"

#     if clip_model == "ViT-L/14":
#         model_dir = cache_folder + "/clip_autokeras_binary_nsfw"
#         dim = 768
#     elif clip_model == "ViT-B/32":
#         model_dir = cache_folder + "/clip_autokeras_nsfw_b32"
#         dim = 512
#     else:
#         raise ValueError("Unknown clip model")
#     if not os.path.exists(model_dir):
#         os.makedirs(cache_folder, exist_ok=True)

#         from urllib.request import urlretrieve  # pylint: disable=import-outside-toplevel

#         path_to_zip_file = cache_folder + "/clip_autokeras_binary_nsfw.zip"
#         if clip_model == "ViT-L/14":
#             url_model = "https://raw.githubusercontent.com/LAION-AI/CLIP-based-NSFW-Detector/main/clip_autokeras_binary_nsfw.zip"
#         elif clip_model == "ViT-B/32":
#             url_model = (
#                 "https://raw.githubusercontent.com/LAION-AI/CLIP-based-NSFW-Detector/main/clip_autokeras_nsfw_b32.zip"
#             )
#         else:
#             raise ValueError("Unknown model {}".format(clip_model))  # pylint: disable=consider-using-f-string
#         urlretrieve(url_model, path_to_zip_file)
#         import zipfile  # pylint: disable=import-outside-toplevel

#         with zipfile.ZipFile(path_to_zip_file, "r") as zip_ref:
#             zip_ref.extractall(cache_folder)

#     loaded_model = load_model(model_dir, custom_objects=ak.CUSTOM_OBJECTS)
#     loaded_model.predict(np.random.rand(10**3, dim).astype("float32"), batch_size=10**3)

#     return loaded_model
# nsfw_model = load_safety_model("ViT-B/32")

In [4]:
# import torch
# import clip
# from PIL import Image
# import fsspec
# bad_image = fsspec.open("https://www.damenmode-quelle.de/fotky421/fotos/_vyr_6097Sexy-Push-Up-Bikini-Brasilianisch-Bunt-2.jpg").open()

# device = "cuda" if torch.cuda.is_available() else "cpu"
# model, preprocess = clip.load("ViT-B/32", device=device)

# bad_image = Image.open(bad_image)
# image = preprocess(bad_image).unsqueeze(0).to(device)

# with torch.no_grad():
#     image_features = model.encode_image(image)
#     image_features = image_features.detach().cpu().float()
#     norm = image_features.norm(p=2, dim=1, keepdim=True)
# emb = (image_features / norm).numpy()

# bad_image.resize((256,256))

In [5]:

# nsfw_value = nsfw_model.predict(emb)

# print(nsfw_value)

In [6]:
import numpy as np
from daft import udf, col
from typing import List
import clip
import torch

@udf(return_type=np.ndarray)
class ImageClipExtractor:
    """UDF class that turns PIL images into L2-normalised CLIP image embeddings.

    The CLIP model and its preprocessing transform are loaded in ``__init__``
    and reused by every ``__call__`` on the same instance.
    """

    def __init__(self, model_name: str="ViT-B/32") -> None:
        """Load the CLIP checkpoint ``model_name`` onto the CPU."""
        device = "cpu"
        model, preprocess = clip.load(model_name, device=device)
        self.device = device
        self.model = model
        self.preprocess = preprocess

    def __call__(self, images: List[Image.Image]) -> np.ndarray:
        """Embed a batch of images.

        Args:
            images: PIL images to embed.

        Returns:
            A float32 NumPy array with one L2-normalised embedding per image.
        """
        # Preprocess each image and stack the results into a single batch tensor.
        batch = torch.stack(list(map(self.preprocess, images))).to(self.device)

        with torch.no_grad():
            embeddings = self.model.encode_image(batch).detach().cpu().float()
            # Per-row Euclidean length, kept as a column so it broadcasts below.
            lengths = embeddings.norm(p=2, dim=1, keepdim=True)

        unit_embeddings = embeddings / lengths
        return unit_embeddings.numpy()
    
    
# Add an 'image_clip_embedding' column by applying ImageClipExtractor to the 'image' column.
df = df.with_column('image_clip_embedding', ImageClipExtractor(col('image')))

In [7]:
@udf(return_type=np.ndarray)
class TextClipExtractor:
    """UDF class that turns strings into L2-normalised CLIP text embeddings.

    The CLIP model is loaded in ``__init__`` and reused by every ``__call__``
    on the same instance.
    """

    def __init__(self, model_name: str="ViT-B/32") -> None:
        """Load the CLIP checkpoint ``model_name`` onto the CPU."""
        self.device = "cpu"
        # clip.load also returns the image preprocessing transform. It is kept
        # as an attribute for backward compatibility; this class never calls it.
        self.model, self.preprocess = clip.load(model_name, device=self.device)

    def __call__(self, text: List[str]) -> np.ndarray:
        """Embed a batch of strings.

        Args:
            text: Strings to embed. Strings that tokenize to more tokens than
                the model's context length are truncated instead of raising.

        Returns:
            A float32 NumPy array with one L2-normalised embedding per string.
        """
        # truncate=True: without it clip.tokenize raises RuntimeError on any
        # over-long caption, which would fail the whole batch for one bad row.
        tokens = clip.tokenize(text, truncate=True).to(self.device)

        with torch.no_grad():
            features = self.model.encode_text(tokens)
            features = features.detach().cpu().float()
            # Scale each row to unit Euclidean length, in place.
            features /= features.norm(p=2, dim=-1, keepdim=True)
        return features.numpy()
# Add a 'text_clip_embedding' column by applying TextClipExtractor to the 'TEXT' caption column.
df = df.with_column('text_clip_embedding', TextClipExtractor(col('TEXT')))

In [8]:
import numpy as np
@udf(return_type=float)
def cosine_similarity(A: List[np.ndarray], B: List[np.ndarray]) -> List[float]:
    """Compute the row-wise cosine similarity between two columns of vectors.

    Args:
        A: Vectors for the left-hand side, one per row.
        B: Vectors for the right-hand side, paired with ``A`` by position.

    Returns:
        One Python float per (a, b) pair: ``dot(a, b) / (|a| * |b|)``.
        A pair containing a zero-length vector yields 0.0.
    """
    similarities = []
    for a, b in zip(A, B):
        dot = float(np.dot(a, b))
        # Divide by the norms so the result is a true cosine similarity even
        # when the inputs are not already unit-length.
        denom = float(np.linalg.norm(a)) * float(np.linalg.norm(b))
        # A zero vector has no direction; keep the plain dot product's value
        # (0.0) instead of dividing by zero.
        similarities.append(0.0 if denom == 0.0 else dot / denom)
    return similarities

# Score each row by comparing its image embedding with its caption embedding.
df = df.with_column("cosine_similarity", cosine_similarity(col("image_clip_embedding"), col("text_clip_embedding")))

In [9]:
# Order rows from highest to lowest image/caption similarity.
df = df.sort("cosine_similarity", desc=True)

In [10]:
# Display the first 10 rows of the sorted frame (the best-matching image/caption pairs).
df.show(10)

[2m[36m(fanout_pipeline pid=2412076)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412076)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412107)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412078)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412078)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412103)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412102)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412104)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412104)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(fanout_pipeline pid=2412069)[0m   result = getattr(asarray(obj), method)(*args, **kwds)
[2m[36m(

URL STRING,TEXT STRING,image PY[Image],image_clip_embedding PY[ndarray],text_clip_embedding PY[ndarray],cosine_similarity FLOAT
http://images.cocodataset.org/train2017/000000565797.jpg,An orange striped tabby cat laying on top of a red vehicl...,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.383856
http://images.cocodataset.org/train2017/000000412151.jpg,A man adjust a bicycle in a bike shop with a child.,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.382186
http://images.cocodataset.org/train2017/000000562150.jpg,A young girl is holding a small cat.,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.378317
http://images.cocodataset.org/train2017/000000144941.jpg,A tiled bathroom with a potted plant as a center piece.,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.377428
http://images.cocodataset.org/train2017/000000144941.jpg,A potted plant is being displayed in a bathroom.,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.374028
http://images.cocodataset.org/train2017/000000328757.jpg,A young woman standing in a kitchen eats a plate of veget...,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.373756
http://images.cocodataset.org/train2017/000000565797.jpg,An orange cat hiding on the wheel of a red car.,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.363818
http://images.cocodataset.org/train2017/000000144941.jpg,Interior bathroom scene with modern furnishings including...,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.363233
http://images.cocodataset.org/train2017/000000032907.jpg,a toilet on the ground outdoors in front of a house,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.361989
http://images.cocodataset.org/train2017/000000383445.jpg,A donut on the antenna of a car.,,"<np.ndarray shape=(512,) dtype=float32>","<np.ndarray shape=(512,) dtype=float32>",0.361895
