In [None]:
# Load IPython's autoreload extension and select mode 2, which reloads all
# imported modules before each cell runs, so edits to local source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Re-load the autoreload extension.
# NOTE(review): the first cell already runs `%load_ext autoreload`, so this cell
# looks redundant on a fresh Restart & Run All — confirm before removing.
%reload_ext autoreload

Import fashion_clip abstractions and utils

In [None]:
import os
from fashion_clip.fashion_clip import FashionCLIP, FCLIPDataset
from fashion_clip.utils import get_cache_directory, display_images
# Show where fashion_clip keeps its cache so the reader can inspect or clear it.
cache_dir = get_cache_directory()
print("Cache is at {}".format(cache_dir))

## Basic Setup
Load dataset:
- Download FF dataset
- Construct your own dataset

The dataset abstraction expects a list of dictionaries, with each element representing a single product/item.
Each element __must__ contain at least: _id_, _image_ (filename), and _caption_.

Currently, we assume images are all stored in some local folder

In [None]:
# Option A: use the bundled 'FF' dataset, with images fetched from S3.
# dataset = FCLIPDataset('FF',
#                           image_source_path=os.getenv('S3_IMAGES_PATH'),
#                           image_source_type='s3')

# Option B (active): describe a small local catalog by hand.
# Each row is (id, image filename, caption); the filenames are passed to
# FCLIPDataset together with image_source_path='./images'.
catalog = [
    {'id': item_id, 'image': filename, 'caption': caption}
    for item_id, filename, caption in (
        (1, '16867424.jpg', 'light red polo shirt'),
        (2, '16790484.jpg', 'an adidas sneaker'),
        (3, '16198646.jpg', 'dark red polo shirt'),
    )
]

dataset = FCLIPDataset(
    'farfetch_local',
    image_source_path='./images',
    image_source_type='local',
    catalog=catalog,
)

Create FCLIP Object by specifying
1. Model (Pre-trained (stored in S3) or Local File)
2. Dataset (FCLIPDataset)

Under the hood: if dataset and model combination is recognized via hashing, we download the
pre-processed vectors, else upon instantiation we generate vectors for the dataset

In [None]:
# Instantiate FCLIP with pre-defined model and dataset
# The first argument is the model identifier; the second is the FCLIPDataset
# built in the dataset cell.
# NOTE(review): per the notebook text, vectors are either downloaded or generated
# at instantiation, so this cell may be slow on first run — confirm.
fclip = FashionCLIP('fashion-clip', dataset)
# Alternative model identifier (presumably the base OpenAI CLIP checkpoint — confirm):
# fclip = FashionCLIP('openai/clip-vit-base-patch32', dataset)

## Demo Functionality

1. Zero-shot classification

We do not use pre-processed vectors here, although there is a possibility of adding the option to
specify images by SKU/ID and pull the pre-processed vectors accordingly.

In [None]:
# Candidate labels to score against the image: a mix of plausible fashion
# captions and unrelated distractors.
test_captions = [
    "nike sneakers",
    "adidas sneakers",
    "nike blue sneakers",
    "converse",
    "nike",
    "library",
    "the flag of italy",
    "pizza",
    "a gucci dress",
]

# Single local image to classify.
test_img_path = 'images/16790484.jpg'

display_images([test_img_path])
# Final expression of the cell, so its return value is rendered as the cell output.
fclip.zero_shot_classification([test_img_path], test_captions)

2. Product Retrieval

We perform retrieval over the images from the dataset specified at instantiation.

In [None]:
# Query the dataset images with a text prompt; `candidates` holds one result per query.
candidates = fclip.retrieval(['shoes'])
print(candidates)

# Show the products matched by the first (only) query, labelled by their id.
# Assigning to `_` keeps the return value out of the cell output.
_ = dataset.display_products(
    dataset.ids[candidates[0]],
    fields=('id',),
)

In [None]:
# Same retrieval flow with a different text prompt.
candidates = fclip.retrieval(['shirt'])
print(candidates)

# Show the products matched by the first (only) query, labelled by their id.
# Assigning to `_` keeps the return value out of the cell output.
_ = dataset.display_products(
    dataset.ids[candidates[0]],
    fields=('id',),
)

3. Attention Masking

Taken from [CLIP Italian](https://huggingface.co/spaces/clip-italian/clip-italian-demo)

In [None]:
# Presumably renders an attention/saliency view of the image for the text query
# (see FashionCLIP.display_attention) — confirm.
# NOTE(review): the meaning of `pixel_size` and `iterations` is not visible here;
# they presumably control mask granularity and the number of masking passes — confirm.
# NOTE(review): 'images/nike_dress.jpg' is not one of the catalog images declared
# earlier in this notebook, so it must exist on disk separately.
fclip.display_attention(image_path='images/nike_dress.jpg',
                        query_text='nike',
                        pixel_size=15,
                        iterations=15
)
