In [1]:
# !unzip hotels-50K-sample.zip

In [2]:
# !pip install pandas

In [3]:
import pandas as pd
import csv
import os
from IPython.display import clear_output, display
from PIL import Image
from tqdm import tqdm
from clip_interrogator import Config, Interrogator

In [4]:
caption_model_name = 'blip-large'
clip_model_name = 'ViT-L-14/openai'

prompt_mode = 'best'

config = Config()
config.clip_model_name = clip_model_name
config.caption_model_name = caption_model_name
ci = Interrogator(config)

def image_analysis(image):
    image = image.convert('RGB')
    image_features = ci.image_to_features(image)

    top_mediums = ci.mediums.rank(image_features, 5)
    top_artists = ci.artists.rank(image_features, 5)
    top_movements = ci.movements.rank(image_features, 5)
    top_trendings = ci.trendings.rank(image_features, 5)
    top_flavors = ci.flavors.rank(image_features, 5)

    medium_ranks = {medium: sim for medium, sim in zip(top_mediums, ci.similarities(image_features, top_mediums))}
    artist_ranks = {artist: sim for artist, sim in zip(top_artists, ci.similarities(image_features, top_artists))}
    movement_ranks = {movement: sim for movement, sim in zip(top_movements, ci.similarities(image_features, top_movements))}
    trending_ranks = {trending: sim for trending, sim in zip(top_trendings, ci.similarities(image_features, top_trendings))}
    flavor_ranks = {flavor: sim for flavor, sim in zip(top_flavors, ci.similarities(image_features, top_flavors))}

    return medium_ranks, artist_ranks, movement_ranks, trending_ranks, flavor_ranks

def image_to_prompt(image, mode):
    ci.config.chunk_size = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
    ci.config.flavor_intermediate_count = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
    image = image.convert('RGB')
    if mode == 'best':
        return ci.interrogate(image)
    elif mode == 'classic':
        return ci.interrogate_classic(image)
    elif mode == 'fast':
        return ci.interrogate_fast(image)
    elif mode == 'negative':
        return ci.interrogate_negative(image)

Loading caption model blip-large...
Loading CLIP model ViT-L-14/openai...
Loaded CLIP model and data in 6.27 seconds.


In [7]:
df = pd.read_csv("/home/docker_current/src/hotels-50k/sample.csv")
df.head()

Unnamed: 0,image_id,hotel_id,url,source,timestamp,hotel_name,chain_id,latitude,longitude,chain_name,downloaded,image_name,image_folder
0,3918396,104555,https://traffickcam.com/images/2019/2/20180127...,traffickcam,1/27/18 20:55,Hilton Dallas Plano Granite Park,3,33.08815,-96.82055,Hilton,True,3918396.jpg,hotels-50k/images/train/3/104555/traffickcam
1,3919007,104555,https://traffickcam.com/images/2019/2/20180131...,traffickcam,1/31/18 0:47,Hilton Dallas Plano Granite Park,3,33.08815,-96.82055,Hilton,True,3919007.jpg,hotels-50k/images/train/3/104555/traffickcam
2,3960051,104555,https://traffickcam.com/images/2019/9/20180819...,traffickcam,8/19/18 22:12,Hilton Dallas Plano Granite Park,3,33.08815,-96.82055,Hilton,True,3960051.jpg,hotels-50k/images/train/3/104555/traffickcam
3,8473367,104555,https://i.travelapi.com/hotels/8000000/7600000...,travel_website,2019-12-22 06:01:36,Hilton Dallas Plano Granite Park,3,33.08815,-96.82055,Hilton,True,8473367.jpg,hotels-50k/images/train/3/104555/travel_website
4,8473368,104555,https://i.travelapi.com/hotels/8000000/7600000...,travel_website,2019-12-22 06:01:36,Hilton Dallas Plano Granite Park,3,33.08815,-96.82055,Hilton,True,8473368.jpg,hotels-50k/images/train/3/104555/travel_website


In [8]:
from tqdm import tqdm

In [11]:
prompts = []

for row in tqdm(df.iloc()):
    if row["downloaded"] if True:
        im_path = "/home/docker_current/src/" +  row["image_folder"] + "/" + row["image_name"]
        image = Image.open(im_path).convert('RGB')
        prompt = image_to_prompt(image, prompt_mode)
        prompts.append(prompt)
    else:
        prompts.append(None)


100%|██████████| 55/55 [00:00<00:00, 287.52it/s]
Flavor chain:  28%|██▊       | 9/32 [00:14<00:36,  1.57s/it]
100%|██████████| 55/55 [00:00<00:00, 282.29it/s]
100%|██████████| 6/6 [00:00<00:00, 207.50it/s]
100%|██████████| 50/50 [00:00<00:00, 282.84it/s]
100%|██████████| 55/55 [00:00<00:00, 283.43it/s]
Flavor chain:  38%|███▊      | 12/32 [00:18<00:30,  1.53s/it]
100%|██████████| 55/55 [00:00<00:00, 281.06it/s]
100%|██████████| 6/6 [00:00<00:00, 210.13it/s]
100%|██████████| 50/50 [00:00<00:00, 283.81it/s]
100%|██████████| 55/55 [00:00<00:00, 284.67it/s]
Flavor chain:  41%|████      | 13/32 [00:20<00:29,  1.57s/it]
100%|██████████| 55/55 [00:00<00:00, 281.64it/s]
100%|██████████| 6/6 [00:00<00:00, 193.56it/s]
100%|██████████| 50/50 [00:00<00:00, 279.47it/s]
100%|██████████| 55/55 [00:00<00:00, 198.37it/s]
Flavor chain:  44%|████▍     | 14/32 [00:21<00:27,  1.54s/it]
100%|██████████| 55/55 [00:00<00:00, 272.17it/s]
100%|██████████| 6/6 [00:00<00:00, 197.12it/s]
100%|██████████| 50/50 [00

TypeError: can only concatenate str (not "float") to str

In [12]:
row

image_id                                                  8473372
hotel_id                                                   104555
url             https://i.travelapi.com/hotels/8000000/7600000...
source                                             travel_website
timestamp                                     2019-12-22 06:01:36
hotel_name                       Hilton Dallas Plano Granite Park
chain_id                                                        3
latitude                                                 33.08815
longitude                                               -96.82055
chain_name                                                 Hilton
downloaded                                                  False
image_name                                                    NaN
image_folder                                                  NaN
Name: 8, dtype: object

In [14]:
row["downloaded"] is True

False