In [91]:
# !pip install ftfy regex tqdm
# !pip install git+https://github.com/openai/CLIP.git
# !pip install pyarrow

In [92]:
import os
import torch
import clip
import numpy as np
import pandas as pd
import pyarrow.feather as feather
from PIL import Image
from urllib import request
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

In [93]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

In [94]:
id_data = pd.read_table("./data/photos.tsv000")
id_data = id_data[:10]

In [95]:
labels = ['morning','noon','afternoon','night','sunrise or sunset']
tkns = ['A photo taken at '+label for label in labels]
text = clip.tokenize(tkns).to(device)

In [96]:
BATCH_SIZE = 2
ln = len(id_data)

In [97]:
import pandas as pd
import asyncio
import tqdm.asyncio as tqdm
import datetime
import requests


async def process_url(img, name):

    resp = requests.get(img, stream = True)
    if resp.status_code == 200:
        with open(f'/home/gk/vscode/mentos2/imgs/{name}.jpg', mode='wb') as f:
                f.write(resp.content)
    

async def main(id_data):
    
    pbar = tqdm.tqdm(total=ln, position=0, ncols=90)
    for img, name in tqdm.tqdm(id_data[['photo_image_url','photo_id']].values):
        await asyncio.gather(process_url(img, name))
        pbar.update()
    


start = datetime.datetime.now()
await main(id_data[:10])
exec_time = (datetime.datetime.now() - start).seconds


100%|██████████| 10/10 [00:11<00:00,  1.12s/it]███████████| 10/10 [00:11<00:00,  1.00s/it]
100%|█████████████████████████████████████████████████████| 10/10 [00:11<00:00,  1.13s/it]


In [106]:
from tqdm import tqdm

results = []

for i in tqdm(range(0,ln,BATCH_SIZE)):
    images = [
        preprocess(
            Image.open(f"/home/gk/vscode/mentos2/imgs/{img_id}.jpg")
        ) for img_id in id_data['photo_id'][i:i+BATCH_SIZE]
    ]
    
    image_input = torch.tensor(np.stack(images)).to(device)
    
    with torch.no_grad():
        image_features = model.encode_image(image_input)

        logits_per_image, logits_per_text = model(image_input, text)

        # The softmax function takes the original confidence 
        # and applys a transform to make all the confidence add up to one
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()
        results.append(probs)
        

100%|██████████| 5/5 [00:07<00:00,  1.56s/it]


In [110]:
results

[array([[0.24939512, 0.11317686, 0.5617848 , 0.04105058, 0.03459259],
        [0.42064378, 0.22109112, 0.3413293 , 0.00406866, 0.01286716]],
       dtype=float32),
 array([[0.3431934 , 0.4437798 , 0.16850986, 0.00502489, 0.03949203],
        [0.46548992, 0.10493674, 0.36747718, 0.00647406, 0.055622  ]],
       dtype=float32),
 array([[0.3381277 , 0.13543364, 0.49400395, 0.01339193, 0.01904278],
        [0.38754764, 0.09125071, 0.3338901 , 0.00306765, 0.18424398]],
       dtype=float32),
 array([[4.2146990e-01, 4.6916127e-02, 2.1870366e-01, 6.8997761e-04,
         3.1222031e-01],
        [9.2957430e-02, 1.4701550e-02, 3.8343336e-02, 1.4332941e-02,
         8.3966470e-01]], dtype=float32),
 array([[0.37139437, 0.00996081, 0.04555314, 0.00794072, 0.565151  ],
        [0.31442267, 0.23331885, 0.2961083 , 0.02424738, 0.13190274]],
       dtype=float32)]

In [74]:
res = np.concatenate(results,axis=0)
choices = np.argmax(res,axis=1)
choices.shape

(10,)

In [77]:
getlabel = lambda x:labels[x]
vgetlabel = np.vectorize(getlabel)
times = vgetlabel(choices)
id_data['time'] = times

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  id_data['time'] = times


In [78]:
id_data['time'].value_counts()

morning              5
afternoon            2
sunrise or sunset    2
noon                 1
Name: time, dtype: int64