# Neural Project

### Imports

In [2]:
import h5py
import torch
import numpy as np
import matplotlib.pyplot as plt
import random
import clip
from PIL import Image
import requests
from importlib import reload
import io
# Import our custom modules
import image_loader
reload(image_loader)

<module 'image_loader' from '/scratch/arda/proj/image_loader.py'>

### Read Data From H5 Files

In [3]:
# Training file: read the captions, image ids, URLs and the vocabulary table.
# "train_ims" is present in the file but is not loaded here.
with h5py.File("eee443_project_dataset_train.h5", "r") as f:
    print("Keys: %s" % f.keys())
    train_cap, train_imid, train_url, word_code = [
        np.array(f[key])
        for key in ("train_cap", "train_imid", "train_url", "word_code")
    ]
    #train_ims = np.array(f["train_ims"])

# The field names of word_code are used as the words; the values of its first
# record are taken as the matching integer indices.
words = np.array(word_code.dtype.names)
word_indices = np.array(list(word_code[0]), dtype=np.int32)

# Test file: note that the caption dataset is stored under "test_caps".
with h5py.File("eee443_project_dataset_test.h5", "r") as f:
    print("Keys: %s" % f.keys())
    test_cap, test_imid, test_ims, test_url = [
        np.array(f[key])
        for key in ("test_caps", "test_imid", "test_ims", "test_url")
    ]

# Cell output: the shapes of the loaded arrays (test_ims is not included).
tuple(
    arr.shape
    for arr in (train_cap, train_imid, train_url, test_cap, test_imid, test_url)
)

Keys: <KeysViewHDF5 ['train_cap', 'train_imid', 'train_ims', 'train_url', 'word_code']>
Keys: <KeysViewHDF5 ['test_caps', 'test_imid', 'test_ims', 'test_url']>


((400135, 17), (400135,), (82783,), (195954, 17), (195954,), (40504,))

### Train/Validation Split

In [3]:
# Hold out a fixed fraction of the caption rows for validation.
# The indices come from a dedicated, seeded RNG so that "Restart & Run All"
# reproduces exactly the same train/validation partition.
# NOTE(review): rows are sampled per caption, not per image, so captions that
# share an image id can end up on both sides of the split — confirm intended.
SPLIT_SEED = 42
VALIDATION_FRACTION = 0.15

N = train_cap.shape[0]
validation_amount = int(N * VALIDATION_FRACTION)
validation_indices = random.Random(SPLIT_SEED).sample(range(N), validation_amount)

# Boolean mask: True marks a validation row, False a training row.
validation_mask = np.zeros(N, dtype=bool)
validation_mask[validation_indices] = True

train_X = train_cap[~validation_mask]
train_Y = train_imid[~validation_mask]
valid_X = train_cap[validation_mask]
valid_Y = train_imid[validation_mask]

# These helpers are only needed to build the split; drop them again.
del validation_mask, validation_indices, validation_amount

In [4]:
# Reorder the vocabulary by ascending word index, so that words[k] is the word
# with the k-th smallest index (equal to index k if the indices are a
# permutation of 0..len-1 — TODO confirm).
# The array is rebuilt from word_code rather than permuting `words` in place:
# re-running this cell then fails immediately on the deleted `word_code`
# instead of silently shuffling an already-sorted vocabulary a second time.
word_indices_sorter = np.argsort(word_indices)
words = np.array(word_code.dtype.names)[word_indices_sorter]
del word_indices_sorter, word_code

### Testing CLIP

In [5]:
# Run on the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# clip.load hands back the model and the callable used below to prepare
# PIL images for it.
model, preprocessor = clip.load("ViT-B/32", device=device)

### Load or Calculate Tokenized Captions

In [1]:
# Codes that are stripped from a caption before it is turned back into text.
# NOTE(review): 2 is used as the end-of-caption marker below; the meaning of
# 0, 1 and 3 is not visible here (presumably padding/start/unknown) — confirm.
SPECIAL_TOKEN_CODES = (0, 1, 2, 3)
END_TOKEN_CODE = 2

try:
    # Load straight onto `device` so the cached tensor lives in the same place
    # as the freshly computed one in the fallback branch.
    all_tokenized_captions = torch.load("tensors/all_tokenized_captions.pt", map_location=device)
except FileNotFoundError:
    # NOTE(review): nothing in this cell writes the cache file back to disk.
    # Column of the first end token in each row. A row without an end token
    # falls back to the full row length, so `ends` always holds exactly one
    # entry per caption; np.where(train_cap == 2)[1] yields one entry per
    # *match* and would silently misalign if a row had zero or several.
    is_end = train_cap == END_TOKEN_CODE
    ends = np.where(is_end.any(axis=1), is_end.argmax(axis=1), train_cap.shape[1])

    all_caption = []
    for cap_row, end in zip(train_cap, ends):
        # Skip position 0, stop before the end token, then drop any remaining
        # special codes before mapping the codes back to words.
        cap_int = [code for code in cap_row[1:end] if code not in SPECIAL_TOKEN_CODES]
        all_caption.append(" ".join(words[cap_int]))

    all_tokenized_captions = clip.tokenize(all_caption).to(device)

NameError: name 'torch' is not defined

### Load or Calculate Text Features

In [7]:
try:
    encoded_captions = torch.load("tensors/encoded_captions.pt").to(device)
except FileNotFoundError:
    # NOTE(review): nothing in this cell writes the cache file back to disk.
    TEXT_ENCODE_BATCH = 1000
    num_captions = all_tokenized_captions.shape[0]
    # 512 is the feature width assumed for this CLIP model (the image feature
    # buffer further down uses the same value).
    encoded_captions = torch.empty((num_captions, 512), device=device)
    with torch.no_grad():
        # Walk the captions in strides of TEXT_ENCODE_BATCH; the last batch may
        # be shorter. The previous bound (N//100) did not match the batch size
        # of 1000, so about 90% of the iterations passed empty slices to the
        # model.
        for start in range(0, num_captions, TEXT_ENCODE_BATCH):
            stop = min(start + TEXT_ENCODE_BATCH, num_captions)
            # No-op when the tokens already live on `device`.
            batch = all_tokenized_captions[start:stop].to(device)
            encoded_captions[start:stop] = model.encode_text(batch).float()
            # Report how many captions are done, not where the batch started.
            print(f"Encoded {stop} captions", end="\r")

### Load or Calculate Image Features

In [8]:
# Encode one local image: preprocess it, add a leading batch axis and move it
# to `device`.
img = preprocessor(Image.open("clip0.jpg"))
img = img.unsqueeze(0).to(device)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    img_feat = model.encode_image(img)
    img_feat = img_feat.float()

In [None]:
# Seconds to wait on a single download before giving up on that image.
REQUEST_TIMEOUT_S = 10

IMS_SIZE = train_url.shape[0]
# Zero-initialised so that rows of images that could not be fetched hold a
# well-defined value instead of whatever torch.empty left in memory.
image_features = torch.zeros((IMS_SIZE, 512), device=device)
# True for every row of image_features that holds a real encoding.
image_features_valid = torch.zeros(IMS_SIZE, dtype=torch.bool)

with torch.no_grad():
    for i in range(IMS_SIZE):
        # Best effort: an image that cannot be downloaded or decoded is
        # skipped, so one bad URL does not abort the whole run.
        try:
            r = requests.get(train_url[i], timeout=REQUEST_TIMEOUT_S)
        except requests.RequestException:
            continue
        if r.status_code != 200:
            continue
        try:
            image = Image.open(io.BytesIO(r.content))
            image = preprocessor(image).unsqueeze(0).to(device)
        except OSError:
            # PIL reports unidentifiable or truncated image data as OSError.
            continue
        image_features[i] = model.encode_image(image).float()
        image_features_valid[i] = True
        print(f"Encoded {i}/{IMS_SIZE} images", end="\r")
