In [1]:
import torch
import clip
from PIL import Image
import matplotlib.pyplot as plt
import openai
import requests
from io import BytesIO
import torchvision
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [None]:
# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# clip.load returns the network together with the image transform that
# prepares PIL images for it.
model, preprocess = clip.load("ViT-B/32", device=device)

In [None]:
# Open the picture inside a context manager so the underlying file handle is
# released as soon as the pixels have been turned into a tensor.
with Image.open("clip.jpg") as pil_image:
    # unsqueeze(0) adds a leading batch axis of size 1 before moving to `device`.
    image = preprocess(pil_image).unsqueeze(0).to(device)

# Candidate captions to score against the image, tokenized as a single batch.
text = clip.tokenize(["eiffel tower", "pisa tower", "burj khalifa"]).to(device)

In [None]:
# Drop the batch axis added by unsqueeze(0), move the first remaining axis to the
# end (presumably channels-first -> channels-last, as matplotlib expects), and
# copy the tensor back to host memory: matplotlib cannot read a CUDA tensor.
# NOTE(review): `image` is the output of `preprocess`, which presumably
# normalizes pixel values, so the colors shown may be clipped/distorted — confirm.
plt.imshow(image.squeeze(0).permute(1, 2, 0).cpu())

In [None]:
# Inference only: disable gradient tracking for every call in this cell.
with torch.no_grad():
    # Joint forward pass yields the image->text and text->image logits.
    logits_per_image, logits_per_text = model(image, text)

    # Stand-alone embeddings for each modality, kept for later inspection.
    text_features = model.encode_text(text)
    image_features = model.encode_image(image)

    # Softmax over the caption axis turns the logits into per-caption
    # probabilities, returned as a NumPy array on the host.
    probs = torch.softmax(logits_per_image, dim=-1).cpu().numpy()

print("Label probs:", probs)

In [None]:
# Fetch the pretrained GPT-2 weights and the matching tokenizer.
# NOTE(review): this rebinds `model`, which an earlier cell bound to the CLIP
# network; re-running the CLIP cells after this one will hit the GPT-2 model
# instead. Consider a distinct name if both models are needed side by side.
model_name = 'gpt2-medium'
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

In [None]:
# Define the function that generates features for a category

def generate_features(category):
    """Ask the GPT-2 model to describe the visual characteristics of a category.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        category: Text inserted into the prompt (e.g. ``'store'``).

    Returns:
        The decoded text of the first generated sequence, with special tokens
        stripped. For a decoder-only model this is expected to start with the
        prompt itself.
    """
    input_text = f"What are the visual characteristics of {category}?"

    # Calling the tokenizer directly returns the attention mask alongside the
    # input ids; encode() alone only returns the ids.
    encoded = tokenizer(input_text, return_tensors='pt')

    # The pad token is set to EOS below, so generate() cannot infer which
    # positions are padding; passing the mask explicitly keeps it unambiguous.
    output = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=100,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example usage: generate a description for one category and print the
# decoded text returned by generate_features.
category = 'store'
features = generate_features(category)
print(features)

In [7]:
from transformers import pipeline, set_seed
# Build a text-generation pipeline backed by the 'gpt2-large' checkpoint.
generator = pipeline(task='text-generation', model='gpt2-large')

# Fix the random seed so that sampled generations are repeatable.
set_seed(42)


In [19]:
def generate_features(category):
    """Prompt the text-generation pipeline for features that distinguish a category.

    Relies on the notebook-level ``generator`` pipeline.

    Note: this redefinition replaces the earlier ``generate_features`` in this
    notebook, which returned a plain string; this version returns the
    pipeline's raw output instead.

    Args:
        category: Text inserted into the prompt (e.g. ``'airplane'``).

    Returns:
        The pipeline result: a list with one entry per returned sequence, each
        a dict whose ``'generated_text'`` value holds the prompt plus its
        continuation.
    """
    prompt = f"What are 5 features that distinguish {category} in an image?"
    features = generator(
        prompt,
        max_length=100,
        num_return_sequences=1,
        # Passing the pad token explicitly (EOS) avoids the per-call
        # "Setting `pad_token_id` to `eos_token_id`" notice.
        pad_token_id=generator.tokenizer.eos_token_id,
    )
    return features


In [20]:
# Run the pipeline-based generator for a single example category.
features=generate_features('airplane')

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [21]:
# Show the text of the first (and only requested) generated sequence.
features[0]['generated_text']

'What are 5 features that distinguish airplane in an image?\n\nIn this article, we will take a look at the 5 features that distinguish an airplane in an image from other images (in our view at least). The purpose of this article is to demonstrate the differences between a simple airframe and a complex one by showing how these features have been added to aviation history, and how we could do the same with modern airplanes and their digital image recorders.\n\nSimple airframes\n\nThe simplest'