In [None]:
import os
import time
import torch
import pandas as pd
from PIL import Image

In [None]:
# Read the one-hot label table; the first CSV column is used as the index.
onehot_df = pd.read_csv(
    "datasets/cropped_medium_one_hot.csv",
    index_col=0,
)
# Independent (deep) copy that later cells can modify without touching onehot_df.
labels_df = onehot_df.copy(deep=True)

In [None]:
# Every column name of the one-hot table, in file order. This list is read by
# one_hot_to_class_list() as a module-level global.
one_hot_columns = list(onehot_df.columns)


def one_hot_to_class_list(row):
    """Return the names of the columns whose value in ``row`` equals 1.

    Only the names listed in the module-level ``one_hot_columns`` are
    inspected, and the result keeps their order.

    Args:
        row: Mapping-like object indexable by column name (e.g. a DataFrame
            row passed in by ``DataFrame.apply(..., axis=1)``).

    Returns:
        list: Column names for which ``row[name] == 1``; empty if none match.
    """
    active_columns = []
    for column_name in one_hot_columns:
        if row[column_name] == 1:
            active_columns.append(column_name)
    return active_columns


# Build the per-row list of active class names from the one-hot table, attach
# it as the 'labels' column, and keep only the image path and that list.
labels_df = labels_df.assign(
    labels=onehot_df.apply(one_hot_to_class_list, axis=1)
).loc[:, ['file_path', 'labels']]

In [None]:
# Persist the path/labels table. The 'labels' values are Python lists, so the
# CSV stores their string representation; they come back as strings on re-read.
labels_df.to_csv(path_or_buf="datasets/VLM_labels_df.csv")

In [None]:
import torch
from PIL import Image
from transformers import AutoModel, CLIPImageProcessor

# Standalone check of the InternViT-6B vision encoder on a single example image.
# NOTE(review): a later cell imports another object named `model` from
# `cog_vlm_batch`, which rebinds the name assigned here.
model = AutoModel.from_pretrained(
    'OpenGVLab/InternViT-6B-448px-V1-5',
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True).cuda().eval()

image_processor = CLIPImageProcessor.from_pretrained('OpenGVLab/InternViT-6B-448px-V1-5')

# Open inside a context manager so the file handle is released promptly;
# convert() returns a separate, fully loaded image that outlives the file.
with Image.open('./examples/image1.jpg') as example_file:
    image = example_file.convert('RGB')

# Preprocess to a tensor batch, then match the model's dtype and device.
pixel_values = image_processor(images=image, return_tensors='pt').pixel_values
pixel_values = pixel_values.to(torch.bfloat16).cuda()

# Inference only: .eval() does not turn off gradient tracking, so disable it
# explicitly to avoid keeping autograd state for the forward pass.
with torch.no_grad():
    outputs = model(pixel_values)

In [None]:
# Prompt builder, plus the batching helpers, device, tokenizer and model that
# are exported by the project module `cog_vlm_batch`.
# NOTE(review): this import binds the name `model`, replacing the InternViT
# `model` assigned in the earlier cell; the generation loop uses this one.
from prompts.prompts import dynamic_prompt
from cog_vlm_batch import collate_fn, recur_move_to, device, tokenizer, model

# Directory that each `file_path` entry is joined onto when images are opened.
image_folder = "/workspace/images"

# Alternative image root taken from the project's `utils` module (disabled):
# from utils import SRC_DIR
# image_folder = SRC_DIR

In [None]:
data = labels_df.file_path.values.tolist()
length = len(data)
batch_size = 16
# Number of batches to run. 1 keeps the single-batch trial run this cell was
# written for; set to None to process every batch.
max_batches = 1

# file_path -> list of label names, built once so each image lookup is a dict
# access instead of a boolean-mask scan over the whole DataFrame. The first
# occurrence of a duplicated path wins, as with the previous `[0]` selection.
labels_by_path = (
    labels_df.drop_duplicates(subset='file_path', keep='first')
    .set_index('file_path')['labels']
    .to_dict()
)

# Generation settings are the same for every batch, so build them once.
gen_kwargs = {
    "max_new_tokens": 2048,
    # NOTE(review): presumably the tokenizer's padding token id - confirm.
    "pad_token_id": 128002
}

answers = []
for batch_number, idx in enumerate(range(0, length, batch_size)):
    if max_batches is not None and batch_number >= max_batches:
        break

    # --- Build one conversation input per image in this batch ---
    input_sample_list = []
    start = time.perf_counter()
    for file_path in data[idx:idx + batch_size]:
        # Close the file handle as soon as the RGB copy has been made.
        with Image.open(os.path.join(image_folder, file_path)) as image_file:
            image = image_file.convert('RGB')
        # Pass only this image's label list to the prompt builder. The earlier
        # `.values[0]` lookup returned the whole row, i.e. [file_path, labels].
        # NOTE(review): assumes dynamic_prompt expects the category list - confirm.
        categories = labels_by_path[file_path]
        input_sample = model.build_conversation_input_ids(
            tokenizer,
            query=dynamic_prompt(categories),
            history=[],
            images=[image],
            template_version='chat'
        )
        input_sample_list.append(input_sample)
    print(f"Prepare input time: {time.perf_counter() - start}")

    # --- Collate, move tensors to the device, cast float tensors to bfloat16 ---
    start = time.perf_counter()
    input_batch = collate_fn(input_sample_list, tokenizer)
    input_batch = recur_move_to(
        input_batch, device, lambda x: isinstance(x, torch.Tensor)
    )
    input_batch = recur_move_to(
        input_batch, torch.bfloat16, lambda x: isinstance(x, torch.Tensor) and torch.is_floating_point(x)
    )
    print(f"Prepare batch time: {time.perf_counter() - start}")

    # --- Generate and decode ---
    start = time.perf_counter()
    with torch.no_grad():
        outputs = model.generate(**input_batch, **gen_kwargs)
        # Drop the prompt tokens so only newly generated tokens are decoded.
        outputs = outputs[:, input_batch['input_ids'].shape[1]:]
        outputs = tokenizer.batch_decode(outputs)

    # Keep only the text before the end-of-text marker.
    outlist = [output.split("<|end_of_text|>")[0].strip() for output in outputs]
    print(outlist)

    # One list of decoded strings per batch (answers is a list of lists).
    answers.append(outlist)

    print(f"Generate time: {time.perf_counter() - start}")

In [None]:
# Given tags:
# - Hairy,Armpit,Latina,Tiny Tits,Undressing,Sexy,Amateur,Upskirt,Nipples,Socks,Pussy,Close Up,Legs,Lingerie,Dress,Panties,Ass,Babe,Brunette,Beautiful,Clothed,Spreading,Asshole,Solo,PAWG 
# Pick tags that describe the image.


In [None]:
# As an AI image tagging expert, analyze the given image containing explicit content.
# Your task is to categorize the image based on the list of categories given below.
# Ensure you understand the meaning of each category, including abbreviations, slang, and explicit terms.
# 
# ['Hairy', ' Armpit', ' Latina', ' Tiny Tits', ' Undressing', ' Sexy', ' Amateur', ' Upskirt', ' Nipples', ' Socks', ' Pussy', ' Close Up', ' Legs', ' Lingerie', ' Dress', ' Panties', ' Ass', ' Babe', ' Brunette', ' Beautiful', ' Clothed', ' Spreading', ' Asshole', ' Solo', ' PAWG'] 
# If you include any category not specified in the list, you will incur a penalty of $10.
# 
# Output Template:
# The output should be structured as a python string with ',' between categories. Example:
# category1, category2, category3, ..., category10

In [None]:
# As an AI image tagging expert, analyze the given image containing explicit content.
# Your task is to categorize the image based on the list of categories given below.
# Ensure you understand the meaning of each category, including abbreviations, slang, and explicit terms.
# 
# ['Hairy', ' Armpit', ' Latina', ' Tiny Tits', ' Undressing', ' Sexy', ' Amateur', ' Upskirt', ' Nipples', ' Socks', ' Pussy', ' Close Up', ' Legs', ' Lingerie', ' Dress', ' Panties', ' Ass', ' Babe', ' Brunette', ' Beautiful', ' Clothed', ' Spreading', ' Asshole', ' Solo', ' PAWG'] Step-by-Step Instructions: 1. Review the provided image: Carefully examine the image content. 2. Identify relevant categories: Refer to the list between and tags. 3. Assign categories: Select and assign as many relevant categories from the list as possible, ensuring they accurately reflect the image’s features or context. 4. Adhere to the list: Use only the categories specified in the list. Assign a maximum of 10 categories per image.
# Important Note:
# If you include any category not specified in the list, you will incur a penalty of $10.
# 
# Output Template:
# The output should be structured as a python string with ',' between categories. Example:
# category1, category2, category3, ..., category10

In [None]:
# As an AI image tagging expert, analyze the given image containing explicit content.
# Your task is to fill in the JSON template and return it as the result:
# 
# Categories: ['Hairy','Armpit','Latina','TinyTits','Undressing','Sexy','Amateur','Upskirt','Nipples','Socks','Pussy','CloseUp','Legs','Lingerie','Dress','Panties','Ass','Babe','Brunette','Beautiful','Clothed','Spreading','Asshole','Solo','PAWG']
# If you include any category not specified in the list, you will incur a penalty of $10.
# 
# Output Template:
# The output should be structured as a python string with ',' between categories. Example:
# category1, category2, category3, ..., category10

In [None]:
# You will be given an image and a list of categories. Your task is to analyze the image and select the categories that best describe or reflect the content and context of the image. Ensure the output is always a list of categories.
# 
# Instructions:
# 	1.	Task:
# 	•	Carefully examine the image to understand its primary elements, context, and overall theme.
# 	•	From the provided list, pick the categories that most accurately describe the image.
# 	2.	Output:
# 	•	Provide a list of categories that reflect the image the most. The list should contain only the categories that are directly relevant to the image content.
# 
# Guidelines:
# 	•	Be as specific and relevant as possible when selecting the categories.
# 	•	If multiple categories apply, include all that are relevant.
# 	•	If none of the categories apply, provide an empty list.
# 
# Categories: 
# 	•	['Hairy','Armpit','Latina','TinyTits','Undressing','Sexy','Amateur','Upskirt','Nipples','Socks','Pussy','CloseUp','Legs','Lingerie','Dress','Panties','Ass','Babe','Brunette','Beautiful','Clothed','Spreading','Asshole','Solo','PAWG']
# 

In [2]:
# Reduce this list only to the ones that fit the image context:
# 
# ['Hairy pussy','Armpit','Latina','TinyTits','Undressing','Sexy','Amateur','Upskirt','Nipples','Socks','Pussy','CloseUp','Legs','Lingerie','Dress','Panties','Ass','Babe','Brunette','Beautiful','Clothed','Spreading','Asshole','Solo','PAWG']
# 
# Your output should be a list of unique category names that apply to the image. Output only the list.