# For Fine Tuning

In [None]:
torchrun --standalone --nproc_per_node=1 train.py --wandb-project finetune-2 --device-batch-size 2 --dataset data.json --max-samples 80 --images-path training-images --test-every 20 --test-size 8

# For Inference

In [None]:
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration
from peft import PeftModel, PeftConfig

IMAGE_PATH = "training-images/11.jpg"
PROMPT = "Please write a question or prompt for this image. The questions or prompts you write are just like what a user might write. The prompt/question should usually be related to the image, but may occasionally not, so as not to bias things. The prompts/questions you write cover the entire range of things users might write, including the entire range of ways users might write, english level, typos, grammar mistakes, etc."
MODEL_NAME = "fancyfeast/llama-joycaption-alpha-two-hf-llava"
LORA_PATH = "checkpoints/a65dbuqp/samples_64/model"  # Local directory path

# Load JoyCaption
processor = AutoProcessor.from_pretrained(MODEL_NAME)
llava_model = LlavaForConditionalGeneration.from_pretrained(MODEL_NAME, torch_dtype="bfloat16", device_map=0)

# Load LoRA adapter from local directory
try:
    # Load the adapter configuration
    adapter_config = PeftConfig.from_pretrained(LORA_PATH)

    # Load the LoRA adapter weights
    llava_model = PeftModel.from_pretrained(llava_model, LORA_PATH)
except Exception as e:
    raise ValueError(f"Failed to load LoRA adapter from {LORA_PATH}: {e}")

llava_model.eval()

with torch.no_grad():
    # Load image
    image = Image.open(IMAGE_PATH)

    # Build the conversation
    convo = [
        {
            "role": "system",
            "content": "You are a helpful image captioner.",
        },
        {
            "role": "user",
            "content": PROMPT,
        },
    ]

    # Format the conversation
    convo_string = processor.apply_chat_template(convo, tokenize=False, add_generation_prompt=True)
    assert isinstance(convo_string, str)

    # Process the inputs
    inputs = processor(text=[convo_string], images=[image], return_tensors="pt").to('cuda')
    inputs['pixel_values'] = inputs['pixel_values'].to(torch.bfloat16)

    # Generate the captions
    generate_ids = llava_model.generate(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        suppress_tokens=None,
        use_cache=True,
        temperature=0.6,
        top_k=None,
        top_p=0.9,
    )[0]

    # Trim off the prompt
    generate_ids = generate_ids[inputs['input_ids'].shape[1]:]

    # Decode the caption
    caption = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
    caption = caption.strip()
    print(caption)

In [11]:
import pandas as pd

df = pd.read_json('product_details.json')
df.head()

Unnamed: 0,category_name,images,description
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...
2,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 3' x 4' (w x h)\nFlower ratio - 50...
3,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' (w x l x h)\nExcl...
4,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 3' x 4' (w x h)\nFlower ratio - 50...


In [12]:
df.shape

(329390, 3)

In [27]:
x = df.iloc[0,1]

In [28]:
x

['https://meragi-core-dev.s3.ap-south-1.amazonaws.com/product/15344/643439b9-c82a-42cd-9a07-d317b25efacc.jpg',
 'https://meragi-core-dev.s3.ap-south-1.amazonaws.com/product/15344/98308bc8-91cc-4290-9af7-42f773bd61a9.jpg']

In [20]:
y = df.iloc[1,2]
y

"Board size- 2' x 3' (w x h)\nFlower Ratio - 50% Art/N Bunch\nIncluded- 5 Vases and 1 Standee\nExcluded- All other Furniture, Props & Accessories\n *Please Note- Prices are subject to size, flower, fabric, and accessories used."

In [16]:
final_df = df.iloc[:1999, :]

In [17]:
final_df.head(2)

Unnamed: 0,category_name,images,description
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...


In [18]:
final_df.shape

(1999, 3)

In [23]:
final_df['DESC'] = df.apply(lambda x: {'category_name': x['category_name'], 'description': x['description']}, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['DESC'] = df.apply(lambda x: {'category_name': x['category_name'], 'description': x['description']}, axis=1)


In [24]:
final_df.head(2)

Unnamed: 0,category_name,images,description,DESC
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...,"{'category_name': 'Mantap', 'description': 'Ma..."
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc..."


In [52]:
final_df['image_link'] = final_df.apply(lambda x: x['images'][0] if len(x['images']) != 0 else None, axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['image_link'] = final_df.apply(lambda x: x['images'][0] if len(x['images']) != 0 else None, axis = 1)


In [53]:
final_df.head(2)

Unnamed: 0,category_name,images,description,DESC,image_link
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...,"{'category_name': 'Mantap', 'description': 'Ma...",https://meragi-core-dev.s3.ap-south-1.amazonaw...
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc...",https://meragi-core-dev.s3.ap-south-1.amazonaw...


In [58]:
j = 0
for i in final_df['images'].values:
    if len(i)==0:
        print(j)

    j += 1

588


In [59]:
final_df.isna().sum()

category_name    1
images           0
description      0
DESC             0
image_link       1
dtype: int64

In [60]:
final_df.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df.dropna(inplace=True)


In [61]:
final_df.shape

(1997, 5)

In [63]:
final_df.head(2)

Unnamed: 0,category_name,images,description,DESC,image_link
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...,"{'category_name': 'Mantap', 'description': 'Ma...",https://meragi-core-dev.s3.ap-south-1.amazonaw...
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc...",https://meragi-core-dev.s3.ap-south-1.amazonaw...


In [101]:
import os
import requests

output_folder = 'downloaded_images'
os.makedirs(output_folder, exist_ok=True)

def download_image(url, folder):
    try:
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            if url.split('/')[-1] != 'test.png' and url.split('/')[-1] != 'test.jpg' and url.split('/')[-1] != 'test.jpeg':
                filename = os.path.join(folder, url.split('/')[-1])
            else:
                z = url.split('/')[-2] + url.split('/')[-1]
                filename = os.path.join(folder, z)
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)

            return filename.split('/')[-1]
        else:
            print(f"Failed to download {url}")
            return None

    except Exception as e:
        print(f"Error downloading {url}: {e}")
        return None

final_df['downloaded_image_path'] = final_df['image_link'].apply(lambda x: download_image(x, output_folder))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df['downloaded_image_path'] = final_df['image_link'].apply(lambda x: download_image(x, output_folder))


In [100]:
final_df.head()

Unnamed: 0,category_name,images,description,DESC,image_link
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...,"{'category_name': 'Mantap', 'description': 'Ma...",https://meragi-core-dev.s3.ap-south-1.amazonaw...
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc...",https://meragi-core-dev.s3.ap-south-1.amazonaw...
2,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 3' x 4' (w x h)\nFlower ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc...",https://meragi-core-dev.s3.ap-south-1.amazonaw...
3,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' (w x l x h)\nExcl...,"{'category_name': 'Mantap', 'description': 'Ma...",https://meragi-core-dev.s3.ap-south-1.amazonaw...
4,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 3' x 4' (w x h)\nFlower ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc...",https://meragi-core-dev.s3.ap-south-1.amazonaw...


In [66]:
final_df['downloaded_image_path'].isna().sum()

0

In [96]:
final_df.drop(columns=['downloaded_image_path'], inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df.drop(columns=['downloaded_image_path'], inplace = True)


In [97]:
final_df.head(2)

Unnamed: 0,category_name,images,description,DESC,image_link
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...,"{'category_name': 'Mantap', 'description': 'Ma...",https://meragi-core-dev.s3.ap-south-1.amazonaw...
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc...",https://meragi-core-dev.s3.ap-south-1.amazonaw...


In [102]:
x = os.walk('downloaded_images')

In [103]:
j =0
for i in x:
    print(i)
    k = i[2]

('downloaded_images', [], ['e63db7bb-0baf-4478-b568-4318a35b5902.png', 'e3560d74-6071-418d-a733-d73f98b962d2.jpg', '52a9c9ea-acbf-49f5-9ab6-5cd9c8d1b9b7.jpg', '676test.png', '7994f58b-f716-473e-8f63-c7ab41561652.jpg', '0cde64d0-5f7e-4763-a6c4-1566c8c40662.jpg', '536b25f5-b60b-4585-a80e-a3260df1838a.jpg', '412a2dc0-5f2f-4b2f-af5a-9012be0560c0.jpg', 'ec67c8ec-2734-4f1e-9084-3d16551d772b.jpg', 'a541d6d2-5be2-40f9-8ad9-96f5e48caa86.jpg', '83984d33-9884-46ec-bd3a-27138b0ef06e.jpg', '1e6d41b8-09a0-4074-b789-b5af6e7b43ce.jpg', '62fc33e4-257a-4685-971a-0e1a25ee19c9.jpg', '071ad36c-6408-4ea5-91c3-18b3be4c3db7.jpg', '334bb425-d964-4a20-ad8c-fc7497ceb3a0.jpg', 'ede74b4c-7060-4599-ac07-5a07c7db0b09.jpg', '9d8101f4-937f-4141-9cc0-eaa14da40a71.jpg', '4656cb2d-fdc1-4bf3-82fb-82935a6a65f8.png', '744ee488-2960-4976-9724-4844c9c2baa8.png', '0e6da63c-0fdb-42a7-83a0-9f9c20fe1a13.jpg', 'd6d3e494-fa17-41cf-91af-0f6820f25ab9.jpg', '8e95e36d-aaa3-408e-ac9a-722067ab347d.jpg', 'c4dfd753-5e5e-4344-8b99-2a64869d9

In [104]:
len(k)

809

In [105]:
final_df.shape

(1997, 6)

In [106]:
final_df['downloaded_image_path'].isna().sum()

0

In [88]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 5000)

In [95]:
x = final_df['images'][113]
x

['https://meragi-core-dev.s3.ap-south-1.amazonaws.com/product/275/test.png']

In [110]:
final_df.shape

(1997, 6)

In [111]:
final_df.drop_duplicates(subset=['downloaded_image_path'], keep='first', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df.drop_duplicates(subset=['downloaded_image_path'], keep='first', inplace=True)


In [112]:
final_df = final_df.reset_index(drop=True)

In [114]:
final_df.head(2)

Unnamed: 0,category_name,images,description,DESC,image_link,downloaded_image_path
0,Mantap,[https://meragi-core-dev.s3.ap-south-1.amazona...,Mandap Size- 12' x 10' x 12' ( w x h x l )\nIn...,"{'category_name': 'Mantap', 'description': 'Ma...",https://meragi-core-dev.s3.ap-south-1.amazonaw...,643439b9-c82a-42cd-9a07-d317b25efacc.jpg
1,Welcome Board Decor,[https://meragi-core-dev.s3.ap-south-1.amazona...,Board size- 2' x 3' (w x h)\nFlower Ratio - 50...,"{'category_name': 'Welcome Board Decor', 'desc...",https://meragi-core-dev.s3.ap-south-1.amazonaw...,52a9c9ea-acbf-49f5-9ab6-5cd9c8d1b9b7.jpg


In [115]:
test_df = final_df[['DESC', 'downloaded_image_path']]

In [118]:
test_df.iloc[0,0]

{'category_name': 'Mantap',
 'description': "Mandap Size- 12' x 10' x 12' ( w x h x l )\nIncluded- \n50% Ar/N Flower Ratio & 3 Side Beam Decor \nExcluded- Platform, Lights, Furniture & all other Props & Accessories\n*Please Note - Prices are subject to size, flower, fabric, and accessories used."}

In [116]:
test_df.shape

(809, 2)

In [None]:
final_prompt = "Analyze the image and generate a structured dictionary containing decorative items present.\n\nRules to Follow:\n1. Ignore People: Do not mention any person/people in the image. Focus solely on décor elements.\n2. Structured Output: Return the response as a dictionary with the following keys:\n   - category_name: The name of the decorative item (e.g., 'Mantap', 'Centerpiece').\n   - description: Detailed specifications, including size, materials, inclusions, and exclusions.\n3. Item Counting: If multiple instances of an item exist, include the count in parentheses. Example: 'Basket(3)' instead of 'Basket'.\n4. Detail-Oriented: Include measurements, material types, and any relevant notes about included/excluded items or conditions (e.g., 'Platform not included').\n5. Clarity: Use simple, direct language for object names and descriptions."
prompt = '"Task: Analyze the image and generate a dictionary of decorative items present.\n\n Rules to Follow:  \n1. Ignore People: If any person/people are present in the image, do NOT mention them. Only focus on décor elements.  \n2. Structured Output: Return the response in a clean, in dictionary format where first key is category_name and the second is description.  \n3. Item Counting: If multiple instances of the same item are present, include the count in parentheses.  \n   - Example: If there are three baskets, write \"Basket(3)\" instead of just \"Basket.\"  \n4. Maintain Clarity: Use simple and precise object names."'