In [2]:
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import pandas as pd
from io import BytesIO


def remove_duplicates(string):
    """Return *string* with repeated whitespace-separated words removed.

    Only the first occurrence of each word is kept, in its original
    position; comparison is exact (case-sensitive). The surviving words are
    joined back together with single spaces.
    """
    seen = set()
    kept = []
    for word in string.split():
        if word in seen:
            continue
        seen.add(word)
        kept.append(word)
    return ' '.join(kept)

# Parallel accumulators: the captioning loop appends one entry to each list
# per image it processes, and they become the columns of the output CSV.
sku_list, img_url_list = [], []
conditional_captions, unconditional_captions = [], []

# Input CSV of product documents.
file_path = 'sampled_products_10_each_img_front.csv'
df = pd.read_csv(file_path)

# Keep only the rows whose note is "upc_img_pth_frnt", and from those only
# the SKU and the document value (used below as the image URL).
is_front_image = df['ItemDocumentNote'] == "upc_img_pth_frnt"
dfImage = df.loc[is_front_image, ["ItemSku", "ItemDocumentValue"]]
print(dfImage)

# Load the BLIP processor and captioning model from the same checkpoint.
blip_checkpoint = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(blip_checkpoint)
model = BlipForConditionalGeneration.from_pretrained(blip_checkpoint)




# Browser-like User-Agent sent with every image request. Built once here
# because it is identical for every row.
_REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'}


def _fetch_image(url):
    """Download *url* and return it as an RGB PIL image.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-success HTTP status.
        OSError: if the downloaded bytes cannot be decoded as an image.
    """
    response = requests.get(url, headers=_REQUEST_HEADERS, timeout=10)
    # Check the status before decoding so an HTTP error page is reported as
    # an HTTP error rather than as an unreadable image.
    response.raise_for_status()
    # response.content has any Content-Encoding already decoded, which the
    # raw socket stream does not.
    return Image.open(BytesIO(response.content)).convert('RGB')


def _generate_caption(image, prompt=None):
    """Caption *image* with BLIP and drop repeated words from the result.

    When *prompt* is given it is passed to the processor as the text input
    (conditional captioning); otherwise only the image is used.
    """
    if prompt is None:
        inputs = processor(image, return_tensors="pt")
    else:
        inputs = processor(image, prompt, return_tensors="pt")
    output_ids = model.generate(**inputs, max_new_tokens=30)
    caption = processor.decode(output_ids[0], skip_special_tokens=True)
    return remove_duplicates(caption)


for index, row in dfImage.iterrows():
    sku = row["ItemSku"]
    img_url = row["ItemDocumentValue"]
    # pandas.read_csv turns empty cells and the literal text "null" into NaN
    # by default, so test for NaN as well as the raw string.
    if pd.isna(img_url) or img_url == 'null':
        continue

    try:
        raw_image = _fetch_image(img_url)
    except Exception as e:
        # Deliberately broad: one bad URL or corrupt image must not abort the
        # whole batch. Report it and move on to the next row.
        print(f"Error: {e}")
        continue

    print(f"-----\n{index}")
    print(f"ItemSku: {sku}")
    print(img_url)

    # Store information in lists
    sku_list.append(sku)
    img_url_list.append(img_url)

    # Conditional image captioning.
    # NOTE(review): BLIP appears to treat this text as the start of the
    # caption to continue rather than as an instruction; a caption prefix
    # such as "a photo of" may give cleaner output. TODO confirm against the
    # model documentation before changing the prompt.
    conditional_caption = _generate_caption(raw_image, "Image captioning: ")
    conditional_captions.append(conditional_caption)
    print(conditional_caption)

    # Unconditional image captioning (image only, no text prompt).
    unconditional_caption = _generate_caption(raw_image)
    unconditional_captions.append(unconditional_caption)
    print(unconditional_caption)

    # Rebuild the result table and rewrite the CSV after every row, so the
    # file on disk always contains all rows processed so far.
    result_df = pd.DataFrame({
        "ItemSku": sku_list,
        "ItemDocumentValue": img_url_list,
        "ConditionalCaption": conditional_captions,
        "UnconditionalCaption": unconditional_captions
    })
    result_df.to_csv("BLIP-output_captions80.csv", index=False)

        ItemSku                                  ItemDocumentValue
0   19283315462  http://static.meijer.com/Media/001/92833/00192...
1   88392962061  http://static.meijer.com/Media/008/83929/00883...
2    4549659042  http://static.meijer.com/Media/000/45496/00045...
3   63806007772  http://static.meijer.com/Media/006/38060/00638...
4    5389114549  http://static.meijer.com/Media/000/53891/00053...
..          ...                                                ...
75   4125052533  http://static.meijer.com/Media/004/12505/00412...
76  70414200007  http://static.meijer.com/Media/007/04142/00704...
77   4125056427  http://static.meijer.com/Media/000/41250/00041...
78  71373320204  http://static.meijer.com/Media/007/13733/00713...
79  88692647395  http://static.meijer.com/Media/008/86926/00886...

[80 rows x 2 columns]

-----
0
ItemSku: 19283315462
http://static.meijer.com/Media/001/92833/0019283315462_0_A1C1_0600.png
image captioning :iane yankee candle
yankee candle - vanilla
-----
1
Ite



image captioning :ianianianianianianianianianianianianianianianianianianianianianianianianianianianianianian
women's performance tank top
-----
36
ItemSku: 4529904805
http://static.meijer.com/Media/000/45299/0004529904805_0_A1C1_0600.png
image captioning :ing lo surprise
lo surprise pack of 6
-----
37
ItemSku: 19416456511
http://static.meijer.com/Media/001/94164/0019416456511_0_A1C1_0600.png
image captioning :s comtional com
hans comfort fit micro briefs
-----
38
ItemSku: 68198377424
http://static.meijer.com/Media/006/81983/0068198377424_0_A1C1_0600.png
image captioning :s clipart clip
a colorful polka dot print lan
-----
39
ItemSku: 70882062294
http://static.meijer.com/Media/007/08820/0070882062294_0_A1C1_0600.png
image captioning :s mens full cushion crew socks
men's performance crew socks
-----
40
ItemSku: 71928317783
http://static.meijer.com/Media/007/19283/0071928317783_1_A1C1_0600.png
image captioning :in paper plates
a blue and green paper plate with the words'mere '
-----
41
It



image captioning : fruit
a cup of fruit with white background
-----
66
ItemSku: 21278600000
http://static.meijer.com/Media/002/12786/0021278600000_0_A1C1_0600.png
image captioning :infoionistly
a bowl of pasta with white background
-----
67
ItemSku: 70882002471
http://static.meijer.com/Media/007/08820/0070882002471_0_A1C1_0600.png
image captioning :in cherry pie
a box of cherry pie
-----
68
ItemSku: 21784400000
http://static.meijer.com/Media/002/17844/0021784400000_2_A1C1_0600.png
image captioning : food blog
a bowl of chicken salad with grapes and apples
-----
69
ItemSku: 7146402280
http://static.meijer.com/Media/000/71464/0007146402280_0_A1C1_0600.png
image captioning :infoionist soy honey ranch
a bottle of salad sauce with red cap
-----
70
ItemSku: 38290844003
http://static.meijer.com/Media/003/82908/0038290844003_0600.png




image captioning :in butral buturail bu
a box of b - utafil
-----
71
ItemSku: 71373330351
http://static.meijer.com/Media/071/37333/0713733303514_a1c1_0600.png
image captioning :ianianianianianianianianianianianianianianianianianianianianianianianianianianianianianian
moist
-----
72
ItemSku: 4116743106
http://static.meijer.com/Media/000/41167/0004116743106_0600.png




image captioning :in algrad allergy congestion relief
algra allergy congestion relief
-----
73
ItemSku: 76023619748
http://static.meijer.com/Media/076/02361/0760236197485_a1r1_0600.png
image captioning :in menstrip womencentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcentcent
women's century multi multivitant supplement
-----
74
ItemSku: 71928319407
http://static.meijer.com/Media/007/19283/0071928319407_0600.png




image captioning :incli com
melphin, 1 hour, hour
-----
75
ItemSku: 4125052533
http://static.meijer.com/Media/004/12505/0041250525332_a1c1_0600.png
image captioning :in petilililililililililililililililililililililililililililil
a jar of pure petroleum
-----
76
ItemSku: 70414200007
http://static.meijer.com/Media/007/04142/0070414200007_1_A1C1_0600.png
image captioning :inflioning flastor fl
the front of a box flastor daily supplement
-----
77
ItemSku: 4125056427
http://static.meijer.com/Media/000/41250/0004125056427_0600.png




image captioning :ian vitamin supplement
vitamin b - 10mg tablets
-----
78
ItemSku: 71373320204
http://static.meijer.com/Media/007/13733/0071373320204_1_A1C1_0600.png
image captioning :in fish oil
fish oil for omega
-----
79
ItemSku: 88692647395
http://static.meijer.com/Media/008/86926/0088692647395_2_A1C1_0600.png
image captioning :in herbal health product reviews
a bottle of cetro extract
