In [12]:
import os
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential
import pandas as pd
import numpy as np
import re

In [2]:

# Read the CSV files into pandas DataFrames
df_active_upc = pd.read_csv('ActiveUPCMarketingInformation.csv')
df_upc_images = pd.read_csv('UPCImages.csv')

# Join the DataFrames on 'ItemSku'
merged_df = pd.merge(df_active_upc, df_upc_images, on='ItemSku', how='inner')

In [3]:
image_yes_desc_yes = merged_df[(merged_df['ItemDocumentValue'].notnull()) & (merged_df['MarketingFeatures'].notnull())]
image_yes_desc_no = merged_df[(merged_df['ItemDocumentValue'].notnull()) & (merged_df['MarketingFeatures'].isnull())]
image_no_desc_yes= merged_df[(merged_df['ItemDocumentValue'].isnull()) & (merged_df['MarketingFeatures'].notnull())]
image_no_desc_no = merged_df[(merged_df['ItemDocumentValue'].isnull()) & (merged_df['MarketingFeatures'].isnull())]

# image_yes_desc_no

In [4]:
# Get unique values from the 'ItemSku' column
unique_values = image_yes_desc_no['ItemSku'].unique()

# Select 10 unique values randomly
random_unique_values = pd.Series(unique_values).sample(n=10, random_state=42).tolist()

ProductName = []
ItemDocumentValue = []

for sku in random_unique_values:
    # Filter the DataFrame for the current SKU
    sku_df = image_yes_desc_no[image_yes_desc_no['ItemSku'] == sku]
    
    # Extract the ProductName for the current SKU
    product_name = sku_df['ProductName'].iloc[0]  # Assuming there's only one ProductName per SKU
    ProductName.append(product_name.strip())
    
    # Extract the ItemDocumentValue for the current SKU
    item_values = sku_df['ItemDocumentValue'].tolist()
    ItemDocumentValue.append(item_values)


In [5]:
# Create an Image Analysis client
client = ImageAnalysisClient(
    endpoint='https://ipmicro-ocr.cognitiveservices.azure.com/',
    credential=AzureKeyCredential('2c5aad069013400f81faae0c05fd5871')
)

In [6]:
# Get a caption for the image. This will be a synchronously (blocking) call.
captions = [0]*len(ItemDocumentValue)
desc = [0]*len(ItemDocumentValue)

for i in range(0,len(ItemDocumentValue)):
    temp_des = ''
    caption = ''
    for url in ItemDocumentValue[i]:
        result = client.analyze_from_url(
            image_url=url,
            visual_features=[VisualFeatures.CAPTION, VisualFeatures.READ, VisualFeatures.TAGS],
            gender_neutral_caption=True,  # Optional (default is False)
        )

        if result.caption is not None:
            caption = caption + " | " + result.caption.text
            
            
        if result.read is not None:
            try:
                for line in result.read.blocks[0].lines:
                    temp_des = temp_des + ' '+ line.text
            except:
                np.nan
    desc[i] = temp_des
    captions[i] = caption 

In [7]:
import requests
import json
api_key = "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"

In [8]:
system_prompt = "You are a helpful AI Assistant. You are working for a national retailer whose Brand Voice is characterized as friendly, informative, and trustworthy. At the core of retailer's brand lies a commitment to clarity and comprehension. When drafting product descriptions, it is essential to directly mirror the product's features as they are officially listed. Every feature should be presented precisely and understandably in the description to avoid any confusion. For example, if a product is listed with “Organic Cotton, 300 Thread Count,” it should be stated exactly as such, not as Natural Cotton, 300TC. Generate the product descriptions.Also give the confidence score out of 100% for the response should be in the format \"Confidence score: x%\""

In [9]:
response = [0]*len(ProductName)
for i in range(len(ProductName)):
    prompt = f"product name is {ProductName[i]} and information captured from the images are {desc[i]}. The caption from a Vision AI described the image as {captions[i]}"
    # Send the request with both "system" and "user" roles
    response[i] = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "HTTP-Referer": "YOUR_SITE_URL",  # Optional, replace YOUR_SITE_URL with your actual site URL.
            "X-Title": "YOUR_APP_NAME",  # Optional, replace YOUR_APP_NAME with your actual app name.
        },
        data=json.dumps({
            "model": "mistralai/mistral-7b-instruct:free",  # Optional
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ]
        })
    )



In [10]:
content = []
for i in range(len(ProductName)):
    content.append(response[i].json()['choices'][0]['message']['content'])

In [13]:
confidence_scores = []
for i in range(len(ProductName)):
    # Using regular expression to extract the confidence score
    confidence_score_pattern = r'Confidence score: (\d+)%'
    confidence_score_match = re.search(confidence_score_pattern, content[i])

    if confidence_score_match:
        confidence_scores.append(confidence_score_match.group(1))
        
    else:
        confidence_scores.append("Not found")


# Relevance: Cosine Similarity

In [14]:
from collections import Counter
import math

def preprocess_text(text):
    # Tokenize the text into words, remove punctuation, and convert to lowercase
    words = text.lower().split()
    words = [word.strip('.,?!') for word in words]
    return words

def compute_cosine_similarity(vector1, vector2):
    # Compute the dot product of the two vectors
    dot_product = sum(vector1[key] * vector2.get(key, 0) for key in vector1)
    
    # Compute the magnitude of each vector
    magnitude1 = math.sqrt(sum(val ** 2 for val in vector1.values()))
    magnitude2 = math.sqrt(sum(val ** 2 for val in vector2.values()))
    
    # Compute the cosine similarity
    cosine_similarity = dot_product / (magnitude1 * magnitude2)
    return cosine_similarity




In [15]:
cosine_similarity = []
for i in range(len(ProductName)):
    prompt = f"product name is {ProductName[i]} and information captured from the images are {desc[i]}. The caption from a Vision AI described the image as {captions[i]}. Also give the confidence score out of 100% of the response in the format, Confidence score: xx%"
    
# Define the two paragraphs
    paragraph1 = prompt
    paragraph2 = content[i]

    # Preprocess the paragraphs
    words1 = preprocess_text(paragraph1)
    words2 = preprocess_text(paragraph2)

    # Compute word frequencies for each paragraph
    word_freq1 = Counter(words1)
    word_freq2 = Counter(words2)

    # Compute cosine similarity
    similarity = compute_cosine_similarity(word_freq1, word_freq2)
    cosine_similarity.append(similarity)

In [40]:
prompt1 = f"product name is {ProductName[1]} and information captured from the images are {desc[1]}. The caption from a Vision AI described the image as {captions[1]}. Also give the confidence score out of 100% of the response in the format, Confidence score: xx%"
prompt1

"product name is HANES 6PR INVISBL CMFRT BAL LINER HC726B PASTL 5-9 and information captured from the images are  6 women's shoe size pair Hanes 5-9 Mode In China invisible comfort cool comfort™ COOL. DRY. BREATHABLE. ballerina liner Heels Stay. All Day.. The caption from a Vision AI described the image as  | a pack of socks with a label. Also give the confidence score out of 100% of the response in the format, Confidence score: xx%"

In [41]:
content[1]

'Confidence score: 90%\n\n"Introducing the Hanes 6PR InvisiBL Comfort Bralley Liners, designed to provide superior comfort for all-day wear. This pastel-colored pack of socks features one pair of invisible comfort cool comfort™ COOL, DRY, BREATHABLE. ballerina-style liners. The liners have been designed to provide heel-hugging support, keeping your feet still in place all day long. Each liner is perfect for women\'s shoe sizes 5-9, ensuring a truly comfortable and enjoyable experience. Get yourself the perfect comfort solution with Hanes 6PR InvisiBL Comfort Bralley Liners HC726B PASTL 5-9."'

# Clarity

In [16]:
import re
from syllapy import count as count_syllables

def count_words(text):
    words = re.findall(r'\b\w+\b', text)
    return len(words)

def count_sentences(text):
    sentences = re.split(r'[.!?]', text)
    return len(sentences)

def count_complex_words(text):
    words = re.findall(r'\b\w+\b', text)
    complex_words = [word for word in words if len(re.findall(r'[aeiouy]+', word.lower())) > 2]
    return len(complex_words)

def calculate_flesch_kincaid_grade_level(words, sentences, syllables):
    return 0.39 * (words / sentences) + 11.8 * (syllables) - 15.59

def calculate_gunning_fog_index(words, sentences, complex_words):
    return 0.4 * ((words / sentences) + 100 * (complex_words / words))




In [17]:
flesch_kincaid_grade_level = []
gunning_fog_index = []
for i in range(len(ProductName)):
    text = content[i]
    num_words = count_words(text)
    num_sentences = count_sentences(text)
    num_complex_words = count_complex_words(text)

    # Counting actual number of syllables using syllapy
    total_syllables = sum(count_syllables(word) for word in re.findall(r'\b\w+\b', text))

    # Avg syllables per word
    avg_syllables_per_word = total_syllables / num_words

    FK_grade_level = calculate_flesch_kincaid_grade_level(num_words, num_sentences, avg_syllables_per_word)
    GF_index = calculate_gunning_fog_index(num_words, num_sentences, num_complex_words)
    flesch_kincaid_grade_level.append(FK_grade_level)
    gunning_fog_index.append(GF_index)

In [42]:
flesch_kincaid_grade_level

[9.838447488584478,
 8.60246200607903,
 10.065485714285717,
 8.500102739726028,
 10.600675675675678,
 8.545113636363638,
 20.617948717948718,
 11.466434782608694,
 11.439878253568434,
 7.249285714285715]

# AIDA Framework

In [25]:
system_prompt2 = "You are a helpful AI Assistant. You are working for a national retailer. They have generated a product descriptions and your job is to judge if the description generated is able to capture the attention, interest of the consumer. Also, judge if the description is able to cultivate desire and prompt the potential customer take action such as making a purchase. For each of these four: Attention, interest, desire and action give a score out of 10 in the format Attention: x/10 etc. Do not generate any response other than the scores for those four attributes. The output should be the scores out of 10"

In [26]:
response = [0]*len(content)
for i in range(len(content)):
    prompt2 = f"product description generated is {content[i]}"
# Send the request with both "system" and "user" roles
    response[i] = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "HTTP-Referer": "YOUR_SITE_URL",  # Optional, replace YOUR_SITE_URL with your actual site URL.
            "X-Title": "YOUR_APP_NAME",  # Optional, replace YOUR_APP_NAME with your actual app name.
        },
        data=json.dumps({
            "model": "mistralai/mistral-7b-instruct:free",  # Optional
            "messages": [
                {"role": "system", "content": system_prompt2},
                {"role": "user", "content": prompt2}
            ]
        })
    )


In [27]:
aida_framework = []
for i in range(len(ProductName)):
    aida_framework.append(response[i].json()['choices'][0]['message']['content'])

In [28]:
aida_framework

['Attention: 10/10\nInterest: 7/10\nDesire: 6/10\nAction: 8/10',
 'Attention: 9/10\n\nInterest: 8/10\n\nDesire: 7/10\n\nAction: 8/10',
 'Attention: 8/10\nInterest: 7/10\nDesire: 7/10\nAction: 7/10',
 '* Attention: 9/10\n* Interest: 8/10\n* Desire: 6/10\n* Action: 7/10',
 'Attention: 8/10\nThe product description has a clear and attention-grabbing title "Meijer Lubricant Eye Drops" and it highlights the key benefits of the product, make it a good attention generator.\n\nInterest: 7/10\nThe product description gives a good overview of the product\'s features, how it works, and its benefits, but it could benefit from more engaging language to increase the reader\'s interest in the product.\n\nDesire: 6/10\nThe product description does emphasize the product\'s benefits and features, but it does not convey a strong sense of urgency or desire for the product.\n\nAction: 5/10\nThe product description provides safety instructions and expires date but rarely create such strong desire in custome

In [29]:
import re

# Provided text
attention = []
interest = []
desire = []
action = []

for i in range(len(content)):
    text = aida_framework[i]
# Extracting Attention, Interest, Desire, and Action scores
    attention.append(int(re.search(r'Attention: (\d+)/10', text).group(1)))
    interest.append(int(re.search(r'Interest: (\d+)/10', text).group(1)))
    desire.append(int(re.search(r'Desire: (\d+)/10', text).group(1)))
    action.append(int(re.search(r'Action: (\d+)/10', text).group(1)))


In [30]:
interest

[7, 8, 7, 8, 8, 7, 8, 8, 9, 7]

In [32]:
data = {
    'random_unique_values': random_unique_values,
    'content': content,
    'cosine_similarity': cosine_similarity,
    'flesch_kincaid_grade_level': flesch_kincaid_grade_level,
    'gunning_fog_index': gunning_fog_index,
    'attention': attention,
    'interest': interest,
    'desire': desire,
    'action': action,
    'confidence_scores': confidence_scores
}

# Create DataFrame
df = pd.DataFrame(data)
# Print DataFrame
df.head()


Unnamed: 0,random_unique_values,content,cosine_similarity,flesch_kincaid_grade_level,gunning_fog_index,attention,interest,desire,action,confidence_scores
0,4125056000.0,Confidence score: 100%\n\nProduct Description:...,0.578626,9.838447,14.95764,10,7,6,8,100
1,3825771000.0,"Confidence score: 90%\n\n""Introducing the Hane...",0.479171,8.602462,13.456535,9,8,7,8,90
2,8595516000.0,"Kryptonics 36"" Longboard:\nOur Kryptonics 36"" ...",0.556091,10.065486,16.102857,8,7,7,7,90
3,3993824000.0,Product Name: Pittsburgh Steelers Lunch Plate\...,0.695894,8.500103,11.683562,9,8,6,7,90
4,73899430000.0,Description:\nThe Hanes Girl Bikini UW GP10BK ...,0.698883,10.600676,16.318919,7,8,6,6,99


In [33]:
df

Unnamed: 0,random_unique_values,content,cosine_similarity,flesch_kincaid_grade_level,gunning_fog_index,attention,interest,desire,action,confidence_scores
0,4125056000.0,Confidence score: 100%\n\nProduct Description:...,0.578626,9.838447,14.95764,10,7,6,8,100
1,3825771000.0,"Confidence score: 90%\n\n""Introducing the Hane...",0.479171,8.602462,13.456535,9,8,7,8,90
2,8595516000.0,"Kryptonics 36"" Longboard:\nOur Kryptonics 36"" ...",0.556091,10.065486,16.102857,8,7,7,7,90
3,3993824000.0,Product Name: Pittsburgh Steelers Lunch Plate\...,0.695894,8.500103,11.683562,9,8,6,7,90
4,73899430000.0,Description:\nThe Hanes Girl Bikini UW GP10BK ...,0.698883,10.600676,16.318919,7,8,6,6,99
5,76023630000.0,Meijer Lubricant Plus Eye Drp Vial 30CT\n=====...,0.75268,8.545114,13.322727,8,7,6,5,Not found
6,76023690000.0,Product name: Meijer Travel Soap Box - Plastic...,0.39615,20.617949,28.933333,9,8,7,6,100
7,7161137000.0,Pennzoil Platinum Full Synthetic High Mileage ...,0.557176,11.466435,19.09913,9,8,7,7,95
8,30997020000.0,ALMAY Clear Complexion Concealer ALMAY Clear C...,0.514461,11.439878,17.084187,9,9,9,8,100
9,1223620000.0,Product Description: Young Guns BD is a great ...,0.52737,7.249286,12.142857,9,7,7,7,80


In [37]:

# Define function to determine pass/fail
def pass_fail(row):
    if (row['cosine_similarity'] > 0.4 and
        row['flesch_kincaid_grade_level'] < 15 and
        row['gunning_fog_index'] < 20 and
        row['attention'] > 5 and
        row['interest'] > 5 and
        row['desire'] > 5 and
        row['action'] > 5):
        return 'Pass'
    else:
        return 'Fail'

# Apply the function to create 'Pass/Fail' column
df['Pass/Fail'] = df.apply(pass_fail, axis=1)


In [38]:
df

Unnamed: 0,random_unique_values,content,cosine_similarity,flesch_kincaid_grade_level,gunning_fog_index,attention,interest,desire,action,confidence_scores,Pass/Fail
0,4125056000.0,Confidence score: 100%\n\nProduct Description:...,0.578626,9.838447,14.95764,10,7,6,8,100,Pass
1,3825771000.0,"Confidence score: 90%\n\n""Introducing the Hane...",0.479171,8.602462,13.456535,9,8,7,8,90,Pass
2,8595516000.0,"Kryptonics 36"" Longboard:\nOur Kryptonics 36"" ...",0.556091,10.065486,16.102857,8,7,7,7,90,Pass
3,3993824000.0,Product Name: Pittsburgh Steelers Lunch Plate\...,0.695894,8.500103,11.683562,9,8,6,7,90,Pass
4,73899430000.0,Description:\nThe Hanes Girl Bikini UW GP10BK ...,0.698883,10.600676,16.318919,7,8,6,6,99,Pass
5,76023630000.0,Meijer Lubricant Plus Eye Drp Vial 30CT\n=====...,0.75268,8.545114,13.322727,8,7,6,5,Not found,Fail
6,76023690000.0,Product name: Meijer Travel Soap Box - Plastic...,0.39615,20.617949,28.933333,9,8,7,6,100,Fail
7,7161137000.0,Pennzoil Platinum Full Synthetic High Mileage ...,0.557176,11.466435,19.09913,9,8,7,7,95,Pass
8,30997020000.0,ALMAY Clear Complexion Concealer ALMAY Clear C...,0.514461,11.439878,17.084187,9,9,9,8,100,Pass
9,1223620000.0,Product Description: Young Guns BD is a great ...,0.52737,7.249286,12.142857,9,7,7,7,80,Pass
