In [2]:
import sys, os
sys.path.append(os.path.join(os.path.dirname('__file__'), '..', 'DB_and_Azure'))
import sql_db_functions as SQLf

import pandas as pd
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 

import getpass
from apikey import apikey 


In [3]:
def restart_product_char():
    """Empty the ``product_characteristics`` table.

    Opens a connection via ``SQLf.sql_db_functions.connect_sql()``, executes
    ``TRUNCATE TABLE product_characteristics`` and commits the change.
    The connection is closed afterwards on every path, including when the
    statement or the commit raises.

    Returns:
        None
    """
    query = """
    TRUNCATE TABLE product_characteristics;
    """

    conn, cursor = SQLf.sql_db_functions.connect_sql()
    try:
        cursor.execute(query)
        conn.commit()
    finally:
        # Previously the connection was never released by this function.
        SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

In [4]:
def get_products():
    """Load the ID and description of every product.

    Runs ``SELECT Brand_Prod_ID, Descript FROM Products`` through a connection
    obtained from ``SQLf.sql_db_functions.connect_sql()``. The connection is
    closed even if the query fails.

    Returns:
        pandas.DataFrame: One row per fetched record, with the columns
        ``Brand_Prod_ID`` and ``Descript``.
    """
    query = "SELECT Brand_Prod_ID, Descript  FROM Products ;" #LIMIT 5

    conn, cursor = SQLf.sql_db_functions.connect_sql()
    try:
        cursor.execute(query)
        rows = cursor.fetchall()
    finally:
        # Release the connection on success and on failure alike.
        SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

    return pd.DataFrame(data=rows, columns=['Brand_Prod_ID','Descript'])


In [5]:
def get_related_links(Brand_Prod_ID):
    """Return the image URLs stored for one product.

    Args:
        Brand_Prod_ID: Value matched against ``product_img.Brand_id``.

    Returns:
        pandas.Series: The ``image_link`` column for the product. Links that
        do not contain ``https`` get ``https:`` prepended.
    """
    # Objects exposing .item() (e.g. numpy scalars) are unwrapped first.
    # NOTE(review): presumably the DB driver only adapts native Python values
    # when binding parameters - confirm against the driver in use.
    if hasattr(Brand_Prod_ID, "item"):
        Brand_Prod_ID = Brand_Prod_ID.item()

    # The ID is bound as a parameter (same %s style as load_form_to_db)
    # instead of being formatted into the SQL text.
    query = "SELECT image_link FROM product_img WHERE Brand_id = %s ;"

    conn, cursor = SQLf.sql_db_functions.connect_sql()
    try:
        cursor.execute(query, (Brand_Prod_ID,))
        image_links = cursor.fetchall()
    finally:
        # Release the connection on success and on failure alike.
        SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

    df_images = pd.DataFrame(data=image_links, columns=['image_link'])

    df_images['image_link'] = df_images['image_link'].apply(lambda x: x if 'https' in x else 'https:' +  x )

    return df_images.image_link

In [6]:
# Load every stored image link (all products) into `df_images`.
conn, cursor = SQLf.sql_db_functions.connect_sql()

query = "SELECT image_link FROM product_img ;"
try:
    cursor.execute(query)
    # Fetch the rows
    image_links = cursor.fetchall()
finally:
    # Release the connection even when the query fails.
    SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

df_images = pd.DataFrame(data=image_links, columns=['image_link'])

# Links that do not contain "https" get "https:" prepended.
df_images['image_link'] = df_images['image_link'].apply(lambda x: x if 'https' in x else 'https:' +  x )

In [7]:
def generate_prompt(image_urls, description,prompt_instructions):
    """Assemble the prompt text from image URLs, a description and instructions.

    Args:
        image_urls: Iterable of image URLs; they are joined with single spaces.
        description: Text placed after the ``Description:`` label.
        prompt_instructions: Instruction text appended after the description.

    Returns:
        str: The combined prompt text.
    """
    # Fixed layout: URL line, description line, a whitespace-only line, instructions.
    layout = (
        "\n"
        "        {} \n\n"
        "        Description: {} \n\n"
        "        \n"
        "        {}\n"
        "    "
    )
    urls_line = ' '.join(format(link) for link in image_urls)
    return layout.format(urls_line, description, prompt_instructions)

In [8]:
def get_gpt_description(current_images, current_description):
    """Ask the chat model for a one-paragraph description of a clothing piece.

    The image URLs, the product description and a fixed instruction block are
    combined with ``generate_prompt`` and sent as the user message to
    ``gpt-4o`` (temperature 0.4) behind a "fashion specialist" system message.

    Args:
        current_images: Iterable of image URLs of the product.
        current_description: Description text of the product. It is formatted
            into the prompt as-is, so pass a plain string.

    Returns:
        The ``content`` of the model response.
    """

    prompt_instructions = """

    The images are of the same shirt or top, use them to create a detailed description of it and its style.

    Take into account the following characteristics IF APPLICABLE: 

    Fit, Sleeve style, Neckline, Material, Formality, Seasson, Colors, Texture, Transparency, Details and Embellishments, Shape,
    Length, Collar Style, Sleeve Style, Patterns, Patterns placement, Fluidity of fabric, Fabric weight, Pocket Presence, Pocket placement, 
    Pocket size, Breathability, Occasion Suitability, Lapel, etc.


    DONT DESCRIBE THE MODEL.
    Use information from the image and the description.
    IGNORE BACKGROUND     
    ANSWER MUST BE AS SPECIFIC AS POSSIBLE BUT NOT COMPLEX
    WRITE IN A SINGLE PARAGRAPH
    NAME AND DESCRIBE THE COLORS USED
    """

    # NOTE(review): the image URLs are embedded in the prompt as plain text;
    # confirm whether the model is expected to receive them as image inputs.
    prompt_text = generate_prompt(current_images, current_description,prompt_instructions)


    os.environ['OPENAI_API_KEY'] = apikey

    # LLM
    model = ChatOpenAI(model="gpt-4o", temperature=0.4)

    # Prompt template. The assembled text is supplied as the value of a
    # template variable rather than used as the template itself, so braces in
    # a description or URL are not parsed as template placeholders.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a fashion specialist."),
            ("user", "{prompt_text}"),
        ]
    )

    chain = prompt | model

    response = chain.invoke({"prompt_text": prompt_text})
    return response.content



In [9]:
def get_gpt_form(current_images, current_description):
    """Ask the chat model to fill in the attribute form for a clothing piece.

    The image URLs, the description and a fixed form template are combined
    with ``generate_prompt`` and sent as the user message to ``gpt-4o``
    (temperature 0.4) behind a "fashion specialist" system message.

    Args:
        current_images: Iterable of image URLs of the product.
        current_description: Description text of the product. It is formatted
            into the prompt as-is, so pass a plain string.

    Returns:
        The ``content`` of the model response.
    """

    prompt_instructions = """

    The images are of the same Top - Shirt piece, use them to fill the following format to list attributes of the clothing piece in image, separate them with comma:  

    *Type:  
    *Fit: (NotSlim fit, regular fit, oversized, tight, loose, etc)
    *Sleeve style: (Not Applicable, Short sleeve, long sleeve, sleeveless, cap sleeve, etc)     
    *Neckline: (Not Applicable, Crew neck, V-neck, scoop neck, etc)   
    *Material: (Cotton, linen, silk, polyester, etc)
    *Formality: (Casual, Business Casual, Formal, etc)
    *Seasson: (Winter, Spring, Summer, Autum)
    *Colors:  (percentage of each color in the clothing peace) 
    *Texture: (Smooth, Rough, Ribbed, Velvety, etc)
    *Transparency: (Opaque, Semi-transparent, Transparent, etc)
    *Details and Embellishments: (e.g, buttons, zippers, embroidery, etc)
    *Shape: (e.g., boxy, fitted, flared, etc) 
    *Length: (e.g., cropped, hip-length, tunic, etc)
    *Collar Style: (e.g., Not Applicable, crew neck, V-neck, polo, button-down)
    *Sleeve Style: (e.g., Not Applicable, short, long, three-quarter, sleeveless, puffed)
    *Patterns: (e.g., stripes, floral, geometric, not applicable)
    *Patterns placement: (e.g., lower back, left sholder, right chest, not applicable) 
    *Fluidity of fabric:
    *Fabric weight: (Light, medium, or heavy)
    *Pocket Presence:(Yes,No)
    *Pocket placement:(e.g., lower back, left sholder, right chest, not applicable)
    *Pocket size:(small, medium, big, not applicable)
    *Breathability: (Low, Medium, High)
    *Occasion Suitability: (Casual, formal, business casual, etc)
    *Lapel: (not applicable, Notch, Peak, Shawl Satin, Slim, Wide, Contrasting, etc)


    Use information from the image and the description.
    IGNORE BACKGROUND
    ONLY ANSWER THE FORMAT  
    DONT USE THE BRAND NAME IN ANY DESCRIPTION  
    IN EACH DESCRIPTION - CHARACTERISTIC SHOULD BE ITS OWN DESCRIPTION
    YOU CAN USE MULTIPLE DESCRIPTIONS IN A SINGLE ATTRIBUTE  
    DONT USE MULTICOLOR OR Multicolor, NAME THE SPECIFIC COLORS  
    COLORS SHOULD INCLUDE PRINT AND CLOTHE COLORS WITHOUT ANY DESCRIPTION OF THE PRINT ONLY MAIN COLORS
   
    """

    # NOTE(review): the image URLs are embedded in the prompt as plain text;
    # confirm whether the model is expected to receive them as image inputs.
    prompt_text = generate_prompt(current_images, current_description,prompt_instructions)


    os.environ['OPENAI_API_KEY'] = apikey

    # LLM
    model = ChatOpenAI(model="gpt-4o", temperature=0.4)

    # Prompt template. The assembled text is supplied as the value of a
    # template variable rather than used as the template itself, so braces in
    # a description or URL are not parsed as template placeholders.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a fashion specialist."),
            ("user", "{prompt_text}"),
        ]
    )

    chain = prompt | model

    response = chain.invoke({"prompt_text": prompt_text})
    return response.content



In [10]:
def load_form_to_db(current_product_ID,form, Summary):
    """Insert one generated result into ``product_characteristics``.

    Args:
        current_product_ID: Value stored in the ``Brand_id`` column.
        form: Value stored in the ``Detail`` column.
        Summary: Value stored in the ``Summary`` column.

    Returns:
        None. The statement has a ``RETURNING Brand_id`` clause, but the
        returned row is not fetched.
    """
    query = """
    INSERT INTO product_characteristics (Brand_id, Detail, Summary) VALUES (%s, %s, %s)
    RETURNING Brand_id;
    """

    conn, cursor = SQLf.sql_db_functions.connect_sql()
    try:
        cursor.execute(query, (current_product_ID, form, Summary))
        conn.commit()
    finally:
        # Close the connection even when the insert or the commit fails.
        SQLf.sql_db_functions.close_connection_db(conn=conn,cursor=cursor)

    


In [11]:
# Load the Brand_Prod_ID / Descript pairs of all products from the database.
df = get_products()

In [12]:
# (rows, columns) of the product frame.
df.shape

(82, 2)

In [13]:
# Number of distinct Brand_Prod_ID values; equals the row count when every ID is unique.
df.Brand_Prod_ID.unique().shape

(82,)

In [14]:
# Uncomment to empty product_characteristics (TRUNCATE) before regenerating its rows.
# restart_product_char()

In [14]:
# Generate a summary and an attribute form for every product in `df` and
# collect the results in `df_final`.
n_rows = len(df)
records = []  # one dict per product; turned into a DataFrame once, after the loop

for processed, (current_product_ID, current_description) in enumerate(
        zip(df.Brand_Prod_ID, df.Descript), start=1):

    # Use the description text itself. Passing a one-row Series would put its
    # repr (long strings are shortened there) into the prompt instead.
    current_description = '' if pd.isna(current_description) else str(current_description)

    current_image_links = get_related_links(current_product_ID)

    cloth_summary = get_gpt_description(
        current_images=current_image_links,
        current_description=current_description
        )

    # The form prompt gets the generated summary plus the original description,
    # separated by a real newline.
    cloth_form = get_gpt_form(
        current_images=current_image_links,
        current_description=cloth_summary + '\n' + current_description
        )

    records.append({
        'Brand_Prod_ID':current_product_ID,
        'current_image_links':current_image_links,
        'cloth_summary':cloth_summary,
        'cloth_form':cloth_form
        })
    #load_form_to_db(current_product_ID = current_product_ID,form=cloth_form, Summary= cloth_summary)

    # Progress is based on the number of processed products, not on the ID value.
    if processed % 10 == 0:
        print(f"loaded {processed / n_rows}")

df_final = pd.DataFrame(
    records,
    columns=['Brand_Prod_ID','current_image_links','cloth_summary','cloth_form']
)

loaded 0.12195121951219512
loaded 0.24390243902439024
loaded 0.36585365853658536
loaded 0.4878048780487805
loaded 0.6097560975609756
loaded 0.7317073170731707
loaded 0.8536585365853658
loaded 0.975609756097561


# Checking: inspect the stored data for a single product

In [17]:
# Fetch the description row and the image links stored for one product.
Brand_Prod_ID = 20

conn, cursor = SQLf.sql_db_functions.connect_sql()
try:
    # The ID is bound as a parameter (%s) instead of being formatted into the SQL.
    query = "SELECT Brand_Prod_ID, Descript  FROM Products WHERE Brand_Prod_ID = %s  ;" #LIMIT 5
    cursor.execute(query, (Brand_Prod_ID,))
    # Fetch the rows
    rows = cursor.fetchall()

    query = "SELECT image_link FROM product_img WHERE Brand_id = %s ;"
    cursor.execute(query, (Brand_Prod_ID,))
    # Fetch the rows
    image_links = cursor.fetchall()
finally:
    # One connection serves both queries and is released on every path.
    SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

# NOTE: this rebinds `df` to the rows of this single product.
df = pd.DataFrame(data=rows, columns=['Brand_Prod_ID','Descript'])

df_images_temp = pd.DataFrame(data=image_links, columns=['image_link'])

# Links that do not contain "https" get "https:" prepended.
df_images_temp['image_link'] = df_images_temp['image_link'].apply(lambda x: x if 'https' in x else 'https:' +  x )


In [23]:
# Preview the product row(s) currently held in `df`.
df.head()

Unnamed: 0,Brand_Prod_ID,Descript
0,20,"Il modello è alto 177cm/5'10"" e indossa la ta..."


In [20]:
# Full description text stored at index label 0.
df.Descript[0]

' Il modello è alto 177cm/5\'10" e indossa la taglia S Corto Senza maniche Slim fit A fascia, Senza spalline Marrone scuro, Tinta unita DIVIDED Mesh L\'elastan è una fibra sintetica elastica ricavata dal petrolio (una risorsa fossile). Il poliestere è una fibra sintetica che si ottiene dal petrolio (una risorsa fossile). Materials: '

In [22]:
# Print every image URL on its own line, untruncated.
for link in df_images_temp['image_link']:
    print(link)

https://image.hm.com/assets/hm/37/69/37690d9a17be5948d0b73bfbd7ed84ec9b60e276.jpg?imwidth=2160
https://image.hm.com/assets/hm/64/e5/64e575ec868c59263700db854477c56b6db21caf.jpg?imwidth=2160


## Check the pipeline on a single product

In [24]:
# Select one product and gather its inputs: description (as a Series) and image links.
current_product_ID = 20

current_description = df.loc[df.Brand_Prod_ID == current_product_ID, 'Descript']
current_image_links = get_related_links(current_product_ID)

In [25]:
# Image-link Series returned by get_related_links for the selected product.
current_image_links

0    https://image.hm.com/assets/hm/37/69/37690d9a1...
1    https://image.hm.com/assets/hm/64/e5/64e575ec8...
Name: image_link, dtype: object

In [28]:
# Description text at index label 0 of the filtered Series.
current_description[0]

' Il modello è alto 177cm/5\'10" e indossa la taglia S Corto Senza maniche Slim fit A fascia, Senza spalline Marrone scuro, Tinta unita DIVIDED Mesh L\'elastan è una fibra sintetica elastica ricavata dal petrolio (una risorsa fossile). Il poliestere è una fibra sintetica che si ottiene dal petrolio (una risorsa fossile). Materials: '

In [29]:
# Pass the description text itself: `current_description` is a one-row Series,
# and formatting the Series into the prompt would insert its shortened repr
# rather than the full description.
cloth_summary = get_gpt_description(
    current_images=current_image_links,
    current_description=current_description.iloc[0]
)

In [30]:
# Show the summary text returned by the model.
cloth_summary

"The shirt depicted in the images is a stylish, casual piece that features a relaxed fit, making it comfortable for everyday wear. It has long sleeves with buttoned cuffs, adding a touch of formality while maintaining a laid-back vibe. The neckline is a classic collar style, suitable for both casual and semi-formal occasions. Made from a lightweight and breathable material, this shirt is ideal for warmer seasons. The primary color is a soft, muted beige, complemented by a subtle, vertical stripe pattern in white, which adds a bit of visual interest without being overwhelming. The fabric appears to have a smooth texture with a slight sheen, enhancing its overall polished look. The shirt includes a single chest pocket on the left side, which is both functional and stylish. The pocket is moderately sized, blending seamlessly with the shirt's design. This versatile piece can be dressed up with trousers or dressed down with jeans, making it a great addition to any wardrobe."

In [None]:
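# Disabled: form generation for the single test product (summary and description joined by a newline).
#cloth_form = get_gpt_form(
#    current_images=current_image_links,
#    current_description=cloth_summary + '\n' + current_description[0]
#)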