In [1]:
import sys, os
sys.path.append(os.path.join(os.path.dirname('__file__'), '..', 'DB_and_Azure'))
import sql_db_functions as SQLf

import pandas as pd
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 

import getpass
from apikey import apikey 


In [2]:
def restart_product_char():
    conn, cursor = SQLf.sql_db_functions.connect_sql()

    query = """
    TRUNCATE TABLE product_characteristics;
    """
    cursor.execute(query)

    conn.commit()

In [3]:
def get_products():

    conn, cursor = SQLf.sql_db_functions.connect_sql()

    query = f"SELECT Brand_Prod_ID, Descript  FROM Products ;" #LIMIT 5
    cursor.execute(query)

    # Fetch the rows
    rows = cursor.fetchall()

    df = pd.DataFrame(data=rows, columns=['Brand_Prod_ID','Descript'])

    SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

    return df


In [4]:
def get_related_links(Brand_Prod_ID):
    conn, cursor = SQLf.sql_db_functions.connect_sql()

    query = f"SELECT image_link FROM product_img WHERE Brand_id = {Brand_Prod_ID} ;"
    cursor.execute(query)
    # Fetch the rows
    image_links = cursor.fetchall()

    SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

    df_images = pd.DataFrame(data=image_links, columns=['image_link'])
    df_images.head()

    df_images['image_link'] = df_images['image_link'].apply(lambda x: x if 'https' in x else 'https:' +  x )

    return df_images.image_link

In [5]:
conn, cursor = SQLf.sql_db_functions.connect_sql()

query = f"SELECT image_link FROM product_img ;"
cursor.execute(query)
# Fetch the rows
image_links = cursor.fetchall()

SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

df_images = pd.DataFrame(data=image_links, columns=['image_link'])
df_images.head()

df_images['image_link'] = df_images['image_link'].apply(lambda x: x if 'https' in x else 'https:' +  x )

In [6]:
def generate_prompt(image_urls, description,prompt_instructions):
    # Construct the image part of the prompt
    images_text = ' '.join([f"{url}" for url in image_urls])
    # Combine with the description
    prompt_text = f"""
        {images_text} \n
        Description: {description} \n
        
        {prompt_instructions}
    """
    return prompt_text

In [7]:
def get_gpt_description(current_images, current_description):

    prompt_instructions = """

    The images are of the same shirt or top, use them to create a detailed description of it and its style.

    Take into account the following characteristics IF APPLICABLE: 

    Fit, Sleeve style, Neckline, Material, Formality, Seasson, Colors, Texture, Transparency, Details and Embellishments, Shape,
    Length, Collar Style, Sleeve Style, Patterns, Patterns placement, Fluidity of fabric, Fabric weight, Pocket Presence, Pocket placement, 
    Pocket size, Breathability, Occasion Suitability, Lapel, etc.


    DONT DESCRIBE THE MODEL.
    Use information from the image and the description.
    IGNORE BACKGROUND     
    ANSWER MUST BE AS SPECIFIC AS POSSIBLE BUT NOT COMPLEX
    WRITE IN A SINGLE PARAGRAPH
    NAME AND DESCRIBE THE COLORS USED
    """

    prompt_text = generate_prompt(current_images, current_description,prompt_instructions)


    os.environ['OPENAI_API_KEY'] = apikey

    # LLM
    model = ChatOpenAI(model="gpt-4o", temperature=0.4)

    # Prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a fashion specialist."),
            ("user", prompt_text),
        ]
    )

    chain = prompt | model

    response = chain.invoke({})
    return response.content



In [8]:
def get_gpt_form(current_images, current_description):

    prompt_instructions = """

    The images are of the same Top - Shirt piece, use them to fill the following format to list attributes of the clothing piece in image, separate them with comma:  

    *Type:  
    *Fit: (NotSlim fit, regular fit, oversized, tight, loose, etc)
    *Sleeve style: (Not Applicable, Short sleeve, long sleeve, sleeveless, cap sleeve, etc)     
    *Neckline: (Not Applicable, Crew neck, V-neck, scoop neck, etc)   
    *Material: (Cotton, linen, silk, polyester, etc)
    *Formality: (Casual, Business Casual, Formal, etc)
    *Seasson: (Winter, Spring, Summer, Autum)
    *Colors:  (percentage of each color in the clothing peace) 
    *Texture: (Smooth, Rough, Ribbed, Velvety, etc)
    *Transparency: (Opaque, Semi-transparent, Transparent, etc)
    *Details and Embellishments: (e.g, buttons, zippers, embroidery, etc)
    *Shape: (e.g., boxy, fitted, flared, etc) 
    *Length: (e.g., cropped, hip-length, tunic, etc)
    *Collar Style: (e.g., Not Applicable, crew neck, V-neck, polo, button-down)
    *Sleeve Style: (e.g., Not Applicable, short, long, three-quarter, sleeveless, puffed)
    *Patterns: (e.g., stripes, floral, geometric, not applicable)
    *Patterns placement: (e.g., lower back, left sholder, right chest, not applicable) 
    *Fluidity of fabric:
    *Fabric weight: (Light, medium, or heavy)
    *Pocket Presence:(Yes,No)
    *Pocket placement:(e.g., lower back, left sholder, right chest, not applicable)
    *Pocket size:(small, medium, big, not applicable)
    *Breathability: (Low, Medium, High)
    *Occasion Suitability: (Casual, formal, business casual, etc)
    *Lapel: (not applicable, Notch, Peak, Shawl Satin, Slim, Wide, Contrasting, etc)


    Use information from the image and the description.
    IGNORE BACKGROUND
    ONLY ANSWER THE FORMAT  
    DONT USE THE BRAND NAME IN ANY DESCRIPTION  
    IN EACH DESCRIPTION - CHARACTERISTIC SHOULD BE ITS OWN DESCRIPTION
    YOU CAN USE MULTIPLE DESCRIPTIONS IN A SINGLE ATTRIBUTE  
    DONT USE MULTICOLOR OR Multicolor, NAME THE SPECIFIC COLORS  
    COLORS SHOULD INCLUDE PRINT AND CLOTHE COLORS WITHOUT ANY DESCRIPTION OF THE PRINT ONLY MAIN COLORS
   
    """

    prompt_text = generate_prompt(current_images, current_description,prompt_instructions)


    os.environ['OPENAI_API_KEY'] = apikey

    # LLM
    model = ChatOpenAI(model="gpt-4o", temperature=0.4)

    # Prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", "You are a fashion specialist."),
            ("user", prompt_text),
        ]
    )

    chain = prompt | model

    response = chain.invoke({})
    return response.content



In [9]:
def load_form_to_db(current_product_ID,form, Summary):

    
    conn, cursor = SQLf.sql_db_functions.connect_sql()

    query = """
    INSERT INTO product_characteristics (Brand_id, Detail, Summary) VALUES (%s, %s, %s)
    RETURNING Brand_id;
    """
    #INSERT INTO product_characteristics (Brand_id, Detail, Summary) VALUES (%s, %s, %s)

    cursor.execute(query, (current_product_ID, form, Summary))

    # Fetch the rows
    #rows = cursor.fetchall()

    conn.commit()

    SQLf.sql_db_functions.close_connection_db(conn=conn,cursor=cursor)

    #return rows

    


In [10]:
df = get_products()

In [11]:
df.shape

(154, 2)

In [12]:
df.Brand_Prod_ID.unique().shape

(154,)

In [13]:
df_final= pd.DataFrame(columns=['Brand_Prod_ID','current_image_links','cloth_summary','cloth_form'])
n_rows = len(df)

for current_product_ID in df.Brand_Prod_ID:

    current_image_links = get_related_links(current_product_ID)
    current_description = df[df.Brand_Prod_ID == current_product_ID]['Descript']

    cloth_summary = get_gpt_description(
        current_images=current_image_links,
        current_description=current_description
        )
    
    
    cloth_form = get_gpt_form(
        current_images=current_image_links,
        current_description=cloth_summary + '/n' + current_description
        )
    
    temp_df = pd.DataFrame([{
        'Brand_Prod_ID':current_product_ID,
        'current_image_links':current_image_links,
        'cloth_summary':cloth_summary,
        'cloth_form':cloth_form
        }])
    
    df_final = pd.concat(
            [df_final,temp_df],
            ignore_index=True
        )
    
    load_form_to_db(current_product_ID = current_product_ID,form=cloth_form, Summary= cloth_summary)
    
    if current_product_ID % 5 == 0:
        print(f"loaded {(current_product_ID)/n_rows}")

    elif current_product_ID == 0:
        print(f"loaded {current_product_ID}")

loaded 0.032467532467532464
loaded 0.3246753246753247
loaded 0.06493506493506493
loaded 0.09740259740259741
loaded 0.12987012987012986
loaded 0.16233766233766234
loaded 0.19480519480519481
loaded 0.22727272727272727
loaded 0.487012987012987
loaded 0.2597402597402597
loaded 0.2922077922077922
loaded 0.35714285714285715
loaded 0.38961038961038963
loaded 0.42207792207792205
loaded 0.45454545454545453
loaded 0.6168831168831169
loaded 0.5194805194805194
loaded 0.551948051948052
loaded 0.5844155844155844
loaded 0.6493506493506493
loaded 0.6818181818181818
loaded 0.7142857142857143
loaded 0.7467532467532467
loaded 0.7792207792207793
loaded 0.8116883116883117
loaded 0.8441558441558441
loaded 0.8766233766233766
loaded 0.9090909090909091
loaded 0.9415584415584416
loaded 0.974025974025974


# checking !!!

In [14]:
Brand_Prod_ID = 20

conn, cursor = SQLf.sql_db_functions.connect_sql()

query = f"SELECT Brand_Prod_ID, Descript  FROM Products WHERE Brand_Prod_ID = {Brand_Prod_ID}  ;" #LIMIT 5
cursor.execute(query)

# Fetch the rows
rows = cursor.fetchall()

df = pd.DataFrame(data=rows, columns=['Brand_Prod_ID','Descript'])

SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)


conn, cursor = SQLf.sql_db_functions.connect_sql()

query = f"SELECT image_link FROM product_img WHERE Brand_id = {Brand_Prod_ID} ;"
cursor.execute(query)
# Fetch the rows
image_links = cursor.fetchall()

SQLf.sql_db_functions.close_connection_db(conn=conn, cursor=cursor)

df_images_temp = pd.DataFrame(data=image_links, columns=['image_link'])
df_images_temp.head()

df_images_temp['image_link'] = df_images_temp['image_link'].apply(lambda x: x if 'https' in x else 'https:' +  x )


In [15]:
df.head()

Unnamed: 0,Brand_Prod_ID,Descript
0,20,Ispirato allo spirito estivo e ai beach club d...


In [16]:
df.Descript[0]

"Ispirato allo spirito estivo e ai beach club della costa italiana, questo articolo fa parte della linea Gucci Lido. I codici emblematici della Maison e gli eleganti motivi vengono reinterpretati in chiave contemporanea per la collezione Pre-Fall. Questa maglia dal taglio dritto in twill di seta è definita da una stampa Morsetto Gucci e righe, che rende omaggio alla tradizione equestre della Maison.  Twill di seta marrone e verde con stampa Morsetto Gucci e righe Profilo bianco Senza fodera Girocollo Maniche corte Chiusura con bottoni oversize Lunghezza: 77 cm; taglia 42 (IT) Fabbricato in Italia Il prodotto raffigurato nell'immagine è taglia 42 (IT)"

In [None]:
for i in df_images_temp.image_link:
    print(i)

## check only prod

In [None]:
current_product_ID = 20

current_image_links = get_related_links(current_product_ID)
current_description = df[df.Brand_Prod_ID == current_product_ID]['Descript']

In [None]:
current_image_links

In [None]:
current_description[0]

In [None]:
cloth_summary = get_gpt_description(
    current_images=current_image_links,
    current_description=current_description
)

In [None]:
cloth_summary

In [None]:
#cloth_form = get_gpt_form(
#    current_images=current_image_links,
#    current_description=cloth_summary + '/n' + current_description
#)