In [1]:
from dotenv import load_dotenv
import os
import csv
import pandas as pd
import numpy as np
import math
import json
import random

# Load the environment variables from the .env file before reading them.
load_dotenv()

# MongoDB connection string; None when the variable is not set.
MONGO_URI = os.getenv('MONGO_URI')

# CSV inputs.
SHORTLISTED_PRODUCTS_FULL_DATA = "shortlisted_articles.csv"
ALL_PRODUCTS = "articles.csv"

# JSON output written by the knowledge-base export cell.
PRODUCTS_KNOWLEDGE_BASE_JSON_FILE = "products_knowledge_base.json"

In [2]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Create a new client and connect to the server
# NOTE(review): when MONGO_URI is unset (None) the client presumably falls back
# to pymongo's default host instead of failing -- confirm this is acceptable.
client = MongoClient(MONGO_URI, server_api=ServerApi('1'))
db = client["test"]  # Replace with your database name
products_collection = db["products_test"]

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Successfully connected to MongoDB as admin!")
except Exception as e:
    # Best-effort check: report the failure but let the notebook keep running.
    print(e)


KeyboardInterrupt: 

In [3]:
# Read the shortlisted articles, then left-pad the id columns with zeros to a
# fixed width (10 characters for article_id, 7 for product_code) as strings.
articles = pd.read_csv(SHORTLISTED_PRODUCTS_FULL_DATA)
articles['article_id'] = articles['article_id'].map("{:0>10}".format)
articles['product_code'] = articles['product_code'].map("{:0>7}".format)

# Keep the positional index of each row as an ordinary column.
articles['row_number'] = articles.index

# ## Get info of unique products (No articles for individual products)
# articles = articles.drop_duplicates(subset=['product_code'])

# addOnArticles = pd.read_csv('___new_data___.csv')
# targetArticles = addOnArticles['article_id']
# allArticles = pd.read_csv(ALL_PRODUCTS)
# filtered_df = allArticles[allArticles['article_id'].isin(targetArticles)]
# filtered_df.to_csv('addon_full_data.csv', index=False)

articles.head(2)
# articles.columns

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc,row_number
0,189383001,189383,BASIC LS TURTLE NECK BODY,256,Bodysuit,Garment Upper body,1010016,Solid,10,White,...,G,Baby Sizes 50-98,4,Baby/Children,44,Baby Essentials & Complements,1002,Jersey Basic,"Long-sleeved bodysuit in soft, ribbed organic ...",0
1,189955076,189955,Nora tee,255,T-shirt,Garment Upper body,1010010,Melange,71,Light Blue,...,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Top in soft slub jersey with a fake front pock...,1


In [4]:
# Columns whose distinct values are listed by the loop below.
categoricalColumns = [
    'product_type_name',
    'product_group_name',
    'graphical_appearance_name',
    'perceived_colour_value_name',
    'perceived_colour_master_name',
    'index_group_name',
    'garment_group_name',
]

for column in categoricalColumns:
    unique_values = articles[column].unique()
    # A single print emits the header, the values and a trailing blank line.
    print(f"Unique values in '{column}':", unique_values, "", sep="\n")

Unique values in 'product_type_name':
['Bodysuit' 'T-shirt' 'Dress' 'Trousers' 'Top' 'Jacket' 'Blazer' 'Sweater'
 'Outdoor Waistcoat' 'Blouse' 'Shorts' 'Coat' 'Garment Set' 'Vest top'
 'Cardigan' 'Skirt' 'Jumpsuit/Playsuit' 'Hoodie' 'Dungarees' 'Slippers'
 'Gloves' 'Heeled sandals' 'Other accessories' 'Necklace' 'Earring' 'Bag'
 'Hat/beanie' 'Sandals' 'Hat/brim' 'Scarf' 'Other shoe' 'Boots' 'Belt'
 'Womens Ethnic Wear' 'Mens Ethnic wear' 'Sneakers' 'Running Shoes'
 "Men's Watches" "Women's Watches"]

Unique values in 'product_group_name':
['Garment Upper body' 'Garment Full body' 'Garment Lower body' 'Shoes'
 'Accessories' 'Festive / Traditional' 'Watches']

Unique values in 'graphical_appearance_name':
['Solid' 'Melange' 'Colour blocking' 'Denim' 'All over pattern'
 'Embroidery' 'Front print' 'Placement print' 'Stripe' 'Jacquard'
 'Glittering/Metallic' 'Mixed solid/pattern' 'Treatment' 'Check'
 'Application/3D' 'printed' 'Printed' 'Striped']

Unique values in 'perceived_colour_value_n

In [5]:
# Display the first two rows of `articles`.
articles.head(2)

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc,row_number
0,189383001,189383,BASIC LS TURTLE NECK BODY,256,Bodysuit,Garment Upper body,1010016,Solid,10,White,...,G,Baby Sizes 50-98,4,Baby/Children,44,Baby Essentials & Complements,1002,Jersey Basic,"Long-sleeved bodysuit in soft, ribbed organic ...",0
1,189955076,189955,Nora tee,255,T-shirt,Garment Upper body,1010010,Melange,71,Light Blue,...,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Top in soft slub jersey with a fake front pock...,1


In [6]:
# Maps the raw column names of `articles` to the labels used as keys in the
# exported knowledge base. Only the columns listed here are exported.
columnNameMapping = {
    # 'row_number': 'Row number',
    'article_id': 'Article ID',
    'prod_name': 'Product name',
    'product_code': 'Product code',
    'product_type_name': 'Product type',
    'product_group_name': 'Product group',
    'graphical_appearance_name': 'Graphical appearance',
    'colour_group_name': 'Colour group',
    'perceived_colour_value_name': 'Perceived colour',
    'perceived_colour_master_name': 'Primary Perceived colour',
    'department_name': 'Department',
    'index_group_name': 'Index group',
    'section_name': 'Section',
    'garment_group_name': 'Garment group',
    'detail_desc': 'Detail description'
}
selectedColumnsForKnowledgeBase = list(columnNameMapping.keys())

# Rename the columns in the DataFrame
renamed_articles = articles[selectedColumnsForKnowledgeBase].rename(columns=columnNameMapping)

# Missing cells come back from to_dict() as float NaN, which json.dump writes as
# the bare token NaN -- not valid JSON for strict parsers. Emit null instead.
product_records = [
    {label: (None if pd.isna(value) else value) for label, value in record.items()}
    for record in renamed_articles.to_dict(orient='records')
]

# Wrap the records in a dictionary under the 'products' key
knowledgeBaseJson = {'products': product_records}

with open(PRODUCTS_KNOWLEDGE_BASE_JSON_FILE, 'w', encoding='utf-8') as json_file:
    json.dump(knowledgeBaseJson, json_file, indent=2)

print(f"DataFrame (Size: {articles.shape[0]}) converted and saved as JSON to {PRODUCTS_KNOWLEDGE_BASE_JSON_FILE}")

DataFrame (Size: 137) converted and saved as JSON to products_knowledge_base.json


In [190]:
## Utils

def getProductImageUrl(article_id: str):
    """Return the Cloudinary image URL for the given article id.

    The id is interpolated as-is into the path, followed by ``.jpg``.
    """
    image_url = f"https://res.cloudinary.com/dp0ayty6p/image/upload/fkgrid/{article_id}.jpg"
    return image_url
# print(getProductImageUrl('0763200004'))
# print(getProductImageUrl('0681002001'))

def generate_random_prices(min_price=200, max_price=2000, min_markup=50, max_markup=300, rng=None):
    """Draw a random selling price and a strictly higher "cutted" (original) price.

    Args:
        min_price: Lowest selling price that can be drawn (inclusive).
        max_price: Highest selling price that can be drawn (inclusive).
        min_markup: Smallest amount added to the selling price to obtain the
            cutted price (inclusive).
        max_markup: Largest amount added to the selling price to obtain the
            cutted price (inclusive).
        rng: Optional ``random.Random`` instance to draw from, for reproducible
            results. When omitted, the module-level ``random`` generator is used.

    Returns:
        A ``(random_price, cutted_price)`` tuple of ints.

    Raises:
        ValueError: Propagated from ``randint`` when a range is empty
            (``min_price > max_price`` or ``min_markup > max_markup``).
    """
    source = random if rng is None else rng
    random_price = source.randint(min_price, max_price)
    # The cutted price is drawn relative to the selling price, so with a
    # positive min_markup it is always the higher of the two.
    cutted_price = source.randint(random_price + min_markup, random_price + max_markup)
    return random_price, cutted_price

def getCategory(rawCategory):
    """Return "General" for the raw category "Divided"; any other value is returned unchanged."""
    if rawCategory == "Divided":
        return "General"
    return rawCategory

# Sample one price pair and show it as a quick check of the generator.
random_price, cutted_price = generate_random_prices()
print(f"Discounted Price: {random_price}")
print(f"Original Price: {cutted_price}")

Discounted Price: 717
Original Price: 915


In [None]:
# for idx, row in enumerate([articles.iloc[0], articles.iloc[2]]):
def _buildProductDocument(row, price, cuttedPrice):
    """Build the product document (a plain dict) for one row of `articles`.

    Args:
        row: One row of `articles`; read by column name.
        price: Value stored under 'price'.
        cuttedPrice: Value stored under 'cuttedPrice'.

    Returns:
        A dict combining the row's fields with fixed placeholder values
        (sizes, brand, stock, empty review/impression/purchase lists).
    """
    article_id = row['article_id']
    detail_desc = row['detail_desc']
    return {
        'article_id': article_id,
        'product_code': row['product_code'],
        'name': row['prod_name'],
        # A missing description is a float NaN in pandas; store an empty string
        # instead so the field is always text.
        'description': '' if pd.isna(detail_desc) else detail_desc,
        'elaborated_description': '',
        'highlights': ['Special Price'],
        # Every product gets the same three fixed size entries.
        'specifications': [
            {"title": "Size", "description": size_label}
            for size_label in ("M", "L", "XL")
        ],
        'price': price,
        'cuttedPrice': cuttedPrice,
        'images': [
            {
                "public_id": article_id,
                "url": getProductImageUrl(article_id),
            }
        ],
        'brand': {
            "name": "H&M",
            "logo": {
                "public_id": "hnmlogo",
                "url": "https://res.cloudinary.com/dp0ayty6p/image/upload/fkgrid_misc/hnmlogo.png"
            }
        },
        'category': getCategory(row['index_group_name']),
        'stock': 97,
        'warranty': 0,
        'ratings': 0,
        'numOfReviews': 0,
        # NOTE(review): this looks like the Extended JSON spelling of an ObjectId.
        # Passed to pymongo as-is it would presumably be stored as a nested
        # document rather than an ObjectId -- confirm before enabling insert_one.
        'user': {
            '$oid': '64d531cb47ece0f87e20b50f'
        },
        'reviews': [],
        "impressions": [],
        'purchases': [],
        'product_type_name' : row['product_type_name'],
        'product_group_name': row['product_group_name'],
        'graphical_appearance_name': row['graphical_appearance_name'],
        'perceived_colour_value_name': row['perceived_colour_value_name'],
        'perceived_colour_master_name': row['perceived_colour_master_name'],
        'colour_group_name': row['colour_group_name'],
        'department_name': row['department_name'],
        'index_name': row['index_name'],
        'index_group_name': row['index_group_name'],
        'garment_group_name': row['garment_group_name'],
        'section_name': row['section_name'],
    }


# Build one product document per article, each with freshly drawn prices.
# The database insert is currently disabled, so this only prints a line per row.
# for idx, row in enumerate([articles.iloc[0], articles.iloc[2]]):
for idx, row in articles.iterrows():
    random_price, cutted_price = generate_random_prices()
    product_data = _buildProductDocument(row, random_price, cutted_price)

    # print(json.dumps(product_data, indent=4))
    # products_collection.insert_one(product_data)
    print(f"#{idx+1} Created product id {product_data['article_id']} ({product_data['name']}) !")

In [223]:
def promptGenerator(product, person:str='a person'):
  """Compose an image-generation prompt describing `person` wearing `product`.

  Args:
    product: Mapping-like object (e.g. a row of `articles`) providing the keys
      'graphical_appearance_name', 'perceived_colour_value_name',
      'perceived_colour_master_name', 'garment_group_name',
      'index_group_name' and 'detail_desc'.
    person: Text inserted as the subject of the portrait.

  Returns:
    The prompt string: a fixed quality prefix followed by the product
    description. The string ends with a trailing space.
  """
  appearance = product['graphical_appearance_name']
  colour_value = product['perceived_colour_value_name']
  colour_master = product['perceived_colour_master_name']
  garment_group = product['garment_group_name']
  # "Divided" is shown as "General", as elsewhere in this notebook.
  audience = getCategory(product['index_group_name'])
  description = product['detail_desc']

  quality_tags = "8k uhd, dslr, soft lighting, high quality, film grain, full frame, Fujifilm XT3 "
  subject = f"full portrait photo of {person} wearing {appearance} {colour_value} {colour_master} {audience} {description} Suitable for {garment_group} <lora:add_detail:1> "

  return quality_tags + subject

# Subject text substituted for the default 'a person' in the prompts below.
hrithik_embedding = "hrithikroshan with (angular chiseled face)"

# Same product with the default subject and with the custom one, for comparison.
print(promptGenerator(articles.iloc[0]))
print(promptGenerator(articles.iloc[0], hrithik_embedding))


# Preview prompts for a few hand-picked rows. The rows are iterated directly:
# the enumerate index was never used, and binding it to `_` shadows IPython's
# last-output variable.
# for _, product in articles.iterrows():
for product in [articles.iloc[position] for position in (0, 2, 23, 25)]:
  print(promptGenerator(product, hrithik_embedding))

8k uhd, dslr, soft lighting, high quality, film grain, full frame, Fujifilm XT3 full portrait photo of a person wearing Solid Light White Baby/Children Long-sleeved bodysuit in soft, ribbed organic cotton jersey with a polo neck, concealed press-studs on one shoulder and press-studs at the crotch. Suitable for Jersey Basic <lora:add_detail:1> 
8k uhd, dslr, soft lighting, high quality, film grain, full frame, Fujifilm XT3 full portrait photo of hrithikroshan with (angular chiseled face) wearing Solid Light White Baby/Children Long-sleeved bodysuit in soft, ribbed organic cotton jersey with a polo neck, concealed press-studs on one shoulder and press-studs at the crotch. Suitable for Jersey Basic <lora:add_detail:1> 
8k uhd, dslr, soft lighting, high quality, film grain, full frame, Fujifilm XT3 full portrait photo of hrithikroshan with (angular chiseled face) wearing Solid Light White Baby/Children Long-sleeved bodysuit in soft, ribbed organic cotton jersey with a polo neck, concealed 

In [None]:
def generateKnowledgeBaseText(articles):
    """Placeholder for a text rendering of the knowledge base (not implemented).

    TODO: the intended output is not defined anywhere in this notebook;
    presumably a textual form of `articles`, going by the name -- confirm
    before implementing.

    Raises:
        NotImplementedError: Always, until a body is written.
    """
    # An explicit stub keeps the cell syntactically valid; a def with no body
    # is a SyntaxError.
    raise NotImplementedError("generateKnowledgeBaseText is not implemented yet")

In [228]:
# Display the column labels of `articles`.
articles.columns

Index(['article_id', 'product_code', 'prod_name', 'product_type_no',
       'product_type_name', 'product_group_name', 'graphical_appearance_no',
       'graphical_appearance_name', 'colour_group_code', 'colour_group_name',
       'perceived_colour_value_id', 'perceived_colour_value_name',
       'perceived_colour_master_id', 'perceived_colour_master_name',
       'department_no', 'department_name', 'index_code', 'index_name',
       'index_group_no', 'index_group_name', 'section_no', 'section_name',
       'garment_group_no', 'garment_group_name', 'detail_desc', 'row_number'],
      dtype='object')