In [0]:
from databricks.sdk import WorkspaceClient

ws = WorkspaceClient()
current_user = ws.current_user.me().user_name

# Derive a short per-user suffix from the login's local part (the text before "@").
#   "first.last@..."      -> "f_last"   (first initial + "_" + last name)
#   "first.middle.last@…" -> "f_last"   (first and last dot-separated parts)
#   "jdoe@..."            -> "jdoe"     (no dot: use the local part as-is)
# A strict two-name tuple unpack would raise ValueError for every login that
# does not contain exactly one dot, so the shapes are handled explicitly.
local_part = current_user.split('@')[0]
name_parts = [part for part in local_part.split('.') if part]
if not name_parts:
    raise ValueError(f"Cannot derive a catalog name from user name {current_user!r}")
if len(name_parts) >= 2:
    first_name, last_name = name_parts[0], name_parts[-1]
    formatted_name = f"{first_name[0]}_{last_name}"
else:
    formatted_name = name_parts[0]

# NOTE(review): the suffix is used verbatim in unquoted SQL identifiers later in
# this notebook; logins containing characters such as "-" or "+" may still need
# sanitizing — confirm against how sibling notebooks derive the catalog name.
catalog = f'dbdemos_{formatted_name}'
print(f"Catalog name: {catalog}")

In [0]:
import random
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType

# Obtain the Spark session (getOrCreate), requesting the app name "SyntheticClothingData"
session_builder = SparkSession.builder.appName("SyntheticClothingData")
spark = session_builder.getOrCreate()

# Garment catalog, one row per category: (category, lowest price, highest price, styles).
# The comprehension below reshapes the rows into the nested mapping the rest of the
# notebook reads: {category: {"price_range": (low, high), "styles": [...]}}.
_category_rows = [
    ("Dress", 39.99, 299.99,
     ["Evening", "Summer", "Cocktail", "Casual", "Formal", "Midi", "Maxi", "Mini", "Bodycon", "A-line", "Vintage", "Modern", "Party", "Floral", "Elegant"]),
    ("Shirt", 19.99, 89.99,
     ["Button-up", "Oxford", "Dress", "Flannel", "Casual", "Business", "Slim-fit", "Regular", "Linen", "Denim", "Checked", "Striped", "Hawaiian", "Polo"]),
    ("T-shirt", 9.99, 49.99,
     ["Graphic", "Plain", "V-neck", "Crew-neck", "Oversized", "Fitted", "Vintage", "Sports", "Logo", "Printed", "Organic", "Basic", "Premium", "Slim"]),
    ("Jeans", 29.99, 149.99,
     ["Skinny", "Slim", "Regular", "Relaxed", "Boot-cut", "Straight", "Distressed", "High-waisted", "Low-rise", "Vintage", "Designer", "Stretch", "Raw", "Tapered"]),
    ("Pants", 24.99, 129.99,
     ["Chino", "Cargo", "Formal", "Casual", "Slim-fit", "Wide-leg", "Cropped", "Pleated", "Flat-front", "Corduroy", "Drawstring", "Linen", "Cotton", "Wool"]),
    ("Skirt", 19.99, 99.99,
     ["Mini", "Midi", "Maxi", "Pencil", "A-line", "Pleated", "Wrap", "Denim", "Leather", "Floral", "Casual", "Formal", "Flared", "High-waisted", "Tennis"]),
    ("Jacket", 49.99, 299.99,
     ["Denim", "Leather", "Bomber", "Blazer", "Windbreaker", "Sports", "Casual", "Formal", "Puffer", "Quilted", "Waterproof", "Lightweight", "Varsity", "Hooded"]),
    ("Coat", 69.99, 399.99,
     ["Winter", "Trench", "Overcoat", "Parka", "Rain", "Wool", "Cashmere", "Long", "Short", "Belted", "Formal", "Casual", "Puffer", "Duffle", "Down"]),
    ("Sweater", 29.99, 149.99,
     ["Pullover", "Cardigan", "Turtleneck", "V-neck", "Crew-neck", "Cable-knit", "Cashmere", "Wool", "Cotton", "Chunky", "Lightweight", "Oversized", "Fitted", "Fair Isle"]),
    ("Blouse", 24.99, 119.99,
     ["Silk", "Cotton", "Formal", "Casual", "Button-up", "Tie-neck", "Ruffle", "Floral", "Print", "Sleeveless", "Short-sleeve", "Long-sleeve", "Oversized", "Cropped"]),
    ("Suit", 149.99, 799.99,
     ["Business", "Formal", "Wedding", "Slim-fit", "Regular", "Three-piece", "Two-piece", "Pinstripe", "Solid", "Wool", "Linen", "Cotton", "Designer", "Tailored"]),
    ("Shorts", 19.99, 79.99,
     ["Denim", "Cargo", "Chino", "Athletic", "Swim", "Bermuda", "Casual", "Formal", "Pleated", "Stretch", "Linen", "Cotton", "Drawstring", "High-waisted"]),
]

clothing_data = {
    category_name: {"price_range": (lowest_price, highest_price), "styles": style_names}
    for category_name, lowest_price, highest_price, style_names in _category_rows
}

# Base colors and the adjectives that may be paired with each one.
# Keyword order is preserved, so iteration order matches the listing below.
colors = dict(
    Red=["Vibrant", "Deep", "Bright", "Dark", "Cherry", "Crimson", "Wine", "Scarlet", "Ruby"],
    Blue=["Navy", "Sky", "Royal", "Turquoise", "Light", "Dark", "Midnight", "Denim", "Azure"],
    Green=["Emerald", "Olive", "Forest", "Sage", "Mint", "Dark", "Light", "Kelly", "Lime"],
    Black=["Pure", "Jet", "Classic", "Matte", "Deep", "Rich", "Soft", "Charcoal", "Onyx"],
    White=["Pure", "Bright", "Snow", "Ivory", "Off-", "Cream", "Eggshell", "Antique", "Soft"],
    Yellow=["Bright", "Pale", "Golden", "Mustard", "Lemon", "Warm", "Sunny", "Dark", "Light"],
    Purple=["Deep", "Lavender", "Violet", "Plum", "Royal", "Dark", "Light", "Eggplant", "Lilac"],
    Pink=["Pastel", "Hot", "Salmon", "Light", "Bright", "Dusty", "Blush", "Coral", "Rose"],
    Gray=["Light", "Dark", "Charcoal", "Silver", "Stone", "Heather", "Ash", "Slate", "Pewter"],
    Brown=["Dark", "Light", "Chocolate", "Coffee", "Tan", "Khaki", "Caramel", "Beige", "Mocha"],
    Orange=["Burnt", "Bright", "Rust", "Tangerine", "Peach", "Light", "Dark", "Coral", "Amber"],
    Burgundy=["Deep", "Rich", "Wine", "Dark", "Maroon", "Berry", "Merlot", "Cranberry", "Crimson"],
    Beige=["Light", "Tan", "Sand", "Khaki", "Stone", "Cream", "Oatmeal", "Taupe", "Ecru"],
)

# Description templates, keyed by category (same keys as clothing_data).
# Each category offers four str.format templates; the generation loop picks one
# at random per item and fills these placeholders by keyword:
#   {adj}, {color}, {style}, {occasion}, {feature}, {feature2}, {material}, {benefit}
# No template references all of them; str.format() ignores keyword arguments
# that a template does not use, so the loop can always pass the full set.
# Note: some templates open a sentence with "{feature}"; the feature phrases
# are lowercase, so that sentence starts lowercase unless the caller fixes it.
description_templates = {
    "Dress": [
        "A {adj} {color} {style} dress perfect for {occasion}. Features {feature} and {feature2}.",
        "This {adj} {color} {style} dress is ideal for {occasion}. Made with {material} for {benefit}.",
        "Stunning {color} {style} dress designed for {occasion}. {feature} with {feature2}.",
        "Elegant {color} {style} dress that offers {benefit}. Perfect for {occasion} with its {feature}."
    ],
    "Shirt": [
        "A {adj} {color} {style} shirt that's perfect for {occasion}. Features {feature} and {feature2}.",
        "This {color} {style} shirt offers {benefit}. Great for {occasion} with its {feature}.",
        "Classic {color} {style} shirt made from {material}. Ideal for {occasion} with {feature}.",
        "Versatile {color} {style} shirt designed for {occasion}. {feature} and {feature2} for {benefit}."
    ],
    "T-shirt": [
        "A {adj} {color} {style} t-shirt perfect for {occasion}. Features {feature} for {benefit}.",
        "Comfortable {color} {style} t-shirt made from {material}. Great for {occasion} with its {feature}.",
        "Casual {color} {style} t-shirt that offers {benefit}. Ideal for {occasion} with {feature}.",
        "Stylish {color} {style} t-shirt designed for {occasion}. {feature} with {feature2}."
    ],
    "Jeans": [
        "Premium {color} {style} jeans perfect for {occasion}. Features {feature} and {feature2}.",
        "These {adj} {color} {style} jeans offer {benefit}. Ideal for {occasion} with their {feature}.",
        "Classic {color} {style} jeans made from {material}. Great for {occasion} with {feature}.",
        "Stylish {color} {style} jeans designed for {occasion}. {feature} with {feature2} for comfort."
    ],
    "Pants": [
        "High-quality {color} {style} pants perfect for {occasion}. Features {feature} and {feature2}.",
        "These {adj} {color} {style} pants offer {benefit}. Ideal for {occasion} with their {feature}.",
        "Versatile {color} {style} pants made from {material}. Great for {occasion} with {feature}.",
        "Comfortable {color} {style} pants designed for {occasion}. {feature} with {feature2}."
    ],
    "Skirt": [
        "A {adj} {color} {style} skirt perfect for {occasion}. Features {feature} and {feature2}.",
        "This {color} {style} skirt offers {benefit}. Great for {occasion} with its {feature}.",
        "Stylish {color} {style} skirt made from {material}. Ideal for {occasion} with {feature}.",
        "Elegant {color} {style} skirt designed for {occasion}. {feature} with {feature2}."
    ],
    "Jacket": [
        "A {adj} {color} {style} jacket perfect for {occasion}. Features {feature} and {feature2}.",
        "This {color} {style} jacket offers {benefit}. Great for {occasion} with its {feature}.",
        "Premium {color} {style} jacket made from {material}. Ideal for {occasion} with {feature}.",
        "Stylish {color} {style} jacket designed for {occasion}. {feature} with {feature2} for {benefit}."
    ],
    "Coat": [
        "A {adj} {color} {style} coat perfect for {occasion}. Features {feature} and {feature2}.",
        "This luxurious {color} {style} coat offers {benefit}. Ideal for {occasion} with its {feature}.",
        "Premium {color} {style} coat made from {material}. Great for {occasion} with {feature}.",
        "Elegant {color} {style} coat designed for {occasion}. {feature} with {feature2} for {benefit}."
    ],
    "Sweater": [
        "A {adj} {color} {style} sweater perfect for {occasion}. Features {feature} and {feature2}.",
        "This cozy {color} {style} sweater offers {benefit}. Great for {occasion} with its {feature}.",
        "Soft {color} {style} sweater made from {material}. Ideal for {occasion} with {feature}.",
        "Warm {color} {style} sweater designed for {occasion}. {feature} with {feature2} for {benefit}."
    ],
    "Blouse": [
        "A {adj} {color} {style} blouse perfect for {occasion}. Features {feature} and {feature2}.",
        "This elegant {color} {style} blouse offers {benefit}. Great for {occasion} with its {feature}.",
        "Sophisticated {color} {style} blouse made from {material}. Ideal for {occasion} with {feature}.",
        "Versatile {color} {style} blouse designed for {occasion}. {feature} with {feature2} for {benefit}."
    ],
    "Suit": [
        "A {adj} {color} {style} suit perfect for {occasion}. Features {feature} and {feature2}.",
        "This professional {color} {style} suit offers {benefit}. Ideal for {occasion} with its {feature}.",
        "Premium {color} {style} suit made from {material}. Great for {occasion} with {feature}.",
        "Sophisticated {color} {style} suit designed for {occasion}. {feature} with {feature2} for {benefit}."
    ],
    "Shorts": [
        "Comfortable {color} {style} shorts perfect for {occasion}. Features {feature} and {feature2}.",
        "These {adj} {color} {style} shorts offer {benefit}. Great for {occasion} with their {feature}.",
        "Casual {color} {style} shorts made from {material}. Ideal for {occasion} with {feature}.",
        "Versatile {color} {style} shorts designed for {occasion}. {feature} with {feature2} for {benefit}."
    ]
}

# Features by category (same keys as clothing_data).
# Each value is a list of short lowercase phrases. The generation loop draws
# two different entries per item to fill the {feature} and {feature2}
# placeholders of the description templates.
features = {
    "Dress": ["a flattering silhouette", "delicate embroidery", "a cinched waist", "a flowing design", 
              "elegant draping", "a fitted bodice", "a flared skirt", "intricate detailing", 
              "subtle pleats", "a comfortable stretch", "a modern cut", "a classic design",
              "a stylish print", "adjustable straps", "a hidden zipper"],
    "Shirt": ["a tailored fit", "premium buttons", "a clean cut", "reinforced seams", 
              "breathable fabric", "a structured collar", "adjustable cuffs", "a chest pocket", 
              "a slim silhouette", "wrinkle-resistant material", "classic details", "a contemporary design",
              "quality stitching", "a comfortable stretch", "a modern cut"],
    "T-shirt": ["a relaxed fit", "soft cotton fabric", "a reinforced neckline", "a classic cut", 
                "breathable material", "tagless design", "pre-shrunk fabric", "a modern silhouette", 
                "quality stitching", "a versatile design", "a comfortable stretch", "durable construction",
                "a clean finish", "double-stitched seams", "a smooth texture"],
    "Jeans": ["a perfect fit", "quality denim", "reinforced stitching", "a flattering cut", 
              "stretch technology", "classic detailing", "a comfortable waistband", "premium buttons", 
              "a modern silhouette", "expertly faded design", "durable construction", "five-pocket styling",
              "a stylish look", "deep pockets", "a contoured waistband"],
    "Pants": ["a tailored fit", "a comfortable waistband", "crisp pleats", "quality stitching", 
              "breathable fabric", "a modern cut", "reinforced seams", "deep pockets", 
              "a classic design", "a sleek silhouette", "wrinkle-resistant material", "a clean finish",
              "a comfortable stretch", "a flattering cut", "durable construction"],
    "Skirt": ["a flattering cut", "a smooth lining", "a comfortable waistband", "elegant detailing", 
              "a flowing design", "a classic silhouette", "premium stitching", "a hidden zipper", 
              "subtle pleats", "a contemporary design", "quality fabric", "a modern silhouette",
              "a stylish look", "a comfortable stretch", "a clean finish"],
    "Jacket": ["a tailored fit", "premium zippers", "warm insulation", "a modern cut", 
               "adjustable cuffs", "deep pockets", "a comfortable lining", "quality stitching", 
               "a classic design", "reinforced seams", "a sleek silhouette", "water-resistant material",
               "a stylish look", "a hood option", "wind protection"],
    "Coat": ["premium insulation", "a luxurious lining", "a tailored fit", "quality buttons", 
             "a classic cut", "deep pockets", "a warm collar", "reinforced seams", 
             "elegant detailing", "water-resistant material", "a modern silhouette", "adjustable cuffs",
             "a flattering design", "wind protection", "professional finishing"],
    "Sweater": ["a cozy fit", "soft knit", "a comfortable stretch", "a classic design", 
                "ribbed cuffs", "a warm feel", "quality construction", "a modern cut", 
                "a flattering silhouette", "premium material", "temperature regulation", "durable stitching",
                "a stylish look", "a relaxed fit", "a smooth texture"],
    "Blouse": ["a flattering cut", "delicate details", "a contemporary design", "quality buttons", 
               "a comfortable fit", "elegant draping", "a modern silhouette", "a clean finish", 
               "premium stitching", "a versatile style", "breathable fabric", "a feminine touch",
               "a stylish look", "a relaxed fit", "a polished appearance"],
    "Suit": ["expert tailoring", "premium fabric", "a perfect fit", "classic detailing", 
             "a modern cut", "quality lining", "reinforced stitching", "a professional look", 
             "elegant buttons", "a contemporary silhouette", "a comfortable feel", "a refined design",
             "a stylish look", "functional pockets", "a polished finish"],
    "Shorts": ["a comfortable fit", "a modern cut", "deep pockets", "quality stitching", 
               "breathable fabric", "a versatile design", "a clean finish", "reinforced seams", 
               "a flattering length", "a relaxed silhouette", "a comfortable waistband", "a classic style",
               "a stylish look", "a perfect drape", "durable construction"]
}

# Materials by category (same keys as clothing_data).
# One entry is drawn at random per item to fill the {material} placeholder,
# which the templates use in phrases such as "made from {material}" and
# "Made with {material}".
materials = {
    "Dress": ["premium cotton", "soft silk", "luxurious satin", "flowing chiffon", "stretchy jersey", 
              "elegant lace", "structured polyester", "textured linen", "comfortable rayon", "lightweight crepe",
              "rich velvet", "premium blend", "sustainable fabric", "breathable material", "durable construction"],
    "Shirt": ["premium cotton", "soft linen", "oxford cloth", "poplin fabric", "chambray", 
              "textured flannel", "breathable material", "sustainable fabric", "wrinkle-resistant blend", 
              "performance fabric", "comfortable stretch", "quality twill", "denim", "structured polyester", "brushed cotton"],
    "T-shirt": ["soft cotton", "organic material", "jersey knit", "brushed fabric", "quality blend", 
                "breathable material", "sustainable fabric", "premium cotton", "comfortable stretch", 
                "performance material", "lightweight fabric", "durable construction", "pima cotton", 
                "cotton-polyester blend", "ring-spun cotton"],
    "Jeans": ["premium denim", "stretch fabric", "heavyweight cotton", "quality blend", "stonewashed material", 
              "distressed denim", "vintage fabric", "raw denim", "comfortable stretch", "sustainable material", 
              "organic cotton", "premium construction", "selvedge denim", "performance blend", "textured fabric"],
    "Pants": ["premium cotton", "soft wool", "textured linen", "quality blend", "comfortable stretch", 
              "breathable material", "lightweight fabric", "sustainable construction", "wrinkle-resistant blend", 
              "performance material", "corduroy", "twill fabric", "premium polyester", "textured chino", "technical fabric"],
    "Skirt": ["premium cotton", "soft wool", "flowing chiffon", "structured polyester", "comfortable stretch", 
              "elegant lace", "textured linen", "quality blend", "lightweight material", "sustainable fabric", 
              "breathable construction", "quality denim", "textured leather", "premium suede", "flowing rayon"],
    "Jacket": ["premium leather", "quality denim", "water-resistant material", "soft suede", "waxed cotton", 
               "technical fabric", "breathable construction", "warm wool", "premium blend", "sustainable material", 
               "performance fabric", "windproof shell", "down insulation", "premium polyester", "ripstop nylon"],
    "Coat": ["premium wool", "cashmere blend", "soft leather", "quality cotton", "water-resistant material", 
             "down insulation", "technical fabric", "sustainable construction", "warm polyester", "premium blend", 
             "performance material", "breathable shell", "structured fabric", "heavyweight material", "luxurious gabardine"],
    "Sweater": ["soft wool", "cashmere blend", "premium cotton", "quality acrylic", "alpaca wool", 
                "merino wool", "cotton-cashmere blend", "sustainable material", "warm construction", "comfortable stretch", 
                "performance blend", "lambswool", "angora blend", "mohair wool", "technical fabric"],
    "Blouse": ["soft silk", "premium cotton", "flowing chiffon", "comfortable rayon", "quality polyester", 
               "textured linen", "breathable blend", "sustainable fabric", "structured material", "comfortable stretch", 
               "performance construction", "lightweight fabric", "satin finish", "brushed fabric", "elegant lace"],
    "Suit": ["premium wool", "quality cotton", "textured linen", "worsted wool", "soft cashmere", 
             "quality blend", "sustainable material", "performance fabric", "comfortable stretch", "breathable construction", 
             "luxurious twill", "structured polyester", "premium blend", "technical fabric", "tropical wool"],
    "Shorts": ["premium cotton", "quality denim", "textured linen", "comfortable blend", "performance material", 
               "breathable fabric", "lightweight construction", "sustainable material", "structured twill", "quality chino", 
               "comfortable stretch", "technical fabric", "quick-dry material", "premium polyester", "soft fabric"]
}

# Benefits by category (same keys as clothing_data).
# One entry is drawn at random per item to fill the {benefit} placeholder,
# which the templates use in phrases such as "offers {benefit}" and
# "for {benefit}".
benefits = {
    "Dress": ["all-day comfort", "a flattering fit", "easy movement", "a timeless look", 
              "versatile styling", "day-to-night wearability", "a confidence boost", "a polished appearance", 
              "easy care", "long-lasting quality", "effortless elegance", "a perfect silhouette", 
              "premium comfort", "a luxurious feel", "maximum versatility"],
    "Shirt": ["all-day comfort", "a professional look", "easy care", "versatile styling", 
              "a perfect fit", "long-lasting quality", "effortless style", "breathable comfort", 
              "wrinkle resistance", "a crisp appearance", "maximum mobility", "day-to-night wearability", 
              "temperature regulation", "a clean silhouette", "premium comfort"],
    "T-shirt": ["all-day comfort", "a casual look", "easy care", "versatile styling", 
                "a relaxed fit", "long-lasting quality", "effortless style", "breathable comfort", 
                "softness against skin", "everyday wearability", "premium comfort", "a classic look", 
                "color retention", "easy layering", "maximum comfort"],
    "Jeans": ["all-day comfort", "a perfect fit", "long-lasting quality", "versatile styling", 
              "shape retention", "a flattering silhouette", "easy care", "classic style", 
              "maximum mobility", "a modern look", "premium comfort", "day-to-night wearability", 
              "fade resistance", "a timeless appearance", "effortless style"],
    "Pants": ["all-day comfort", "a professional look", "easy care", "versatile styling", 
              "a perfect fit", "long-lasting quality", "wrinkle resistance", "maximum mobility", 
              "a clean silhouette", "breathable comfort", "premium quality", "day-to-night wearability", 
              "a polished appearance", "easy movement", "effortless style"],
    "Skirt": ["all-day comfort", "a flattering silhouette", "easy movement", "versatile styling", 
              "a perfect fit", "long-lasting quality", "effortless elegance", "day-to-night wearability", 
              "a polished appearance", "easy care", "maximum comfort", "a timeless look", 
              "premium quality", "a luxurious feel", "breathable comfort"],
    "Jacket": ["reliable warmth", "weather protection", "a stylish appearance", "versatile layering", 
               "a perfect fit", "long-lasting quality", "maximum mobility", "all-day comfort", 
               "easy care", "temperature regulation", "premium quality", "a modern look", 
               "effortless style", "a flattering cut", "wind resistance"],
    "Coat": ["exceptional warmth", "weather protection", "a luxurious feel", "a perfect fit", 
             "long-lasting quality", "a professional appearance", "versatile styling", "all-day comfort", 
             "maximum insulation", "a timeless look", "premium quality", "effortless elegance", 
             "a flattering silhouette", "temperature regulation", "wind resistance"],
    "Sweater": ["cozy warmth", "all-day comfort", "a flattering fit", "versatile styling", 
                "long-lasting quality", "easy care", "temperature regulation", "a classic look", 
                "premium softness", "breathable warmth", "shape retention", "a timeless appearance", 
                "maximum comfort", "easy layering", "a luxurious feel"],
    "Blouse": ["all-day comfort", "a professional look", "easy care", "versatile styling", 
               "a perfect fit", "effortless elegance", "breathable comfort", "a polished appearance", 
               "long-lasting quality", "day-to-night wearability", "premium comfort", "a feminine look", 
               "maximum mobility", "a flattering silhouette", "easy movement"],
    "Suit": ["a professional appearance", "a perfect fit", "long-lasting quality", "all-day comfort", 
             "effortless elegance", "easy movement", "a polished look", "maximum confidence", 
             "wrinkle resistance", "versatile styling", "premium quality", "a timeless appearance", 
             "breathable comfort", "a flattering silhouette", "easy care"],
    "Shorts": ["all-day comfort", "a perfect fit", "easy movement", "versatile styling", 
               "breathable comfort", "long-lasting quality", "a casual look", "a flattering length", 
               "maximum mobility", "easy care", "premium quality", "a modern appearance", 
               "quick-drying capability", "a relaxed fit", "temperature regulation"]
}

# Occasions by category (same keys as clothing_data).
# One entry is drawn at random per item to fill the {occasion} placeholder,
# which the templates use in phrases such as "perfect for {occasion}" and
# "designed for {occasion}".
occasions = {
    "Dress": ["formal events", "evening parties", "special occasions", "casual outings", 
              "office wear", "summer gatherings", "date nights", "social events", 
              "wedding celebrations", "cocktail parties", "business meetings", "family gatherings", 
              "garden parties", "beach vacations", "night out"],
    "Shirt": ["office settings", "business meetings", "casual outings", "formal events", 
              "weekend activities", "social gatherings", "professional environments", "evening events", 
              "everyday wear", "special occasions", "travel", "date nights", 
              "outdoor activities", "family gatherings", "sporting events"],
    "T-shirt": ["casual outings", "weekend activities", "everyday wear", "lounging at home", 
                "outdoor adventures", "exercise sessions", "summer days", "casual gatherings", 
                "travel", "sports events", "concerts", "beach trips", 
                "hiking adventures", "casual Fridays", "relaxed settings"],
    "Jeans": ["casual outings", "weekend activities", "everyday wear", "social gatherings", 
              "outdoor adventures", "shopping trips", "casual Fridays", "travel", 
              "family gatherings", "evening events", "casual dates", "outdoor concerts", 
              "coffee meet-ups", "casual dinners", "relaxed settings"],
    "Pants": ["office settings", "business meetings", "professional environments", "casual outings", 
              "evening events", "weekend activities", "social gatherings", "travel", 
              "formal occasions", "dining out", "everyday wear", "special occasions", 
              "family gatherings", "date nights", "relaxed settings"],
    "Skirt": ["office settings", "casual outings", "social gatherings", "business meetings", 
              "evening events", "weekend activities", "professional environments", "special occasions", 
              "summer days", "date nights", "family gatherings", "casual Fridays", 
              "shopping trips", "dining out", "formal events"],
    "Jacket": ["outdoor activities", "cool evenings", "casual outings", "office settings", 
               "travel", "everyday wear", "social gatherings", "weekend adventures", 
               "evening events", "spring days", "fall outings", "casual Fridays", 
               "outdoor concerts", "sporting events", "coffee meet-ups"],
    "Coat": ["winter weather", "cold days", "formal events", "professional settings", 
             "outdoor activities", "evening outings", "business meetings", "travel", 
             "special occasions", "everyday winter wear", "social gatherings", "holiday events", 
             "office settings", "city explorations", "weekend activities"],
    "Sweater": ["cool weather", "casual outings", "office settings", "evening gatherings", 
                "weekend activities", "family events", "holiday parties", "outdoor adventures", 
                "everyday wear", "travel", "social gatherings", "cozy nights", 
                "fall days", "winter evenings", "casual Fridays"],
    "Blouse": ["office settings", "business meetings", "professional environments", "social gatherings", 
               "evening events", "casual outings", "special occasions", "date nights", 
               "travel", "family gatherings", "weekend activities", "formal events", 
               "dining out", "casual Fridays", "everyday wear"],
    "Suit": ["business meetings", "formal events", "professional environments", "job interviews", 
             "special occasions", "weddings", "evening galas", "important presentations", 
             "corporate events", "networking events", "cocktail parties", "business dinners", 
             "conferences", "ceremonies", "formal celebrations"],
    "Shorts": ["summer days", "casual outings", "beach trips", "outdoor activities", 
               "weekend adventures", "sporting events", "vacation wear", "hot weather", 
               "lounging at home", "garden activities", "casual gatherings", "hiking trips", 
               "outdoor concerts", "park visits", "summer festivals"]
}

# Generate 200 synthetic clothing items
num_items = 200
synthetic_data = []

# A private generator with a fixed seed makes every "Restart & Run All" produce
# the same rows, without touching the global `random` module state.
rng = random.Random(42)

category_names = list(clothing_data)
color_names = list(colors)


def _join_color(adjective, color):
    """Return "<adjective> <color>"; hyphen-terminated prefixes attach directly.

    "Deep" + "Red" -> "Deep Red", but "Off-" + "White" -> "Off-White"
    (a plain space join would give "Off- White").
    """
    separator = "" if adjective.endswith("-") else " "
    return f"{adjective}{separator}{color}"


def _polish_description(text):
    """Repair grammar that plain template substitution cannot get right.

    * A leading "A " becomes "An " when the next word starts with a vowel
      ("A emerald ..." -> "An emerald ...").
    * Every sentence starts with a capital letter; some templates begin a
      sentence with a lowercase feature phrase.
    """
    if text.startswith("A ") and text[2:3].lower() in ("a", "e", "i", "o", "u"):
        text = "An" + text[1:]
    return ". ".join(sentence[:1].upper() + sentence[1:] for sentence in text.split(". "))


# `item_id` rather than `id`, so the builtin id() is not shadowed for later cells.
for item_id in range(1, num_items + 1):
    # Randomly select category
    category = rng.choice(category_names)

    # Randomly select style, color, and color adjective
    style = rng.choice(clothing_data[category]["styles"])
    color_name = rng.choice(color_names)
    color_adj = rng.choice(colors[color_name])

    # The adjective appears in roughly 70% of the names. Remember the decision:
    # the description must describe the same product as the name.
    use_adjective = rng.random() < 0.7
    full_color = _join_color(color_adj, color_name) if use_adjective else color_name

    # Generate price within range for category
    min_price, max_price = clothing_data[category]["price_range"]
    price = round(rng.uniform(min_price, max_price), 2)

    # Create name
    name = f"{full_color} {style} {category}"

    # Pick the description template and the phrases that fill it
    description_template = rng.choice(description_templates[category])
    feature = rng.choice(features[category])
    feature2 = rng.choice([candidate for candidate in features[category] if candidate != feature])
    material = rng.choice(materials[category])
    benefit = rng.choice(benefits[category])
    occasion = rng.choice(occasions[category])

    # When the name carries no adjective, fall back to the neutral "stylish".
    # (Comparing color_adj with full_color can never detect that case: full_color
    # is either the bare color or "<adjective> <color>", never the adjective alone.)
    adjective = color_adj.lower() if use_adjective else "stylish"
    color_lower = color_name.lower()

    # Format description; str.format ignores placeholders a template does not use
    description = description_template.format(
        adj=adjective,
        color=color_lower,
        style=style.lower(),
        occasion=occasion,
        feature=feature,
        feature2=feature2,
        material=material,
        benefit=benefit
    )
    if adjective.endswith("-"):
        # Templates hard-code "{adj} {color}"; re-join so "off- white" reads "off-white".
        description = description.replace(f"{adjective} {color_lower}", _join_color(adjective, color_lower))
    description = _polish_description(description)

    # Add to dataset; tuple order must match the DataFrame schema columns
    synthetic_data.append((item_id, name, color_name, category, price, description))

# Column layout of the generated tuples: (column name, Spark type), in tuple order.
# Every column is declared non-nullable.
column_types = [
    ("id", IntegerType()),
    ("name", StringType()),
    ("color", StringType()),
    ("category", StringType()),
    ("price", DoubleType()),
    ("description", StringType()),
]
schema = StructType([StructField(column, spark_type, False) for column, spark_type in column_types])

# Turn the Python rows into a Spark DataFrame with that schema
df = spark.createDataFrame(synthetic_data, schema)

# Report how many rows were generated (the rows themselves are displayed in the next cell)
print("Sample of synthetic clothing data:")
row_count = df.count()
print(f"Total number of items: {row_count}")

# Write the rows to a Delta table in overwrite mode (Databricks specific)
table_name = f"{catalog}.dbdemos_agent_tools.clothing"
delta_writer = df.write.format("delta").mode("overwrite")
delta_writer.saveAsTable(table_name)

print(f"Data saved as Delta table: {table_name}")

In [0]:
# Show the generated rows as this cell's output.
# NOTE(review): display() is not imported in this notebook; presumably it is the
# notebook runtime's built-in table renderer — confirm when running elsewhere.
display(df)


Enable Change Data Feed on the table so that a vector index can be built on top of it.

In [0]:
# Set the delta.enableChangeDataFeed table property on the clothing table.
enable_cdf_statement = f"""
ALTER TABLE {table_name} SET TBLPROPERTIES (delta.enableChangeDataFeed = true)
"""
spark.sql(enable_cdf_statement)