In [None]:
import sqlite3
import pandas as pd
from faker import Faker
import random

In [None]:
# Initialize Faker with fixed seeds so that Restart Kernel -> Run All
# regenerates the same synthetic dataset on every run.
SEED = 42
Faker.seed(SEED)   # seeds the generator shared by Faker instances
random.seed(SEED)  # seeds the stdlib RNG that drives the injected noise
fake = Faker()

In [None]:
# Open the SQLite database file that every generated table is written to,
# and keep a cursor on the same connection for ad-hoc statements.
DB_FILE = "fakefashioneasydatabase.db"
conn = sqlite3.connect(DB_FILE)
cursor = conn.cursor()

In [None]:
# Generate Users with noise
#
# Builds 1000 fake users (unique id in 1000..9999, name, email, country,
# zipcode) and writes them to the 'Users' table, replacing any existing one.
N_USERS = 1000
EMAIL_TYPO_RATE = 0.05  # fraction of emails that get a corrupted "@"

# `fake.unique` remembers every value it has handed out. Clearing it makes
# this cell safe to re-run: without it, repeated runs drain the 9000-value
# user_id pool and Faker eventually raises UniquenessException.
fake.unique.clear()

users = []
for i in range(N_USERS):
    email = fake.unique.email()
    # Include intermittent typos in emails. "@" itself is not a candidate
    # replacement: swapping "@" for "@" is a no-op and would silently lower
    # the real typo rate below EMAIL_TYPO_RATE.
    if random.random() < EMAIL_TYPO_RATE:
        email = email.replace("@", random.choice(["!", "#"]))

    users.append((
        fake.unique.random_int(min=1000, max=9999),  # Unique user_id
        fake.name(),
        email,  # Use modified email with typos
        fake.country(),
        fake.zipcode()
    ))
user_df = pd.DataFrame(users, columns=["user_id", "user_name", "email", "user_country", "user_zipcode"])

# Other tables reference user_id, so it must be a usable key.
assert user_df["user_id"].is_unique, "duplicate user_id generated"

# Last expression: the cell displays the value returned by to_sql.
user_df.to_sql('Users', conn, if_exists='replace', index=False)

1000

In [None]:
# Generate vendor with noise
#
# Builds 1000 fake vendors (id, company name, city) and writes them to the
# 'Vendors' table, replacing any existing one.
N_VENDORS = 1000
VENDOR_TYPO_RATE = 0.05  # fraction of vendor names that get mangled spacing

# Draw the ids without replacement so they are actually unique. Independent
# draws of 1000 values from 1000..9999 would almost certainly collide.
vendor_ids = random.sample(range(1000, 10000), N_VENDORS)

vendors = []
for vendor_id in vendor_ids:
    vendor_name = fake.company()
    location = fake.city()

    # Introduce intermittent typos in vendor names (5% chance): spaces are
    # replaced by "_", "-", or removed entirely.
    if random.random() < VENDOR_TYPO_RATE:
        vendor_name = vendor_name.replace(" ", random.choice(["_", "-", ""]))

    vendors.append((vendor_id, vendor_name, location))

vendor_df = pd.DataFrame(vendors, columns=["vendor_id", "vendor_name", "location"])

# vendor_id is used as a key by other tables, so guard the invariant.
assert vendor_df["vendor_id"].is_unique, "duplicate vendor_id generated"

# Last expression: the cell displays the value returned by to_sql.
vendor_df.to_sql('Vendors', conn, if_exists='replace', index=False)


1000

In [None]:
# Product Data (following the same structure)
#
# Builds 1000 products with sequential ids 1..1000 and writes them to the
# 'Products' table, replacing any existing one.

# Pick vendor_id from the vendors that were actually generated. Drawing from
# the whole 1000..9999 range would leave most products pointing at a vendor
# that does not exist in the Vendors table.
valid_vendor_ids = vendor_df["vendor_id"].unique().tolist()

products = []
for i in range(1000):
    products.append({
        'product_id': i + 1,
        'vendor_id': random.choice(valid_vendor_ids),  # existing vendor
        'price': round(random.uniform(10, 500), 2),  # Random price
        'inventory': random.randint(1, 100)  # Random stock quantity
    })

product_df = pd.DataFrame(products)

# Last expression: the cell displays the value returned by to_sql.
product_df.to_sql('Products', conn, if_exists='replace', index=False)


1000

In [None]:
# Orders Data (with noise)
#
# Builds 2000 orders with sequential ids 11110..13109 and writes them to the
# 'Orders' table, replacing any existing one.

# Orders must belong to users that exist. The Users table holds random ids
# in 1000..9999, so a sequential 1..2000 user_id would match almost none.
valid_user_ids = user_df["user_id"].tolist()

orders = []
for i in range(2000):  # Assuming 2000 orders
    order_date = fake.date_between(start_date='-1y', end_date='today')

    # Introduce incorrect dates (3% chance): shift the order into the
    # previous year window.
    if random.random() < 0.03:
        order_date = fake.date_between(start_date='-2y', end_date='-1y')  # Wrong year

    order_status = random.choice(['Pending', 'Completed', 'Cancelled'])

    # Introduce unexpected statuses (3% chance)
    if random.random() < 0.03:
        order_status = random.choice(['Failed', 'On Hold', 'Refunded'])  # Unexpected statuses

    orders.append({
        'order_id': 11110 + i,
        'user_id': random.choice(valid_user_ids),  # existing user
        'order_date': order_date,
        'order_status': order_status
    })

# Build the frame and store it
order_df = pd.DataFrame(orders)

# Last expression: the cell displays the value returned by to_sql.
order_df.to_sql('Orders', conn, if_exists='replace', index=False)

2000

In [None]:
# Order Details Data with Noise
#
# Builds one detail row per order (order ids 11110..13109, the same sequence
# the Orders cell uses) and writes them to the 'OrderDetails' table,
# replacing any existing one.

# Only reference products that exist. A sequential 1..2000 product_id would
# leave half of the rows pointing past the 1000 generated products.
valid_product_ids = product_df["product_id"].tolist()

order_details = []
for i in range(2000):
    quantity = random.randint(1, 5)  # Random quantity (1-5 items)

    # Introduce incorrect quantities (5% chance)
    if random.random() < 0.05:
        quantity = random.randint(6, 20)  # Unusually high quantity

    delivery_address = fake.address()

    order_details.append({
        'order_id': 11110 + i,
        'product_id': random.choice(valid_product_ids),  # existing product
        'quantity': quantity,
        'delivery_address': delivery_address
    })

order_details_df = pd.DataFrame(order_details)

# Last expression: the cell displays the value returned by to_sql.
order_details_df.to_sql('OrderDetails', conn, if_exists='replace', index=False)

2000

In [None]:
# Styling Preferences Data (with noise)
#
# 500 rows with ids 1..500. Each row gets one of ten style categories, and
# roughly 5% are overwritten with an out-of-vocabulary placeholder.
preference_options = [
    'Casual Wear', 'Formal Wear', 'Traditional African', 'Street Style',
    'Business Attire', 'Athleisure', 'Evening Wear', 'Cultural Event',
    'Minimalist', 'Bold & Colorful'
]

styling_preferences = []
for styling_id in range(1, 501):
    # Draw the category first, then decide whether to corrupt it
    chosen = random.choice(preference_options)
    if random.random() < 0.05:
        chosen = "Unknown Preference"  # Unexpected category

    styling_preferences.append(
        dict(styling_id=styling_id, preference_details=chosen)
    )

# Convert to DataFrame
styling_preferences_df = pd.DataFrame(styling_preferences)

# Write to the database only when there is something to write, then report
# the value to_sql returned.
if not styling_preferences_df.empty:
    rows_inserted = styling_preferences_df.to_sql(
        'StylingPreferences',
        conn,
        if_exists='replace',
        index=False,
    )
    print(rows_inserted)


500


In [None]:
# Styling Suggestions Data (with noise)
#
# Builds 100 styling suggestions linking a user, a product and a styling
# preference to an event, and writes them to the 'StylingSuggestions' table,
# replacing any existing one.

# Draw every foreign key from the ids that were actually generated. The
# previously hard-coded ranges (users 1..1000, products 10000..10999,
# stylings 1..50) did not match the Users, Products and StylingPreferences
# tables built earlier in this notebook.
valid_user_ids = user_df["user_id"].tolist()
valid_product_ids = product_df["product_id"].tolist()
valid_styling_ids = styling_preferences_df["styling_id"].tolist()

styling_suggestions = []
for i in range(100):  # Assuming 100 styling suggestions
    event_details = random.choice([
        'Wedding', 'Party', 'Corporate Meeting', 'Casual Outing', 'Date Night',
        'Graduation', 'Religious Event', 'Beach Party', 'Festival', 'Night Out'
    ])

    # Introduce occasional incorrect values (5% chance)
    if random.random() < 0.05:
        event_details = "Unknown Event"  # Unexpected category

    styling_suggestions.append({
        'styling_suggestion_id': i + 1,
        'user_id': random.choice(valid_user_ids),  # existing user
        'product_id': random.choice(valid_product_ids),  # existing product
        'styling_id': random.choice(valid_styling_ids),  # existing preference
        'event_details': event_details
    })

styling_suggestions_df = pd.DataFrame(styling_suggestions)

# Last expression: the cell displays the value returned by to_sql.
styling_suggestions_df.to_sql('StylingSuggestions', conn, if_exists='replace', index=False)

100