In [1]:
import random
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd

from helper_functions.classes import User, StudentUser, RegularUser, AdminUser, Inventory, Sandwich, Order, Loyalty

In [2]:

def generate_mock_customers(num_customers=100, student_ratio=0.05):
    """
    Generates a list of mock customers, the first of which are CBS students.

    Customers are numbered from 1 to ``num_customers``. Customer ``i`` is
    created as a ``StudentUser`` with an ``@student.cbs.dk`` address when
    ``i <= num_customers * student_ratio``; every other customer is created
    as a ``RegularUser`` with an ``@example.com`` address.

    Args:
        num_customers: Number of customers to create. Values <= 0 give an
            empty list.
        student_ratio: Fraction of the customers (0.0-1.0) created as
            students. The default of 0.05 gives 5% CBS students and 95%
            regular users.

    Returns:
        list: The customers in ascending ID order, students first.

    Raises:
        ValueError: If ``student_ratio`` is outside the range [0, 1].
    """
    if not 0 <= student_ratio <= 1:
        raise ValueError(
            f"student_ratio must be between 0 and 1, got {student_ratio!r}"
        )

    # Customers numbered up to this cutoff become students.
    student_cutoff = num_customers * student_ratio

    customers = []
    for i in range(1, num_customers + 1):
        # NOTE: ":03d" / ":02d" are minimum widths only, so IDs and phone
        # suffixes get longer (e.g. "C1000") once i outgrows the padding.
        user_id = f"C{i:03d}"
        name = f"Customer {i}"
        phone = f"+12345678{i:02d}"

        if i <= student_cutoff:
            email = f"customer{i}@student.cbs.dk"
            customers.append(StudentUser(user_id, name, email, phone))
        else:
            email = f"customer{i}@example.com"
            customers.append(RegularUser(user_id, name, email, phone))
    return customers

def _sample_up_to(options, max_items):
    """Return between 1 and max_items distinct random items, capped at len(options)."""
    requested = random.randint(1, max_items)
    # random.sample raises ValueError when asked for more items than exist,
    # so never request more than the pool can supply.
    return random.sample(options, min(requested, len(options)))


def generate_mock_sandwich(inventory):
    """
    Generates a random sandwich ensuring 'No...' restrictions are respected.

    Bread, spread, protein and dressing are each a uniformly random key of the
    matching ``inventory.available_*`` mapping. Vegetables are "No vegetables"
    with 10% probability, otherwise 1-3 distinct vegetables; extras are
    "No extras" with 20% probability, otherwise 1-2 distinct extras. The
    "No ..." placeholder is therefore never mixed with real choices.

    If the inventory offers fewer real vegetables/extras than the number
    drawn, all available ones are used instead of raising ``ValueError``; if
    it offers none at all, the "No ..." placeholder is used.

    Args:
        inventory: Object exposing the ``available_breads``,
            ``available_spreads``, ``available_proteins``,
            ``available_vegetables``, ``available_dressings`` and
            ``available_extras`` mappings. It is also passed to ``Sandwich``.

    Returns:
        Sandwich: The configured sandwich.
    """
    sandwich = Sandwich(inventory)
    sandwich.select_bread(random.choice(list(inventory.available_breads.keys())))
    sandwich.select_spread(random.choice(list(inventory.available_spreads.keys())))
    sandwich.select_protein(random.choice(list(inventory.available_proteins.keys())))

    # Vegetables
    if random.random() < 0.1:  # 10% chance of "No vegetables"
        sandwich.add_vegetables(["No vegetables"])
    else:
        # NOTE(review): the [1:] slice assumes the first key of
        # available_vegetables is the "No vegetables" placeholder - confirm
        # against Inventory.
        real_vegetables = list(inventory.available_vegetables.keys())[1:]
        sandwich.add_vegetables(_sample_up_to(real_vegetables, 3) or ["No vegetables"])

    # Dressing
    sandwich.select_dressing(random.choice(list(inventory.available_dressings.keys())))

    # Extras
    if random.random() < 0.2:  # 20% chance of "No extras"
        sandwich.add_extras(["No extras"])
    else:
        # NOTE(review): same assumption as above - first key is presumably
        # the "No extras" placeholder.
        real_extras = list(inventory.available_extras.keys())[1:]
        sandwich.add_extras(_sample_up_to(real_extras, 2) or ["No extras"])

    return sandwich

def generate_mock_orders(customers, inventory, start_date, end_date):
    """
    Generates a list of mock orders for a given date range with realistic customer behavior.

    Every customer places 1 order (10% of customers), 2 orders (30%) or a
    random 3-10 orders (60%). Each order falls on a uniformly random day in
    the inclusive range ``start_date``..``end_date``, holds 1-5 random
    sandwiches and is recorded on the customer via ``customer.add_order``.
    Order IDs are sequential in creation order, so they are grouped by
    customer rather than sorted by order time.

    Args:
        customers: Iterable of customer objects; each must provide
            ``add_order(order)``.
        inventory: Inventory passed on to ``Order`` and to
            ``generate_mock_sandwich``.
        start_date: First possible order day (``datetime``). Its seconds and
            microseconds are carried over into every order time.
        end_date: Last possible order day (``datetime``), inclusive.

    Returns:
        list: All generated ``Order`` objects, in creation order.

    Raises:
        ValueError: If ``end_date`` is earlier than ``start_date``.
    """
    total_days = (end_date - start_date).days + 1
    if total_days < 1:
        # Fail early with a clear message instead of the opaque
        # "empty range" error random.randint would raise further down.
        raise ValueError(
            f"end_date ({end_date}) must not be earlier than start_date ({start_date})"
        )

    orders = []
    for customer in customers:
        # Number of orders this customer places within the date range.
        orders_per_customer = random.choices(
            [1, 2, random.randint(3, 10)],
            weights=[10, 30, 60],
            k=1
        )[0]

        for _ in range(orders_per_customer):
            # Random day within the date range
            order_date = start_date + timedelta(days=random.randint(0, total_days - 1))

            # Random time of day, weighted towards lunch. randint is inclusive
            # on both ends, so the windows run to hh:59 of the last hour.
            if random.random() < 0.5:  # 50% chance of lunch time (11:00-14:59)
                order_time = order_date.replace(hour=random.randint(11, 14), minute=random.randint(0, 59))
            else:  # any time between 08:00 and 19:59
                order_time = order_date.replace(hour=random.randint(8, 19), minute=random.randint(0, 59))

            # Create the order
            order = Order(
                order_id=len(orders) + 1,
                customer=customer,
                order_time=order_time,
                inventory=inventory
            )

            # Add sandwiches to the order
            for _ in range(random.randint(1, 5)):  # 1 to 5 sandwiches per order
                order.add_sandwich(generate_mock_sandwich(inventory))

            # Update customer's order history
            customer.add_order(order)
            orders.append(order)

    return orders

def convert_to_dataframes(orders, customers):
    """
    Converts the generated mock data into pandas dataframes for visualizations.

    Args:
        orders: Iterable of order objects exposing ``order_id``, ``customer``
            (with ``user_id`` and ``name``), ``order_time``, ``sandwiches``
            and ``calculate_total()``; each sandwich exposes ``vegetables``
            and ``extras`` lists.
        customers: Iterable of customer objects exposing ``user_id``,
            ``name``, ``email``, ``phone``, ``sandwich_count`` and
            ``order_history``.

    Returns:
        tuple: ``(orders_df, customers_df, ingredients_df)``.
            ``orders_df`` has one row per order, ``customers_df`` one row per
            customer, and ``ingredients_df`` one row per distinct
            vegetable/extra name with its usage count, sorted by descending
            usage (the pre-sort index is kept). Placeholder names such as
            "No extras" are counted like any other entry. Empty inputs give
            empty frames that still carry all columns.
    """
    # Columns are passed explicitly so that empty inputs still produce frames
    # with the expected schema (and the sort below does not raise KeyError).
    order_columns = [
        "Order ID",
        "Customer ID",
        "Customer Name",
        "Order Time",
        "Number of Sandwiches",
        "Total Cost (DKK)",
    ]
    customer_columns = [
        "Customer ID",
        "Name",
        "Email",
        "Phone",
        "Total Sandwiches Purchased",
        "Number of Orders",
        "Type",
    ]
    ingredient_columns = ["Ingredient", "Usage"]

    # Orders DataFrame
    orders_data = [
        {
            "Order ID": order.order_id,
            "Customer ID": order.customer.user_id,
            "Customer Name": order.customer.name,
            "Order Time": order.order_time,
            "Number of Sandwiches": len(order.sandwiches),
            # Only the first element of calculate_total()'s result is kept.
            "Total Cost (DKK)": order.calculate_total()[0],
        }
        for order in orders
    ]
    orders_df = pd.DataFrame(orders_data, columns=order_columns)

    # Customers DataFrame
    customers_data = [
        {
            "Customer ID": customer.user_id,
            "Name": customer.name,
            "Email": customer.email,
            "Phone": customer.phone,
            "Total Sandwiches Purchased": customer.sandwich_count,
            "Number of Orders": len(customer.order_history),
            "Type": "Student" if isinstance(customer, StudentUser) else "Regular",
        }
        for customer in customers
    ]
    customers_df = pd.DataFrame(customers_data, columns=customer_columns)

    # Ingredients DataFrame: usage counts over vegetables and extras only
    ingredients_usage = {}
    for order in orders:
        for sandwich in order.sandwiches:
            for ingredient in sandwich.vegetables + sandwich.extras:
                ingredients_usage[ingredient] = ingredients_usage.get(ingredient, 0) + 1
    ingredients_df = pd.DataFrame(
        [{"Ingredient": name, "Usage": count} for name, count in ingredients_usage.items()],
        columns=ingredient_columns,
    ).sort_values(by="Usage", ascending=False)

    return orders_df, customers_df, ingredients_df

# Generate Mock Data
# Seed Python's `random` module (used by all generators above) so that
# Restart Kernel -> Run All reproduces the same simulated dataset.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

inventory = Inventory()
customers = generate_mock_customers(num_customers=1000)
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 12, 18)  # inclusive last order day
orders = generate_mock_orders(customers, inventory, start_date, end_date)

# Convert to DataFrames
orders_df, customers_df, ingredients_df = convert_to_dataframes(orders, customers)

In [3]:
# Preview the first five rows of the simulated orders table.
print("Orders DataFrame")
orders_df.head(n=5)

Orders DataFrame


Unnamed: 0,Order ID,Customer ID,Customer Name,Order Time,Number of Sandwiches,Total Cost (DKK)
0,1,C001,Customer 1,2024-10-15 11:29:00,1,78.85
1,2,C001,Customer 1,2024-12-11 14:57:00,2,174.8
2,3,C001,Customer 1,2024-11-24 14:16:00,2,163.4
3,4,C001,Customer 1,2024-01-08 17:28:00,1,87.4
4,5,C002,Customer 2,2024-10-07 08:30:00,2,163.4


In [4]:
# Preview the first five rows of the simulated customers table.
print("\nCustomers DataFrame")
customers_df.head(n=5)


Customers DataFrame


Unnamed: 0,Customer ID,Name,Email,Phone,Total Sandwiches Purchased,Number of Orders,Type
0,C001,Customer 1,customer1@student.cbs.dk,1234567801,6,4,Student
1,C002,Customer 2,customer2@student.cbs.dk,1234567802,25,10,Student
2,C003,Customer 3,customer3@student.cbs.dk,1234567803,13,5,Student
3,C004,Customer 4,customer4@student.cbs.dk,1234567804,18,5,Student
4,C005,Customer 5,customer5@student.cbs.dk,1234567805,5,2,Student


In [5]:
# Preview the five most-used vegetables/extras (frame is sorted by usage).
print("\nIngredients DataFrame")
ingredients_df.head(n=5)


Ingredients DataFrame


Unnamed: 0,Ingredient,Usage
6,Avocado,5649
5,Turkey bacon,5626
1,Cheddar cheese,5531
9,No extras,2784
11,Pickles,1974


In [7]:
# Persist the simulated datasets as CSV files (without the DataFrame index).
output_dir = Path("simulated_data")
# to_csv does not create missing parent directories, so make sure it exists.
output_dir.mkdir(parents=True, exist_ok=True)

ingredients_df.to_csv(output_dir / "ingredients.csv", index=False)
customers_df.to_csv(output_dir / "customers.csv", index=False)
orders_df.to_csv(output_dir / "orders.csv", index=False)