In [None]:
import random
import pandas as pd
from datetime import timedelta, datetime

In [None]:
# Seed the global RNG before the first random draw so that "Restart Kernel ->
# Run All" rebuilds exactly the same roster.
SEED = 42
random.seed(SEED)

# Roles an initial employee can be given (each employee gets one or two).
_ROLE_POOL = [
    "Kitchen Porter", "Line Chef (fry)", "Line Chef (ramen)", "Line Chef (wok)",
    "Line Chef (teppan)", "Jr. Sous Chef", "Sr. Sous Chef", "Head Chef"
]
_EXPERIENCE_LEVELS = ["Junior", "Mid-level", "Senior"]

# Original 30 employees with roles and experience.
# NOTE(review): roles are sampled without consulting role_limits, so this
# starting roster can hold more managers than the limits below allow.
initial_employees = [
    {
        "id": i,
        "name": f"Employee_{i}",
        "roles": random.sample(_ROLE_POOL, random.randint(1, 2)),
        "experience": random.choice(_EXPERIENCE_LEVELS),
        "start_date": datetime(2020, 1, 1),
        "end_date": None,  # None means "still employed"
    }
    for i in range(1, 31)
]

# Maximum number of active employees allowed to hold each management role.
# "Sous Chef" is not part of _ROLE_POOL, so no initial employee holds it.
role_limits = {
    "Head Chef": 1,
    "Sr. Sous Chef": 2,
    "Sous Chef": 2,
    "Jr. Sous Chef": 2
}


In [None]:
# Shift templates: nominal start/end hour (24h clock) for each named shift.
shifts = {
    shift_name: {"base_start": opens_at, "base_end": closes_at}
    for shift_name, opens_at, closes_at in (
        ("morning", 7, 15),
        ("mid", 12, 20),
        ("evening", 15, 23),
    )
}

# Turnover rate: probability that an employee leaves in a given year.
TURNOVER_RATE = 0.1


In [None]:
# Seasonal sales function based on day of week and season
def seasonal_sales(day_of_week, month):
    """Draw a noisy daily sales figure for the given weekday and month.

    Args:
        day_of_week: Full English weekday name, e.g. "Saturday".
        month: Calendar month number (1-12).

    Returns:
        A non-negative int: a base of 100 scaled by the weekday factor
        (x1.5 on Saturday/Sunday, x0.8 on Monday) and the season factor
        (x1.3 in June-August, x0.9 in December-February), plus Gaussian
        noise with standard deviation 20, rounded and floored at zero.
    """
    base_sales = 100
    if day_of_week in ["Saturday", "Sunday"]:
        base_sales *= 1.5
    elif day_of_week == "Monday":
        base_sales *= 0.8

    if month in [6, 7, 8]:  # Summer months
        base_sales *= 1.3
    elif month in [12, 1, 2]:  # Winter months
        base_sales *= 0.9

    # The noise is unbounded, so a slow day (base as low as 72) could
    # otherwise come out negative; sales cannot go below zero.
    return max(0, round(base_sales + random.gauss(0, 20)))


In [None]:
# Adjust shift times based on demand
def adjust_shift_times(base_start, base_end, high_demand=False):
    """Return (start, end) hours for a shift, jittered on high-demand days.

    On a normal day both offsets are zero. On a high-demand day the start
    moves by 0, +0.5 or -0.5 hours and the end by 0, +0.5 or +1.0 hours,
    each chosen at random (start offset is drawn first).
    """
    start_shift = end_shift = 0
    if high_demand:
        start_shift = random.choice([0, 0.5, -0.5])
        end_shift = random.choice([0, 0.5, 1.0])
    return base_start + start_shift, base_end + end_shift


In [None]:
# Determine staffing levels based on daily sales
def determine_staffing_levels(daily_sales):
    """Pick a random head-count whose range depends on the day's sales.

    Sales above 180 give 5-8 staff, above 120 give 4-6, anything else 3-5
    (both ends of each range inclusive).
    """
    if daily_sales > 180:
        fewest, most = 5, 8
    elif daily_sales > 120:
        fewest, most = 4, 6
    else:
        fewest, most = 3, 5
    return random.randint(fewest, most)

In [None]:
# Manage employee turnover, keeping within management limits
def manage_turnover(employees, year, turnover_rate=None, limits=None):
    """Apply one year of staff turnover and return the resulting roster.

    Every active employee (``end_date is None``) leaves with probability
    ``turnover_rate``. A leaver's ``end_date`` is set in place to 31 December
    of ``year`` and a replacement starting on 1 January of ``year + 1`` is
    hired with the leaver's roles, minus any management role that is already
    at its limit once the leaver's own seat has been released. If no role
    is left, nobody is hired.

    Args:
        employees: List of employee dicts with at least ``id``, ``roles`` and
            ``end_date`` keys. Leavers are mutated (``end_date`` is set).
        year: Calendar year in which the departures happen.
        turnover_rate: Per-employee leave probability; defaults to the
            module-level ``TURNOVER_RATE``.
        limits: Mapping of role name to maximum active head-count; defaults
            to the module-level ``role_limits``. Roles that are absent from
            the mapping are unlimited.

    Returns:
        Employees who stayed (in input order) followed by the new hires.
        Employees that had already left before the call are not returned.
        New-hire names restart at ``New_Employee_1`` on every call, so use
        ``id`` rather than ``name`` as the unique key.
    """
    if turnover_rate is None:
        turnover_rate = TURNOVER_RATE
    if limits is None:
        limits = role_limits

    # Only people who are still employed can leave or occupy a limited role.
    active = [e for e in employees if e["end_date"] is None]

    # Current head-count for each limited role.
    management_count = {
        role: sum(1 for e in active if role in e["roles"]) for role in limits
    }

    # New ids continue after the highest id present in the input roster.
    next_id = max((e["id"] for e in employees), default=0) + 1

    remaining_employees = []
    new_employees = []

    for emp in active:
        if random.random() >= turnover_rate:
            remaining_employees.append(emp)
            continue

        emp["end_date"] = datetime(year, 12, 31)

        # Release the leaver's management seats first; otherwise a role that
        # is exactly at its limit could never be handed to the replacement.
        for role in emp["roles"]:
            if role in management_count:
                management_count[role] -= 1

        # The replacement inherits each role that still has room.
        new_roles = []
        for role in emp["roles"]:
            if role not in limits or management_count[role] < limits[role]:
                new_roles.append(role)
                if role in limits:
                    management_count[role] += 1

        if new_roles:
            new_employees.append({
                "id": next_id,
                "name": f"New_Employee_{len(new_employees) + 1}",
                "roles": new_roles,
                "experience": random.choice(["Junior", "Mid-level", "Senior"]),
                "start_date": datetime(year + 1, 1, 1),
                "end_date": None
            })
            next_id += 1

    return remaining_employees + new_employees


In [None]:
# Minimum rest, in hours, between the end of one shift and the start of the next.
_MIN_REST_HOURS = 8
# Roles that count as "manager" when staffing a shift.
_MANAGER_ROLES = ("Jr. Sous Chef", "Sr. Sous Chef", "Head Chef")
# At most this many line chefs of the same station (fry, wok, ...) per shift.
_MAX_CHEFS_PER_STATION = 2
# A day whose sales exceed this value is treated as high demand.
_HIGH_DEMAND_SALES = 150


def _is_rested(last_end, shift_start_time):
    """Return True if at least _MIN_REST_HOURS separate last_end and shift_start_time."""
    return last_end is None or shift_start_time - last_end >= timedelta(hours=_MIN_REST_HOURS)


def _select_shift_employees(needed_chefs, needed_managers, chef_pool, manager_pool,
                            assigned_today, last_shift_end, shift_start_time):
    """Pick line chefs, then managers, for one shift; returns (employee, role) pairs.

    Candidates are taken in pool order (first fit). Availability is checked
    at the moment of selection, so someone just picked as a chef cannot also
    be picked as the manager of the same shift. Everyone selected is added
    to ``assigned_today``.
    """
    def available(emp):
        return (emp["id"] not in assigned_today
                and _is_rested(last_shift_end.get(emp["id"]), shift_start_time))

    selected_chefs = []
    station_count = {}
    for chef in chef_pool:
        if len(selected_chefs) >= needed_chefs:
            break
        if not available(chef):
            continue
        role = next((r for r in chef["roles"] if "Line Chef" in r), None)
        if role is None:
            continue
        station = role.split("(")[1].strip(")")  # "Line Chef (wok)" -> "wok"
        if station_count.get(station, 0) >= _MAX_CHEFS_PER_STATION:
            continue
        station_count[station] = station_count.get(station, 0) + 1
        selected_chefs.append((chef, role))
        assigned_today.add(chef["id"])

    selected_managers = []
    for manager in manager_pool:
        if len(selected_managers) >= needed_managers:
            break
        if not available(manager):
            continue
        role = next((r for r in manager["roles"] if r in _MANAGER_ROLES), None)
        if role is None:
            continue
        selected_managers.append((manager, role))
        assigned_today.add(manager["id"])

    return selected_chefs + selected_managers


def _shift_record(employee, role, shift_name, single_date, day_of_week,
                  shift_start, shift_end, daily_sales):
    """Build one schedule row (a dict) for an employee working a shift."""
    return {
        "employee_id": employee["id"],
        "employee_name": employee["name"],
        "experience": employee["experience"],
        "date": single_date,
        "day_of_week": day_of_week,
        "role": role,
        "shift": shift_name,
        "shift_start": shift_start,
        "shift_end": shift_end,
        "sales": daily_sales,
        "is_absent": False
    }


def generate_schedule_data(start_date, end_date, employees):
    """Generate a synthetic shift schedule for every day in [start_date, end_date].

    For each day one sales figure is drawn; it decides whether the day is
    high demand and is the value recorded on that day's rows. Each day gets
    a morning and an evening shift (up to 4 line chefs and 1 manager each)
    and, on high-demand days, a mid shift of 1-2 randomly chosen employees.
    Nobody works twice on one day, and a shift is only given to someone
    whose previous shift ended at least 8 hours before the new shift's base
    start hour (the per-employee demand offset is drawn after selection and
    is not part of that check).

    Turnover is applied at the end of each calendar year except the last,
    so leavers work through their final year and replacements start the
    following year.

    Args:
        start_date: First day to schedule (inclusive); any time-of-day part
            is dropped.
        end_date: Last day to schedule (inclusive); any time-of-day part is
            dropped.
        employees: Initial roster as a list of employee dicts. The list and
            its dicts are not modified; the function works on copies.

    Returns:
        pandas.DataFrame with one row per employee-shift (empty if the date
        range contains no days).
    """
    schedule_data = []

    # Shallow-copy the records so turnover bookkeeping (end_date) does not
    # leak into the caller's roster and change a later re-run.
    employees = [dict(e) for e in employees]

    first_day = pd.Timestamp(start_date).normalize()
    last_day = pd.Timestamp(end_date).normalize()

    # Actual end time of each employee's most recent shift; absent = never worked.
    last_shift_end = {}

    for current_year in range(first_day.year, last_day.year + 1):
        # Clip the year to the requested window.
        year_start = max(first_day, pd.Timestamp(datetime(current_year, 1, 1)))
        year_end = min(last_day, pd.Timestamp(datetime(current_year, 12, 31)))

        # The roster is fixed within a year, so build the role pools once.
        line_chefs = [e for e in employees if any("Line Chef" in role for role in e["roles"])]
        managers = [e for e in employees if any(role in _MANAGER_ROLES for role in e["roles"])]

        for single_date in pd.date_range(start=year_start, end=year_end):
            day_of_week = single_date.strftime("%A")
            # One draw per day: the demand flag and the recorded sales agree.
            daily_sales = seasonal_sales(day_of_week, single_date.month)
            high_demand = daily_sales > _HIGH_DEMAND_SALES

            # Employees who already have a shift on this day.
            assigned_today = set()

            # Morning shift
            morning = shifts["morning"]
            morning_start_time = single_date + timedelta(hours=morning["base_start"])
            for employee, role in _select_shift_employees(
                    4, 1, line_chefs, managers, assigned_today, last_shift_end, morning_start_time):
                shift_start, shift_end = adjust_shift_times(
                    morning["base_start"], morning["base_end"], high_demand
                )
                # Occasionally extend shift to 17:00
                if random.random() < 0.2:
                    shift_end = 17
                schedule_data.append(_shift_record(
                    employee, role, "morning", single_date, day_of_week,
                    shift_start, shift_end, daily_sales))
                last_shift_end[employee["id"]] = single_date + timedelta(hours=shift_end)

            # Mid shift (only on high-demand days)
            if high_demand:
                mid = shifts["mid"]
                mid_start_time = single_date + timedelta(hours=mid["base_start"])
                mid_shift_count = random.choice([1, 2])
                mid_pool = [
                    e for e in employees
                    if e["id"] not in assigned_today
                    and _is_rested(last_shift_end.get(e["id"]), mid_start_time)
                ]
                # Never ask random.sample for more people than are available.
                for employee in random.sample(mid_pool, min(mid_shift_count, len(mid_pool))):
                    role = random.choice(employee["roles"])
                    shift_start, shift_end = adjust_shift_times(
                        mid["base_start"], mid["base_end"], high_demand
                    )
                    schedule_data.append(_shift_record(
                        employee, role, "mid", single_date, day_of_week,
                        shift_start, shift_end, daily_sales))
                    assigned_today.add(employee["id"])
                    last_shift_end[employee["id"]] = single_date + timedelta(hours=shift_end)

            # Evening shift
            evening = shifts["evening"]
            evening_start_time = single_date + timedelta(hours=evening["base_start"])
            for employee, role in _select_shift_employees(
                    4, 1, line_chefs, managers, assigned_today, last_shift_end, evening_start_time):
                shift_start, shift_end = adjust_shift_times(
                    evening["base_start"], evening["base_end"], high_demand
                )
                # Occasionally extend shift by two hours (past midnight)
                if random.random() < 0.2:
                    shift_end += 2
                schedule_data.append(_shift_record(
                    employee, role, "evening", single_date, day_of_week,
                    shift_start, shift_end, daily_sales))
                # shift_end may exceed 24, which lands on the next calendar day.
                last_shift_end[employee["id"]] = single_date + timedelta(hours=shift_end)

        # Departures are dated 31 Dec of this year and hires 1 Jan of the
        # next, so the roster changes only after the year has been scheduled.
        if current_year < last_day.year:
            employees = manage_turnover(employees, current_year)

    return pd.DataFrame(schedule_data)


In [None]:
# Define start and end dates for the 5-year period. The year of end_date is
# part of the generated output, so a five-year span starting in 2020 has to
# end in 2024, not on 2025-01-01.
start_date = datetime(2020, 1, 1)
end_date = datetime(2024, 12, 31)

# Seed right before generation so this stochastic step can be reproduced.
random.seed(42)

# Generate synthetic schedule data with turnover and management constraints
synthetic_schedule = generate_schedule_data(start_date, end_date, initial_employees)

# Display the initial data structure
synthetic_schedule.head(10)


Unnamed: 0,employee_id,employee_name,experience,date,day_of_week,role,shift,shift_start,shift_end,sales,is_absent
0,1,Employee_1,Mid-level,2020-01-01,Wednesday,Line Chef (wok),morning,7.0,17.0,81,False
1,2,Employee_2,Senior,2020-01-01,Wednesday,Line Chef (wok),morning,7.0,15.0,81,False
2,3,Employee_3,Senior,2020-01-01,Wednesday,Line Chef (ramen),morning,7.0,15.0,81,False
3,5,Employee_5,Mid-level,2020-01-01,Wednesday,Line Chef (ramen),morning,7.0,15.0,81,False
4,1,Employee_1,Mid-level,2020-01-01,Wednesday,Jr. Sous Chef,morning,7.0,17.0,81,False
5,7,Employee_7,Senior,2020-01-01,Wednesday,Line Chef (teppan),evening,15.0,23.0,81,False
6,8,Employee_8,Senior,2020-01-01,Wednesday,Line Chef (fry),evening,15.0,23.0,81,False
7,9,Employee_9,Senior,2020-01-01,Wednesday,Line Chef (ramen),evening,15.0,23.0,81,False
8,11,Employee_11,Senior,2020-01-01,Wednesday,Line Chef (teppan),evening,15.0,23.0,81,False
9,4,Employee_4,Mid-level,2020-01-01,Wednesday,Sr. Sous Chef,evening,15.0,23.0,81,False


In [None]:
# Count how many shifts each employee worked in each role.
shifts_per_employee_role = synthetic_schedule.groupby(['employee_id', 'role']).size()
employee_roles = shifts_per_employee_role.reset_index(name='shift_count')

print("Roles and shift counts for each employee:")
print(employee_roles)

Roles and shift counts for each employee:
    employee_id                role  shift_count
0             1       Jr. Sous Chef         2192
1             1     Line Chef (wok)         2192
2             2     Line Chef (wok)         2192
3             3   Line Chef (ramen)         2192
4             4      Kitchen Porter           11
5             4       Sr. Sous Chef         2181
6             5   Line Chef (ramen)         1827
7             6      Kitchen Porter           23
8             7     Line Chef (fry)           10
9             7  Line Chef (teppan)         2182
10            8      Kitchen Porter           14
11            8     Line Chef (fry)         2178
12            9   Line Chef (ramen)         2192
13           11       Jr. Sous Chef           35
14           11  Line Chef (teppan)         2180
15           12       Jr. Sous Chef            1
16           12   Line Chef (ramen)           16
17           13      Kitchen Porter           10
18           13     Line Ch