In [8]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Seed for reproducibility: fix the global state of both the standard-library
# and NumPy random generators so repeated runs produce the same dataset.
# The code visible in this cell draws only from `random`; the NumPy seed only
# matters if other code relies on `np.random`.
random.seed(42)
np.random.seed(42)

# Employee level data.
# Maps each hierarchy level to its pool of employee IDs and a numeric weight.
# IDs are the level name followed by a zero-padded running number; L1/L2 pad
# to 2 digits while L3-L5 pad to 4 digits (e.g. 'L101', 'L30001').
employee_levels = {
    level_name: {
        'ids': [f'{level_name}{seq:0{pad_width}d}' for seq in range(1, headcount + 1)],
        'weight': level_weight,
    }
    # (level, number of employees, ID padding width, weight)
    for level_name, headcount, pad_width, level_weight in (
        ('L1', 10, 2, 1),
        ('L2', 50, 2, 2),
        ('L3', 120, 4, 3),
        ('L4', 200, 4, 4),
        ('L5', 120, 4, 5),
    )
}

# Departments and their preferences.
# Maps each department name to the workspace types it may request. The
# simulation loop picks a department at random and then picks one workspace
# from that department's list with random.choice, so every entry here must
# also be a key of `workspace_utilities` (the loop indexes it by workspace).
departments = {
    'HR': ['Private Offices', 'Meeting Rooms', 'Collaboration Spaces'],
    'Sales & Marketing': ['Meeting Rooms', 'Collaboration Spaces', 'Breakout Areas'],
    'IT': ['Fixed Workstations', 'Meeting Rooms', 'Collaboration Spaces'],
    'Finance': ['Private Offices', 'Meeting Rooms'],
    'Customer Support': ['Fixed Workstations', 'Breakout Areas']
}

# Workspace types and utilities.
# Maps each workspace type to the list of utilities that can be requested for
# it. The simulation loop draws up to four distinct utilities from the list of
# the chosen workspace (random.sample) and joins them with ", ".
# NOTE: 'Managerial Cabins', 'Hot Desks' and 'Conference Halls' do not appear
# in any list in `departments`, so the simulation loop never requests them.
workspace_utilities = {
    'Private Offices': [
        'Individual Climate Control', 'Adjustable Lighting', 'Sufficient Power Outlets',
        'Dedicated High-Speed Internet Port (Ethernet)', 'Secure Storage',
        'Ergonomic Chair', 'Monitor Arm'
    ],
    'Managerial Cabins': [
        'Individual Climate Control', 'Adjustable Lighting', 'Sufficient Power Outlets',
        'Dedicated High-Speed Internet Port (Ethernet)', 'Secure Storage',
        'Ergonomic Chair', 'Monitor Arm', 'Small Whiteboard or Pinboard',
        'Guest Chairs', 'Small Personal Refrigerator'
    ],
    'Fixed Workstations': [
        'Sufficient Power Outlets', 'Reliable High-Speed Internet Connection (Wired or Strong Wi-Fi)',
        'Ergonomic Chair', 'Task Lighting', 'Basic Storage', 'Monitor Mount'
    ],
    'Hot Desks': [
        'Easily Accessible Power Outlets', 'Reliable and Strong Wi-Fi',
        'Lockable Storage (Nearby Lockers)', 'Cleanliness and Hygiene Supplies',
        'Adjustable Monitor (Optional)'
    ],
    'Meeting Rooms': [
        'Reliable and Fast Wi-Fi', 'Sufficient Power Outlets', 'Display Screen/Projector and Connectivity',
        'Whiteboard or Flip Chart with Markers and Erasers', 'Comfortable and Ergonomic Seating',
        'Climate Control', 'Good Lighting'
    ],
    'Conference Halls': [
        'Reliable and Fast Wi-Fi', 'Sufficient Power Outlets', 'Professional Audio-Visual Equipment',
        'Presentation Clicker/Pointer', 'Water Dispenser and Glasses',
        'Easily Accessible Waste Bins', 'Adequate Ventilation', 'Comfortable and Ergonomic Seating',
        'Climate Control', 'Good Lighting', 'Whiteboard or Flip Chart'
    ],
    'Collaboration Spaces': [
        'Flexible Power Outlets', 'Reliable and Strong Wi-Fi',
        'Large Interactive Display or Whiteboards', 'Comfortable and Flexible Seating Options',
        'Variety of Table Heights', 'Good Lighting'
    ],
    'Breakout Areas': [
        'Comfortable Seating', 'Sufficient Power Outlets', 'Good Lighting',
        'Access to Water Dispenser and Possibly a Coffee/Tea Station',
        'Waste Bins', 'Small Refrigerator or Microwave'
    ]
}

# Flat name lists in dict insertion order (iterating a dict yields its keys).
workspace_types = list(workspace_utilities)
department_list = list(departments)

# Helper function to generate random timestamp during work hours
def generate_timestamp():
    """Return a random request timestamp as a string.

    The date falls on 1-5 April 2025, the hour is between 09 and 16
    (inclusive) and the minute is anywhere in 00-59. The result is formatted
    as "YYYY:MM:DD HH:MM" -- note the date parts are separated by colons.

    NOTE(review): day 5 (2025-04-05) appears to be a Saturday -- confirm that
    weekend timestamps are intended.
    """
    # Dict displays evaluate in order, so the draws stay day -> hour -> minute,
    # which keeps the output reproducible under a fixed seed.
    drawn = {
        'day': random.randint(1, 5),
        'hour': random.randint(9, 16),
        'minute': random.randint(0, 59),
    }
    moment = datetime(year=2025, month=4, **drawn)
    return moment.strftime("%Y:%m:%d %H:%M")

# Function to calculate priority level
def calculate_priority(urgency, level, num_employees):
    """Compute the priority score of a workspace request.

    Args:
        urgency: Request urgency; only the exact value 'URGENT' adds a bonus.
        level: Employee hierarchy level, one of 'L1'..'L5'.
        num_employees: Number of employees covered by the request.

    Returns:
        The level's base score (L1 = 5 down to L5 = 1), plus 2 when the
        request is urgent, plus 1 when more than 5 employees are involved.

    Raises:
        KeyError: If ``level`` is not one of 'L1'..'L5'.
    """
    level_scores = {'L1': 5, 'L2': 4, 'L3': 3, 'L4': 2, 'L5': 1}
    score = level_scores[level]
    if urgency == 'URGENT':
        score += 2
    if num_employees > 5:
        score += 1
    return score

# Simulate 2000 records
# Loop invariants are prepared once up front. The order of the random draws
# inside the loop is significant: with the fixed seed it determines the exact
# content of the generated dataset.
level_names = list(employee_levels)
level_draw_weights = [1, 2, 4, 6, 3]  # relative sampling frequency, L1..L5
shared_spaces = ('Meeting Rooms', 'Collaboration Spaces', 'Breakout Areas')
column_names = [
    'Employee ID', 'Employee Hierarchy', 'Department', 'Number of Employees',
    'Request Timestamp', 'Request Duration (hrs)', 'Requested Workspace', 'Workspace Utilities Required',
    'Urgency', 'Priority Level'
]

records = []
for _ in range(2000):
    # Who is asking: hierarchy level first, then a concrete employee of it.
    level = random.choices(level_names, weights=level_draw_weights)[0]
    emp_id = random.choice(employee_levels[level]['ids'])

    # What is requested: a workspace out of the department's preferred types.
    department = random.choice(department_list)
    workspace = random.choice(departments[department])

    # Shared spaces can host up to 20 people, every other type at most 4.
    headcount_cap = 20 if workspace in shared_spaces else 4
    num_employees = random.randint(1, headcount_cap)

    timestamp = generate_timestamp()
    duration = random.randint(1, 4)  # in hours

    # Up to four distinct utilities of the chosen workspace, comma-separated.
    available = workspace_utilities[workspace]
    chosen = random.sample(available, k=min(4, len(available)))
    utilities = ", ".join(chosen)

    urgency = random.choices(['USUAL', 'URGENT'], weights=[0.8, 0.2])[0]
    priority = calculate_priority(urgency, level, num_employees)

    # A satisfaction score is not generated here; it can be derived later
    # with a custom function if needed.
    records.append([
        emp_id, level, department, num_employees, timestamp, duration,
        workspace, utilities, urgency, priority
    ])

# Create DataFrame
df = pd.DataFrame(records, columns=column_names)

# Save to CSV
df.to_csv('synthetic_workspace_requests.csv', index=False)
print("Synthetic dataset generated with 2000 rows.")


Synthetic dataset generated with 2000 rows.
