In [1]:
import json, random
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

# Config
# Instance size: caregivers, patients, and days in the planning horizon.
N_NURSES = 10
N_PATIENTS = 30
DAYS = 7

# Seed applied to both the stdlib and the NumPy global generators below.
SEED = 7

# Patient coordinates are drawn from [0, AREA_MAX) on each axis.
AREA_MAX = 12.0
# Multiplier turning a Euclidean distance into travel minutes.
FACTOR_MIN_PER_DIST = 6.5

# Shift bounds in minutes after midnight (08:00 and 16:00).
SHIFT_START = 8 * 60
SHIFT_END = 16 * 60

# Written to params.json under the key "lambda_ot".
LAMBDA_OT = 2.5

# Destination directory for every generated file.
OUTDIR = Path("./PAwMP_HHC_Dataset")

random.seed(SEED)
np.random.seed(SEED)

OUTDIR.mkdir(parents=True, exist_ok=True)

In [2]:
# Sets
# Caregiver ids N1..N{N_NURSES}, patient ids P1..P{N_PATIENTS}, days 1..DAYS.
K = [f"N{k}" for k in range(1, N_NURSES + 1)]
P = [f"P{j}" for j in range(1, N_PATIENTS + 1)]
D = list(range(1, DAYS + 1))
# Routing nodes: the depot "0" followed by every patient.
nodes = ["0"] + P

# Skills
skills = ["A","B","C"]
# Caregivers receive skills round-robin in id order.
caregiver_skills = {k: skills[(i % len(skills))] for i, k in enumerate(K)}
# Manual override for caregiver N10. Guarded so that a smaller roster
# (N_NURSES < 10) does not gain an entry for a caregiver that is not in K.
if "N10" in caregiver_skills:
    caregiver_skills["N10"] = "B"
# Each patient requires one skill, drawn with the seeded stdlib generator.
patient_skills = {p: random.choice(skills) for p in P}

# Coordinates
# The depot sits at the origin; each patient gets an (x, y) pair drawn
# uniformly from [0, AREA_MAX), x first and then y, in patient order.
coords = {"0": (0.0, 0.0)}
coords.update(
    (p, (float(np.random.uniform(0, AREA_MAX)), float(np.random.uniform(0, AREA_MAX))))
    for p in P
)

# Travel-time matrix
# Integer minutes between every ordered pair of nodes; the diagonal stays 0.
# Noise is drawn independently per ordered pair, so t[i][j] may differ from t[j][i].
t = pd.DataFrame(0, index=nodes, columns=nodes, dtype=int)
for src in nodes:
    for dst in nodes:
        if src != dst:
            dx = coords[src][0] - coords[dst][0]
            dy = coords[src][1] - coords[dst][1]
            scaled = np.hypot(dx, dy) * FACTOR_MIN_PER_DIST
            jitter = np.random.uniform(-2.0, 2.0)
            # Round up and never let a leg take less than one minute.
            t.at[src, dst] = max(1, int(np.ceil(scaled + jitter)))

# Durations
# One service time per patient, an integer in [25, 45] (randint's upper bound is exclusive).
service_duration = {}
for p in P:
    service_duration[p] = int(np.random.randint(25, 46))

# Caregiver shifts per day
# Every caregiver has the same [SHIFT_START, SHIFT_END] shift on every day.
caregiver_day = pd.DataFrame(
    [(k, d, SHIFT_START, SHIFT_END) for k in K for d in D],
    columns=["caregiver_id", "day", "alpha", "beta"],
)

# One required day per patient (balanced 3..6 per day)
def ok(days, lo=3, hi=6):
    """Return whether every day in the global ``D`` is used between ``lo`` and ``hi`` times.

    ``days`` is a sequence of day labels, one per patient. Days in ``D`` that
    never occur count as zero; labels outside ``D`` are ignored.
    """
    per_day = pd.Series(days).value_counts().reindex(D, fill_value=0)
    fewest, most = per_day.min(), per_day.max()
    enough = fewest >= lo
    return enough and most <= hi

# Rejection sampling: draw one day per patient uniformly from D and redraw
# until ok() accepts the per-day counts, or the retry cap is reached.
assign_days = np.random.choice(D, size=len(P), replace=True)
tries = 0
while not ok(assign_days) and tries < 5000:
    assign_days = np.random.choice(D, size=len(P), replace=True)
    tries += 1

# The cap only prevents an endless loop. If it was exhausted, the last draw is
# kept, but the imbalance is reported instead of silently written to disk.
if not ok(assign_days):
    warnings.warn(
        f"No balanced day assignment found after {tries} redraws; "
        "keeping the last (unbalanced) draw.",
        RuntimeWarning,
    )

# Patient-day requirements & windows
# One row per (patient, day). Only the patient's assigned day is required and
# carries a real [e, l] window; every other day gets the disabled window (0, -1).
pd_rows = []
for p, d_req in zip(P, assign_days):
    duration = service_duration[p]
    for d in D:
        if d != d_req:
            pd_rows.append({"patient_id": p, "day": d, "required": 0,
                            "e": 0, "l": -1,
                            "service_duration": duration})
            continue

        # Window opens between 09:00 and 12:00 and nominally lasts 120..210 minutes.
        earliest = int(9 * 60 + np.random.randint(0, 181))
        wlen = int(np.random.randint(120, 211))
        # Clamp order matters: first cap at SHIFT_END - duration - 15, then
        # enforce at least 30 minutes of width, then cap at SHIFT_END - 15.
        # NOTE(review): the last cap ignores the service duration, unlike the
        # first one; confirm whether "l" is meant as a latest start time.
        latest = min(
            max(min(SHIFT_END - duration - 15, earliest + wlen), earliest + 30),
            SHIFT_END - 15,
        )
        pd_rows.append({"patient_id": p, "day": d, "required": 1,
                        "e": earliest, "l": latest,
                        "service_duration": duration})
patient_day = pd.DataFrame(pd_rows)

# Masters
# Patient master table: id, required skill, service time, and coordinates.
df_patients = pd.DataFrame(
    [(p, patient_skills[p], service_duration[p], coords[p][0], coords[p][1]) for p in P],
    columns=["patient_id", "skill_required", "service_duration", "x", "y"],
)
# Caregiver master table: id and the single skill each caregiver holds.
df_caregivers = pd.DataFrame(
    [(k, caregiver_skills[k]) for k in K],
    columns=["caregiver_id", "skill"],
)

# Patient x caregiver compatibility: 1 when the caregiver's skill equals the
# skill the patient requires, 0 otherwise. Rows are patient-major.
compatibility = pd.DataFrame(
    [(p, k, int(patient_skills[p] == caregiver_skills[k])) for p in P for k in K],
    columns=["patient_id", "caregiver_id", "compat"],
)

# Node table: the first node is the depot, all following nodes are patients.
nodes_df = pd.DataFrame(
    [(n, "depot" if pos == 0 else "patient", coords[n][0], coords[n][1])
     for pos, n in enumerate(nodes)],
    columns=["node_id", "type", "x", "y"],
)

# Params
# Big-M constant: shift length plus the longest travel leg plus a 30-minute margin.
shift_length = SHIFT_END - SHIFT_START
longest_leg = int(t.values.max())
bigM = int(shift_length + longest_leg + 30)
params = dict(lambda_ot=LAMBDA_OT, M=bigM)

In [3]:
# Save
# (frame, file name, write the index?) — only the travel-time matrix keeps its
# index, because its row labels are the origin nodes.
exports = [
    (df_patients, "patients.csv", False),
    (patient_day, "patient_day.csv", False),
    (df_caregivers, "caregivers.csv", False),
    (caregiver_day, "caregiver_day.csv", False),
    (compatibility, "compatibility.csv", False),
    (nodes_df, "nodes.csv", False),
    (t, "travel_time.csv", True),
]
for frame, filename, keep_index in exports:
    frame.to_csv(OUTDIR / filename, index=keep_index)

# Scalar model parameters go to a separate JSON file.
with open(OUTDIR/"params.json", "w") as f:
    json.dump(params, f, indent=2)

print("✓ Wrote dataset to:", OUTDIR.resolve())


✓ Wrote dataset to: /home/jovyan/PAwMP_HHC_Dataset
