In [1]:
%matplotlib inline
import pandas as pd
import pm4py
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter
import matplotlib.pyplot as plt

# Set style (try different options if this fails)
try:
    plt.style.use('seaborn-v0_8-darkgrid')
except:
    plt.style.use('seaborn')
    
print("All imports successful!")

All imports successful!


In [6]:
import pandas as pd
import re

# Load CSV (Windows-safe path)
df = pd.read_csv(r'results\FIFO_l0.1_actuator_manufacturing_with_rework.csv')

print(f"Total events: {len(df)}")
print(f"Unique cases: {df['case_id'].nunique()}")
print(f"Time range: {df['timestamp'].min():.2f} - {df['timestamp'].max():.2f}")
print(f"\nActivity distribution:\n{df['activity'].value_counts()}")

# 2) Check FIFO order at a pooled station (e.g., ASSEMBLY_2)
st = "ASSEMBLY_2"
x = df[(df["status"]=="running") & (df["activity"]==st)].copy().sort_values("timestamp")
print("\nFirst 10 starts at", st)
print(x[["case_id","timestamp","resource"]].head(10))

# 3) Check that A1 rework returns to the SAME queue/lane
# Find an A1 label that exists in this log (pooled or lane-specific)
a1_labels = [a for a in df["activity"].unique() if re.fullmatch(r"ASSEMBLY_1(_\d+)?", str(a))]
if a1_labels:
    lane = sorted(a1_labels)[0]  # pick the first matching label
    qc_label = "QC_AFTER_ASSEMBLY1" if lane == "ASSEMBLY_1" else f"QC_AFTER_{lane}"
    cid_example = df[(df["activity"]==lane) & (df["status"]=="running")]["case_id"].head(1)
    if not cid_example.empty:
        cid = int(cid_example.iloc[0])
        y = df[(df["case_id"]==cid) & df["activity"].isin([lane, qc_label])].sort_values("timestamp")
        print(f"\nTrace for case {cid} at {lane}:")
        print(y[["timestamp","activity","status","resource"]].head(20))
    else:
        print(f"\nNo runs found for {lane} in this file.")
else:
    print("\nNo ASSEMBLY_1 labels found (pooled or lanes).")

# 4) Visual FIFO check at ASSEMBLY_2 (first 15 starts)
def station_fifo_sample(station):
    return df[(df["activity"]==station) & (df["status"]=="running")] \
             [["case_id","timestamp","resource"]].sort_values("timestamp").head(15)

print("\nVisual FIFO check (first 15 at ASSEMBLY_2):")
print(station_fifo_sample("ASSEMBLY_2"))


Total events: 6005
Unique cases: 477
Time range: 51.90 - 5004.21

Activity distribution:
activity
MOULDING              510
QC_AFTER_MOULDING     510
ASSEMBLY_1            509
QC_AFTER_ASSEMBLY1    509
QC_AFTER_ASSEMBLY2    504
ASSEMBLY_2            504
QC_AFTER_PACKAGING    503
PACKAGING             503
QC_AFTER_SORTING      500
SORTING               500
START                 477
END                   476
Name: count, dtype: int64

First 10 starts at ASSEMBLY_2
     case_id   timestamp          resource
11         1   61.116517  ASSEMBLY2_LINE_1
25         0   69.797968  ASSEMBLY2_LINE_2
33         2   77.668036  ASSEMBLY2_LINE_1
35         3   81.228050  ASSEMBLY2_LINE_2
57         4  101.265794  ASSEMBLY2_LINE_1
59         5  101.389980  ASSEMBLY2_LINE_2
85         7  140.492282  ASSEMBLY2_LINE_2
89         6  146.938405  ASSEMBLY2_LINE_1
103        8  157.136339  ASSEMBLY2_LINE_2
121        9  174.356464  ASSEMBLY2_LINE_1

Trace for case 0 at ASSEMBLY_1:
    timestamp            ac

In [12]:
import pandas as pd
import re

# Load your file
df = pd.read_csv(r'results\FIFO_l0.1_actuator_manufacturing_with_rework.csv')

# 1) Define arrival events per station (first + rework arrivals)
ARRIVALS = {
    "MOULDING":   ["START", "QC_AFTER_MOULDING"],     # first is START, rework from its own QC
    "ASSEMBLY_1": ["QC_AFTER_MOULDING", "QC_AFTER_ASSEMBLY1"],
    "ASSEMBLY_2": ["QC_AFTER_ASSEMBLY1", "QC_AFTER_ASSEMBLY2"],
    "SORTING":    ["QC_AFTER_ASSEMBLY2", "QC_AFTER_SORTING"],
    "PACKAGING":  ["QC_AFTER_SORTING", "QC_AFTER_PACKAGING"],
}

# If you ever run the dedicated-lane scenario, include lanes too:
for i in range(1,6):
    ARRIVALS[f"ASSEMBLY_1_{i}"] = ["QC_AFTER_MOULDING", f"QC_AFTER_ASSEMBLY1_{i}"]

# 2) Build (arrival_time, start_time) **per visit** for each station and case
def station_visits(df, station, arrival_labels):
    # Starts at the station
    starts = df[(df["status"]=="running") & (df["activity"]==station)] \
               .sort_values("timestamp")[["case_id","timestamp","activity","resource"]] \
               .rename(columns={"timestamp":"start_time"})
    if starts.empty:
        return pd.DataFrame(columns=["case_id","visit_idx","arrival_time","start_time","station","resource"])

    # All arrival events for this station
    # START rows are status=="START"; QC rows are status=="gateway"
    starts_evt = df[df["status"]=="START"][["case_id","timestamp"]] \
                   .rename(columns={"timestamp":"arrival_time"}).assign(activity="START")
    qcs = df[(df["status"]=="gateway") & (df["activity"].isin(arrival_labels))] \
            [["case_id","activity","timestamp"]].rename(columns={"timestamp":"arrival_time"})
    arrivals = pd.concat([starts_evt[starts_evt["activity"].isin(arrival_labels)], qcs], ignore_index=True)

    # Keep only arrival rows for cases that actually start at this station
    arrivals = arrivals[arrivals["case_id"].isin(starts["case_id"].unique())] \
                       .sort_values(["case_id","arrival_time","activity"]).copy()

    # Number visits per case independently for arrivals and starts, then align by index
    arrivals["visit_idx"] = arrivals.groupby("case_id").cumcount()
    starts["visit_idx"]   = starts.groupby("case_id").cumcount()

    m = starts.merge(arrivals[["case_id","visit_idx","arrival_time"]],
                     on=["case_id","visit_idx"], how="left")
    m["station"] = station
    return m

all_visits = []
for st, arr_labels in ARRIVALS.items():
    all_visits.append(station_visits(df, st, arr_labels))
visits = pd.concat(all_visits, ignore_index=True)

# 3) FIFO test: in start order, the arrival_time must be non-decreasing (ties OK)
fifo_results = []
for st, g in visits.groupby("station", sort=True):
    g = g.sort_values("start_time")
    # Count strict violations: a later start has an earlier arrival than the previous start
    violations = (g["arrival_time"].diff().dropna() < 0).sum()
    fifo_results.append((st, len(g), int(violations)))
fifo_df = pd.DataFrame(fifo_results, columns=["station","num_starts","fifo_violations"]).sort_values("station")
print(fifo_df)

# 4) If any violations remain, show a few for inspection
bad = []
for st, g in visits.groupby("station", sort=True):
    g = g.sort_values("start_time").reset_index(drop=True)
    mask = g["arrival_time"].diff() < 0
    if mask.any():
        out = g.loc[mask, ["station","case_id","arrival_time","start_time","resource"]].copy()
        bad.append(out.head(5))
if bad:
    print("\nSample suspected FIFO breaches (visit-aligned):")
    print(pd.concat(bad, ignore_index=True))
else:
    print("\nNo FIFO breaches found with visit alignment.")

      station  num_starts  fifo_violations
0  ASSEMBLY_1         509               11
1  ASSEMBLY_2         504               12
2    MOULDING         510                0
3   PACKAGING         503                5
4     SORTING         500               10

Sample suspected FIFO breaches (visit-aligned):
       station case_id  arrival_time   start_time          resource
0   ASSEMBLY_1      33    402.311682   411.599726  LINE_3_ASSEMBLY1
1   ASSEMBLY_1     103   1102.818885  1113.767775  LINE_1_ASSEMBLY1
2   ASSEMBLY_1     138   1455.327242  1465.529719  LINE_3_ASSEMBLY1
3   ASSEMBLY_1     169   1773.614091  1776.680566  LINE_3_ASSEMBLY1
4   ASSEMBLY_1     185   1856.545798  1861.188596  LINE_3_ASSEMBLY1
5   ASSEMBLY_2      36    438.669189   443.125091  ASSEMBLY2_LINE_2
6   ASSEMBLY_2      52    597.094904   599.170323  ASSEMBLY2_LINE_2
7   ASSEMBLY_2      50    603.308664   614.308147  ASSEMBLY2_LINE_1
8   ASSEMBLY_2     103   1113.767775  1130.550345  ASSEMBLY2_LINE_1
9   ASSEMBLY_

  visits = pd.concat(all_visits, ignore_index=True)


In [13]:
import pandas as pd
import re

# Load your file
df = pd.read_csv(r'results\FIFO_l0.1_actuator_manufacturing_with_rework.csv')

# 1) Define arrival events per station (first + rework arrivals)
ARRIVALS = {
    "MOULDING":   ["START", "QC_AFTER_MOULDING"],     # first is START, rework from its own QC
    "ASSEMBLY_1": ["QC_AFTER_MOULDING", "QC_AFTER_ASSEMBLY1"],
    "ASSEMBLY_2": ["QC_AFTER_ASSEMBLY1", "QC_AFTER_ASSEMBLY2"],
    "SORTING":    ["QC_AFTER_ASSEMBLY2", "QC_AFTER_SORTING"],
    "PACKAGING":  ["QC_AFTER_SORTING", "QC_AFTER_PACKAGING"],
}

# If you ever run the dedicated-lane scenario, include lanes too:
for i in range(1,6):
    ARRIVALS[f"ASSEMBLY_1_{i}"] = ["QC_AFTER_MOULDING", f"QC_AFTER_ASSEMBLY1_{i}"]

# 2) Build (arrival_time, start_time) **per visit** for each station and case
def station_visits(df, station, arrival_labels):
    # Starts at the station
    starts = df[(df["status"]=="running") & (df["activity"]==station)] \
               .sort_values("timestamp")[["case_id","timestamp","activity","resource"]] \
               .rename(columns={"timestamp":"start_time"})
    if starts.empty:
        return pd.DataFrame(columns=["case_id","visit_idx","arrival_time","start_time","station","resource"])

    # All arrival events for this station
    # START rows are status=="START"; QC rows are status=="gateway"
    starts_evt = df[df["status"]=="START"][["case_id","timestamp"]] \
                   .rename(columns={"timestamp":"arrival_time"}).assign(activity="START")
    qcs = df[(df["status"]=="gateway") & (df["activity"].isin(arrival_labels))] \
            [["case_id","activity","timestamp"]].rename(columns={"timestamp":"arrival_time"})
    arrivals = pd.concat([starts_evt[starts_evt["activity"].isin(arrival_labels)], qcs], ignore_index=True)

    # Keep only arrival rows for cases that actually start at this station
    arrivals = arrivals[arrivals["case_id"].isin(starts["case_id"].unique())] \
                       .sort_values(["case_id","arrival_time","activity"]).copy()

    # Number visits per case independently for arrivals and starts, then align by index
    arrivals["visit_idx"] = arrivals.groupby("case_id").cumcount()
    starts["visit_idx"]   = starts.groupby("case_id").cumcount()

    m = starts.merge(arrivals[["case_id","visit_idx","arrival_time"]],
                     on=["case_id","visit_idx"], how="left")
    m["station"] = station
    return m

all_visits = []
for st, arr_labels in ARRIVALS.items():
    all_visits.append(station_visits(df, st, arr_labels))
visits = pd.concat(all_visits, ignore_index=True)

# 3) FIFO test: in start order, the arrival_time must be non-decreasing (ties OK)
fifo_results = []
for st, g in visits.groupby("station", sort=True):
    g = g.sort_values("start_time")
    # Count strict violations: a later start has an earlier arrival than the previous start
    violations = (g["arrival_time"].diff().dropna() < 0).sum()
    fifo_results.append((st, len(g), int(violations)))
fifo_df = pd.DataFrame(fifo_results, columns=["station","num_starts","fifo_violations"]).sort_values("station")
print(fifo_df)

# 4) If any violations remain, show a few for inspection
bad = []
for st, g in visits.groupby("station", sort=True):
    g = g.sort_values("start_time").reset_index(drop=True)
    mask = g["arrival_time"].diff() < 0
    if mask.any():
        out = g.loc[mask, ["station","case_id","arrival_time","start_time","resource"]].copy()
        bad.append(out.head(5))
if bad:
    print("\nSample suspected FIFO breaches (visit-aligned):")
    print(pd.concat(bad, ignore_index=True))
else:
    print("\nNo FIFO breaches found with visit alignment.")

      station  num_starts  fifo_violations
0  ASSEMBLY_1         509               11
1  ASSEMBLY_2         504               12
2    MOULDING         510                0
3   PACKAGING         503                5
4     SORTING         500               10

Sample suspected FIFO breaches (visit-aligned):
       station case_id  arrival_time   start_time          resource
0   ASSEMBLY_1      33    402.311682   411.599726  LINE_3_ASSEMBLY1
1   ASSEMBLY_1     103   1102.818885  1113.767775  LINE_1_ASSEMBLY1
2   ASSEMBLY_1     138   1455.327242  1465.529719  LINE_3_ASSEMBLY1
3   ASSEMBLY_1     169   1773.614091  1776.680566  LINE_3_ASSEMBLY1
4   ASSEMBLY_1     185   1856.545798  1861.188596  LINE_3_ASSEMBLY1
5   ASSEMBLY_2      36    438.669189   443.125091  ASSEMBLY2_LINE_2
6   ASSEMBLY_2      52    597.094904   599.170323  ASSEMBLY2_LINE_2
7   ASSEMBLY_2      50    603.308664   614.308147  ASSEMBLY2_LINE_1
8   ASSEMBLY_2     103   1113.767775  1130.550345  ASSEMBLY2_LINE_1
9   ASSEMBLY_

  visits = pd.concat(all_visits, ignore_index=True)


In [15]:
import pandas as pd
df = pd.read_csv(r'results\FIFO_l0.1_actuator_manufacturing_with_rework.csv')
print(df['status'].value_counts())   # must include 'queued'

# starts at stations
starts = df[df["status"]=="running"][["activity","case_id","timestamp","resource"]] \
           .rename(columns={"timestamp":"start_time"})
# true station-arrivals
queued = df[df["status"]=="queued"][["activity","case_id","timestamp"]] \
           .rename(columns={"timestamp":"arrival_time"})

# visit-align per (activity, case_id)
starts["visit_idx"] = starts.groupby(["activity","case_id"]).cumcount()
queued["visit_idx"] = queued.groupby(["activity","case_id"]).cumcount()

visits = starts.merge(queued, on=["activity","case_id","visit_idx"], how="left")

rows, windows = [], []
for st, g in visits.groupby("activity", sort=True):
    g = g.sort_values("start_time").reset_index(drop=True)
    diffs = g["arrival_time"].diff()
    violations = int((diffs < 0).sum())
    rows.append((st, len(g), violations))
    # collect small windows around violations to inspect
    for i in g.index[diffs < 0][:5]:
        lo, hi = max(0, i-1), min(len(g)-1, i+1)
        windows.append(g.loc[lo:hi, ["activity","case_id","arrival_time","start_time","resource"]])

fifo = pd.DataFrame(rows, columns=["station","num_starts","fifo_violations"]).sort_values("station")
print(fifo)

if windows:
    print("\nWindows around violations:")
    print(pd.concat(windows, ignore_index=True))
else:
    print("\nNo FIFO breaches found using true queued times.")


status
queued      2526
gateway     2526
running     2526
START        477
COMPLETE     476
Name: count, dtype: int64
      station  num_starts  fifo_violations
0  ASSEMBLY_1         509                0
1  ASSEMBLY_2         504                0
2    MOULDING         510                0
3   PACKAGING         503                0
4     SORTING         500                0

No FIFO breaches found using true queued times.
