In [None]:
import pandas as pd
import numpy as np
import re

## Exercise 2

In [None]:
# Load the Exercise 2 event log: whitespace-separated columns, no header row.
# Forward slashes keep the relative path valid on Windows, Linux and macOS
# (a backslash is only a path separator on Windows).
df = pd.read_csv(
    "data/test.txt",
    sep=r"\s+",
    header=None,
    names=["time", "lot", "event"],
).sort_values("time", kind="mergesort")  # stable sort: same-time events keep their file order

def get_ept_data(df):
    """Compute effective process times (EPTs) from an arrival/departure log.

    Parameters
    ----------
    df : pandas.DataFrame
        Event log in chronological order. It must expose ``lot`` and
        ``event`` columns, and its first three columns must be, in this
        order, the event time, the lot id and the event type, because rows
        are unpacked positionally. Event types are ``"A"`` (arrival) and
        ``"D"`` (departure), compared case-insensitively.

    Returns
    -------
    pandas.DataFrame
        One row per departure of a tracked lot, with columns

        * ``lot`` - id of the departing lot,
        * ``EPT`` - departure time minus the start time of the current EPT,
        * ``sw``  - number of lots in the system when that EPT started,
        * ``k``   - number of lots still in the system that entered it
          before the departing lot (i.e. lots it overtook),
        * ``aw``  - number of lots in the system just before the lot arrived.

        The columns are present even when no record is produced.

    Raises
    ------
    ValueError
        If an event type is neither ``A`` nor ``D``, or if a lot departs
        that is not currently in the system.

    Notes
    -----
    Lots that never have an arrival event are treated as initial WIP: they
    are placed in the system before the first event and produce no record.
    A departure that happens before any EPT start time is known (possible
    only while initial WIP is still present) produces no record either,
    because its EPT cannot be determined from the log.
    """
    def detOvert(xs, i):
        """Remove lot ``i`` from ``xs``.

        Returns the remaining system state, the number of lots that were
        ahead of ``i`` in ``xs`` (the overtaken lots) and the WIP recorded
        when ``i`` arrived. Lots ahead of ``i`` are always kept in the
        returned state, whatever their ids are.
        """
        for pos, (j, aw) in enumerate(xs):
            if j == i:
                return xs[:pos] + xs[pos + 1:], pos, aw
        raise ValueError(f"Lot {i} not found in system")

    # Lots with at least one arrival event; same case-insensitive rule as the
    # event dispatch in the main loop below.
    arrived = set(df.loc[df["event"].str.upper() == "A", "lot"])

    # Lots without any arrival were already in the system at the start of the
    # log (initial WIP). Order of first appearance is preserved.
    non_tracked_lots = [lot for lot in df["lot"].unique() if lot not in arrived]
    untracked = set(non_tracked_lots)   # constant-time membership test
    initial_wip = len(non_tracked_lots)

    # Prefill the system state with the initial WIP.
    xs = [(lot, initial_wip) for lot in non_tracked_lots]
    s, sw = None, None                  # EPT start time, WIP at EPT start
    records = []                        # output rows

    for t, i, ev, *_ in df.itertuples(index=False, name=None):
        kind = ev.upper()

        if kind == "A":
            if not xs:                  # empty system -> new EPT starts now
                s, sw = t, 1
            xs.append((i, len(xs)))     # len(xs) is WIP before this arrival

        elif kind == "D":
            xs, k, aw = detOvert(xs, i)

            # Only tracked lots with a known EPT start yield a record.
            if i not in untracked and s is not None:
                records.append(dict(lot=i, EPT=t - s, sw=sw, k=k, aw=aw))

            # Start a new EPT if the system still contains lots.
            if xs:
                s, sw = t, len(xs)

        else:
            raise ValueError(f"Unknown event type '{ev}'")

    return pd.DataFrame(records, columns=["lot", "EPT", "sw", "k", "aw"])
    

# Run the EPT algorithm on the Exercise 2 log, then print the first records
# followed by the mean EPT over all records.
results = get_ept_data(df)

print(
    results.head(20),
    f"\nMean Effective Processing Time: {results.EPT.mean():.2f} minutes",
    sep="\n",
)


        lot  EPT  sw  k  aw
0  lot00005    4   2  1   3
1  lot00006    3   2  0   2
2  lot00007    0   1  0   3

Mean Effective Processing Time: 2.33 minutes


## Exercise 4

In [52]:
# Load the Exercise 4 event log: whitespace-separated columns, no header row.
# Forward slashes keep the relative path valid on Windows, Linux and macOS
# (a backslash is only a path separator on Windows).
df = pd.read_csv(
    "data/group07.txt",
    sep=r"\s+",
    header=None,
    names=["time", "lot", "machine", "event"],
).sort_values("time", kind="mergesort")  # stable sort: same-time events keep their file order

### Exercise 3: Data Engineering

In [None]:

def expand_flow(df: pd.DataFrame) -> pd.DataFrame:
    """Turn a creation/departure log into an explicit arrival/departure log.

    Two rewrites are applied to a copy of ``df`` (the input is left as is):

    * rows whose machine is ``"Created"`` become arrivals (``"A"``) at ``"W1"``;
    * every departure (``"D"``) from machine ``W<n>`` gets an additional
      arrival row at ``W<n+1>`` with the same time and lot.

    The result is sorted by ``time`` with a stable sort, so rows sharing a
    timestamp keep their input order and each derived arrival comes after
    the rows of the original log. The index is reset.

    Raises
    ------
    ValueError
        If a departure row names a machine that is not of the form ``W<n>``.
    """
    station_pattern = re.compile(r"^W(\d+)$")

    def _successor(name: str) -> str:
        """Return the name of the machine that follows ``name``."""
        found = station_pattern.match(name)
        if found is None:
            raise ValueError(f"Cannot compute next machine for {name!r}")
        number = int(found.group(1))
        return f"W{number + 1}"

    events = df.copy()

    # A freshly created lot is an arrival at the first machine.
    created = events["machine"] == "Created"
    events.loc[created, "machine"] = "W1"
    events.loc[created, "event"] = "A"

    # Each departure is mirrored as an arrival at the following machine.
    follow_ups = events[events["event"] == "D"].copy()
    follow_ups["event"] = "A"
    follow_ups["machine"] = follow_ups["machine"].apply(_successor)

    combined = pd.concat([events, follow_ups], ignore_index=True)
    combined = combined.sort_values("time", kind="mergesort")
    return combined.reset_index(drop=True)

# NOTE: this rebinds `df`. Re-running the cell without reloading the log first
# would expand the already-expanded frame a second time.
df = expand_flow(df)

# handle W2: keep only the events of machine W2 (independent copy)
df2 = df.loc[df.machine == "W2"].copy()
df2.head()  # call the method; a bare `df2.head` only shows the bound method


Unnamed: 0,time,lot,machine,event
4,74188,lot00994,W2,A
6,74218,lot00995,W2,A
9,74298,lot00997,W2,A
11,74317,lot00989,W2,D
12,74317,lot00987,W2,D


### Build batch rows for machine W2

In [None]:
def build_batch_view(df: pd.DataFrame):
    """Replace the individual lots of a machine log by virtual batch lots.

    All lots departing at the same time form one batch. Batches are named
    ``B0``, ``B1``, ... in order of increasing departure time. For every
    batch the result holds

    * a departure row (``event == "D"``) at the common departure time, and
    * an arrival row (``event == "A"``) at the latest arrival time of any
      member, provided at least one member has an arrival row in ``df``.

    Both rows store the batch name in ``lot`` and the member ids that
    contributed to the row in ``lot_ids``. They carry no ``machine`` value.
    All original rows of batched lots are dropped; rows of lots that never
    depart are kept unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Event log with ``time``, ``lot`` and ``event`` columns, where
        ``event`` is ``"A"`` or ``"D"`` (case-insensitive).

    Returns
    -------
    pandas.DataFrame
        The batch-level log sorted by ``time`` with a fresh index. The sort
        is stable and arrival rows are placed before departure rows, so a
        batch whose arrival and departure share a timestamp still arrives
        before it departs.
    """
    event_kind = df["event"].str.upper()
    is_arrival = event_kind == "A"
    is_departure = event_kind == "D"

    # Simultaneous departures are a batch: number the distinct departure
    # times in increasing order and derive the batch id from that number.
    batches = df.loc[is_departure].copy()
    batches["virtual"] = batches.groupby("time").ngroup().map(lambda g: f"B{g}")

    lot2batch = dict(zip(batches["lot"], batches["virtual"]))
    batched_lots = set(lot2batch)       # originals replaced by batch rows

    # arrival = latest arrival of any member in the batch.
    # Both masks are built on `df` itself and combined before indexing, so
    # the selection does not depend on index alignment.
    arrivals = (
        df.loc[is_arrival & df["lot"].isin(batched_lots)]
          .assign(virtual=lambda d: d["lot"].map(lot2batch))
          .groupby("virtual", as_index=False)
          .agg(time=("time", "max"),
               lot_ids=("lot", list))                     # collect member ids
          .assign(event="A", lot=lambda d: d["virtual"])  # batch id acts as lot id
          .drop(columns="virtual")
    )

    # departure = common departure time of the batch
    departures = (
        batches.groupby("virtual", as_index=False)
               .agg(time=("time", "first"),
                    lot_ids=("lot", list))
               .assign(event="D", lot=lambda d: d["virtual"])
               .drop(columns="virtual")
    )

    df_batch = (
        pd.concat([df, arrivals, departures], ignore_index=True)
          .loc[lambda d: ~d["lot"].isin(batched_lots)]
          .sort_values("time", kind="mergesort")  # stable: arrivals stay ahead of departures on ties
          .reset_index(drop=True)
    )

    return df_batch


# Build the batch-level log for W2 and order the columns so that time, lot
# and event come first (get_ept_data unpacks the first three columns by position).
df2_batch = build_batch_view(df2).reindex(
    columns=["time", "lot", "event", "lot_ids", "machine"]
)
df2_batch.head(20)

Unnamed: 0,time,lot,event,lot_ids,machine
0,74317,B0,D,"[lot00989, lot00987, lot00990, lot00982, lot00...",
1,74611,B1,A,"[lot00994, lot00995, lot00997, lot00996, lot00...",
2,75242,B1,D,"[lot00992, lot00991, lot01000, lot00999, lot00...",
3,75404,B2,A,"[lot01001, lot01003, lot01004, lot01005, lot01...",
4,75952,B2,D,"[lot01008, lot01002, lot01001, lot01007, lot01...",
5,75964,B3,A,"[lot01013, lot01012, lot01011, lot01015, lot01...",
6,76474,B3,D,"[lot01012, lot01015, lot01011, lot01013, lot01...",
7,76687,B4,A,"[lot01022, lot01021, lot01024, lot01023, lot01...",
8,77283,B4,D,"[lot01022, lot01021, lot01024, lot01023, lot01...",
9,77446,B5,A,"[lot01033, lot01030, lot01035, lot01036, lot01...",


### Now we can run the original EPT algorithm on the batch view

In [67]:
# Apply the EPT algorithm to the batch-level log; each batch id acts as a lot.
batch_ept = get_ept_data(df=df2_batch)
batch_ept.head(n=20)

Unnamed: 0,lot,EPT,sw,k,aw
0,B1,631,1,0,0
1,B2,548,1,0,0
2,B3,510,1,0,0
3,B4,596,1,0,0
4,B5,527,1,0,0
5,B6,516,1,0,0
6,B7,523,1,0,0
7,B8,605,1,0,0
8,B9,665,1,0,0
9,B10,544,1,0,0
