In [1]:
import pandas as pd
import numpy as np
import statistics as stat

#--- Functions ---#
#-- Cleaning up source data --#
def clean_sd(path,ec):
    cd = []
    with open(path,'r') as f:
        for line in f:
            columns = line.strip().split('\t')
            if len(columns)<ec:
                columns += [''] * (ec - len(columns))
            elif len(columns)<ec:
                columns = columns[:2] + [' '.join(columns[2:])]
            
            cd.append(columns)

    return cd

def complete_sd(path,cn):
    """
    Completes source data by adding arrival events to corresponding workstations
    """
    sd = []
    WS = ["W1","W2","W3"]
    with open(path,'r') as f:
        for line in f:
            columns = line.strip().split('\t')
            #Translate creation event into arrival at WS 1
            if columns[2] == "Created":
                columns[2] = WS[0]
                columns.append("A")
                sd.append(columns)
            
            elif columns[3] == "D":
                sd.append(columns)
                W_departing = WS.index(columns[2])

                if W_departing < 2:
                    new_entry = [None]*4
                    new_entry[:2] = columns[:2]
                    new_entry[2:] = [WS[W_departing+1],"A"]
                    sd.append(new_entry)
    
    sd_c = pd.DataFrame(sd,columns=cn)
    sd_c["time"] = pd.to_numeric(sd_c["time"])
    return sd_c

def split_sd(sd_c,ws):
    """
    Devide completed source data per workstation
    """
    sd_s = sd_c.loc[sd_c.workstation == ws].copy()
    sd_s = sd_s.drop("workstation",axis=1)
    return sd_s

def get_arrival(sd_c,hb):
    """
    Determines the mean, standard deviation and coefficient of covariance for lots and batches (if applicable)
    Returns the batch structure if applicable
    """
    # select arrival data and determine the difference between arrivals
    Lot_dT = sd_c[sd_c['event'] == 'A']['time'].diff()
    # Determine mean, standard deviation and coefficient of covariance
    Lot_mean = Lot_dT.mean()
    Lot_std = Lot_dT.std()
    Lot_CoV = Lot_std/Lot_mean
    
    # handle as batch
    if hb:
        # build batch data structure
        Batch = build_batch_view(sd_c)
        # select arrival data and determine the difference between arrivals
        Batch_dT = Batch[Batch['event'] == 'A']['time'].diff()
        Batch_mean = Batch_dT.mean()
        Batch_std = Batch_dT.std()
        Batch_CoV = Batch_std/Batch_mean
    else:
        Batch = None
        Batch_mean = None
        Batch_std = None
        Batch_CoV = None

    result = pd.DataFrame({
        "Workstation": ["Batch", "Lot"],
        "Mean": [Batch_mean, Lot_mean],
        "Std": [Batch_std, Lot_std],
        "CoV": [Batch_CoV, Lot_CoV]})

    # print(result)
    return result, Batch

def get_distribution(EPT_r):
    """
    Determines the EPT mean, standard deviation and coefficient of variance
    Additionally the amount of realizations is determined for reporting purpouses
    """
    # Group data by WIP levels and determine the count, mean and standard deviation for each WIP level
    EPT_distribution = EPT_r.groupby('sw')['EPT'].agg(['count','mean','std'])
    # Determine the covariance: standard deviation / mean
    EPT_distribution['CoV'] = EPT_distribution['std'] / EPT_distribution['mean']

    # Group data by Number of lots in the buffer and determine the count, mean and standard deviation for each WIP level
    OT_distribution = EPT_r.groupby('aw')['k'].agg(['count','mean','std'])
    # Determine the covariance: standard deviation / mean
    OT_distribution['CoV'] = OT_distribution['std'] / OT_distribution['mean']
    
    #print(EPT_distribution)
    #print(OT_distribution)
    return EPT_distribution, OT_distribution

#-- EPT calculations --#
def get_ept_data(df):
    def detOvert(xs, i):
        ys = []
        while xs:
            j, aw = xs[0]       # head(xs)
            xs = xs[1:]         # tail(xs)
            if j < i:
                ys.append((j, aw))
            elif j == i:
                return ys + xs, len(ys), aw 
        raise ValueError(f"Lot {i} not found in system")

    initial_wip = 0
    non_tracked_lots = list()  # lots not tracked by the system

    # count initial WIP 
    for lot in df.lot.unique():
        if not df.loc[df.lot == lot, "event"].str.contains("A").any():
            initial_wip += 1
            non_tracked_lots.append(lot)

    # prefil xs with initial WIP
    xs = [(lot, initial_wip) for lot in non_tracked_lots] #state of system,
    s, sw = None, None          #  EPT start time, WIP at start
    records = []                        # output rows

    for τ, i, ev, *_ in df.itertuples(index=False, name=None):

        if ev.upper() == "A":           
            if not xs:                  # empty system -> new EPT
                s, sw = τ, 1
            xs.append((i, len(xs)))     # len(xs) is WIP before this arrival

        elif ev.upper() == "D":         

            xs, k, aw = detOvert(xs, i)

            if i not in non_tracked_lots:
                # determine EPT
                ept = τ - s
                # save record
                records.append(dict(lot=i, EPT=ept, sw=sw, k=k, aw=aw))

            # start a new EPT if system still contains lots
            if xs:
                s, sw = τ, len(xs)

        else:
            raise ValueError(f"Unknown event type '{ev}'")
    results = pd.DataFrame(records)
    return results

def build_batch_view(df: pd.DataFrame):
    # Simultaneous departures are a batch
    dep = df[df.event.str.upper() == "D"].copy()
    dep["group"] = dep.groupby("time").ngroup()

    batches = dep.groupby("group").filter(lambda g: len(g) > 0).copy()

    # create a batch id for each group
    group2virtual = {g: f"B{n}" for n, g in enumerate(sorted(batches.group.unique()))}
    batches["virtual"] = batches.group.map(group2virtual)

    lot2batch  = dict(zip(batches.lot, batches.virtual))

    # arrival = latest arrival of any member in the batch
    arrivals = (
        df[df.event.str.upper() == "A"]
          .loc[df.lot.isin(lot2batch)]                 # only batched lots
          .assign(virtual=lambda d: d.lot.map(lot2batch))
          .groupby("virtual", as_index=False)
          .agg(time=("time", "max"),
               lot_ids=("lot", lambda x: list(x)))     # collect ids
          .assign(event="A", lot=lambda d: d.virtual)  # rename columns
          .drop(columns="virtual")
    )

    # departure = common departure time 
    departures = (
        batches.groupby("virtual", as_index=False)
               .agg(time=("time", "first"),
                    lot_ids=("lot", lambda x: list(x)))
               .assign(event="D", lot=lambda d: d.virtual)
               .drop(columns="virtual")
    )

    drop_lots = set(lot2batch)                       # originals replaced by batch rows

    df_batch = (
        pd.concat([df, arrivals, departures], ignore_index=True)
          .loc[lambda d: ~d["lot"].isin(drop_lots)]  
          .sort_values("time")
          .reset_index(drop=True)
    )

    return df_batch


#--- Take Home solver ---#
#-- variables --#
# source data
sd_path = '4DM40/data/group07.txt'
expected_nr_columns = 4
column_names = ['time', 'lot', "workstation", 'event']

#-- Load assignment data --#
sd_completed = complete_sd(sd_path,column_names)
sd_W1 = split_sd(sd_completed,"W1")
sd_W2 = split_sd(sd_completed,"W2")
sd_W3 = split_sd(sd_completed,"W3")

Arival_W1, _ = get_arrival(sd_W1,False)
Arival_W2, Batch_W2 = get_arrival(sd_W2,True)

EPT_W1r = get_ept_data(sd_W1)
EPT_W2r = get_ept_data(Batch_W2)
EPT_W3r = get_ept_data(sd_W3)

[EPTd_W1, OTd_W1] = get_distribution(EPT_W1r)
[EPTd_W2, OTd_W2] = get_distribution(EPT_W2r)
[EPTd_W3, OTd_W3] = get_distribution(EPT_W3r)



# Extract ca  from W1 data
ca_for_simulation = Arival_W1.loc[Arival_W1.Workstation == 'Lot', 'CoV'].iloc[0]

# Format EPT distributions into lists of (mean, CoV) tuples
def format_epts_from_df(ept_df, workstation_name, default_mean=100.0, default_cov=1.0):
    """
    Converts EPT DataFrame to list of (mean, CoV) tuples.
    Handles empty DFs, NaN CoV, and sorts by original index (sw level).
    """

    ept_df_sorted = ept_df.sort_index()


    ept_df_sorted['CoV'] = ept_df_sorted['CoV'].replace([np.inf, -np.inf], np.nan) # Convert inf to NaN first
    ept_df_sorted['CoV'] = ept_df_sorted['CoV'].fillna(1.0) # Replace NaN CoV with 1.0
    ept_df_sorted['CoV'] = ept_df_sorted['CoV'].apply(lambda x: 1.0 if x <= 0 else x) # Ensure CoV > 0 for gamma distribution

    epts_list = list(zip(ept_df_sorted['mean'], ept_df_sorted['CoV']))
    

    epts_list_cleaned = [(m, c) for m, c in epts_list if pd.notna(m) and m > 0]
    
    return epts_list_cleaned


actual_epts_ws1 = format_epts_from_df(EPTd_W1, "Workstation 1", default_mean=180.0)
actual_epts_ws2 = format_epts_from_df(EPTd_W2, "Workstation 2 (Batched)", default_mean=120.0)
actual_epts_ws3 = format_epts_from_df(EPTd_W3, "Workstation 3", default_mean=90.0)



In [None]:

# =================================
# Imports
# =================================
from PyCh import *
from numpy import random
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# =================================
# Simulation parameters
# =================================
default_ta    = 50
default_ca    = ca_for_simulation
warmupcount   = 1000
lotcount      = 30000
shiftduration = 360.0 # minutes in a 6-hour shift

# Define your EPTs lists
EPTs_w1 = actual_epts_ws1
EPTs_w2 = actual_epts_ws2
EPTs_w3 = actual_epts_ws3

# ====================================================
# Functions and processes for simulation
# ====================================================
def Model(current_ta, current_ca, current_EPTs, current_warmupcount, current_lotcount, current_shiftduration):
    """
    Defines and runs the discrete event simulation model.
    Returns a dictionary with simulation results.
    """
    env = Environment()
    a = Channel(env)
    b = Channel(env)
    G = Generator(env, a, current_ta, current_ca, current_warmupcount, current_lotcount) # Pass counts to Generator
    W = Workstation(env, a, b, current_EPTs)
    E = Exit(env, b, current_warmupcount, current_lotcount, current_shiftduration)
    

    env.run(until=E)
    
    if hasattr(E, 'value'):
        return E.value
    else:
        print(f"Warning: Process E (Exit) does not have a '.value' attribute after running. "
              f"PyCh mechanism for retrieving process return value is unknown. Returning empty dict.")
        return {}


@process
def Generator(env, c_out, ta_param, ca_param, warmup_p, lotcount_p): # Added warmup and lotcount
    dist = lambda: random.gamma(1/(ca_param*ca_param), ca_param*ca_param*ta_param)
    for _ in range(warmup_p + lotcount_p + 5):
        x = env.time
        yield env.execute(c_out.send(x))
        delay = dist()
        yield env.timeout(delay)

@process
def Workstation(env, c_in, c_out, EPTs_param):
    c_bm = Channel(env)
    B = Buffer(env, c_in, c_bm)
    S = Server(env, c_bm, c_out, EPTs_param)
    yield B
    yield S

@process
def Buffer(env, c_in, c_out):
    xs = []
    while True:
        sending = c_out.send((xs[0],len(xs)-1)) if len(xs)>0 else None
        receiving = c_in.receive()
        # Check if we received a stop signal (e.g., None) if Generator stops
        if receiving is None and not xs: # If generator might send None to signal end
             break
        x = yield env.select(sending, receiving)

        if selected(receiving):
            if x is None and not xs : # If generator sends None to signal end
                break
            if x is not None:
                 xs = xs + [x]
        if selected(sending):
            xs = xs[1:]

@process
def Server(env, c_in, c_out, EPTs_param):
    u = [lambda te=te_val, ce=ce_val: random.gamma(1/(ce*ce), ce*ce*te) for (te_val,ce_val) in EPTs_param]
    maxindex=len(u)-1
    while True:
        received_data = yield env.execute(c_in.receive())
        if received_data is None: # If Buffer signals end-of-stream
            break
        (lot,index) = received_data
        
        delay = u[min(index,maxindex)]()
        yield env.timeout(delay)
        yield env.execute(c_out.send(lot))

@process
def Exit(env, c_in, local_warmupcount, local_lotcount, local_shiftduration):
    """
    Exit process:
    Collects all jobs leaving the system.
    Calculates mean flow time and throughput for measured lots.
    Returns a dictionary of results.
    """
    # Local variables for calculations, no 'self' needed for this.
    n_processed_total = 0
    n_measured = 0
    sum_flowtime_measured = 0.0
    # For more accurate throughput, track start/end of measurement period
    measurement_start_time = -1
    measurement_end_time = -1

    results_dict = {} # Initialize results dictionary

    while n_measured < local_lotcount:
        lot_arrival_time_at_generator = yield env.execute(c_in.receive()) # Changed from env.select
        
        if lot_arrival_time_at_generator is None: # If Server signals end-of-stream
            print("Warning: Exit received None from input channel, simulation might be ending.")
            break

        n_processed_total += 1

        if n_processed_total > local_warmupcount:
            if n_measured == 0: # First measured lot
                measurement_start_time = env.now # Or time it exited warmup - more complex
            
            n_measured += 1
            flowtime = env.now - lot_arrival_time_at_generator
            sum_flowtime_measured += flowtime
            
            if n_measured == local_lotcount: # Last measured lot
                measurement_end_time = env.now
    
    if n_measured > 0:
        meanflowtime = sum_flowtime_measured / n_measured
        
        # Throughput calculation for measured period
        if measurement_end_time > 0 and measurement_start_time > 0 and n_measured >1 : 
            pass # Let's stick to the simpler throughput for now.

        throughput_per_shift_original_method = (n_measured * local_shiftduration) / env.now

        print(f"Mean flowtime (in minutes): {meanflowtime:.2f}; Throughput (lots/6h shift, approx by total time): {throughput_per_shift_original_method:.2f}")
        results_dict = {
            "mean_flowtime_min": meanflowtime,
            "throughput_lots_per_shift": throughput_per_shift_original_method,
            "total_lots_measured": n_measured,
            "simulation_duration_min": env.now
        }
    else:
        print("No lots were measured (after warmup). Check simulation parameters.")
        results_dict = {
            "mean_flowtime_min": float('nan'),
            "throughput_lots_per_shift": float('nan'),
            "total_lots_measured": 0,
            "simulation_duration_min": env.now
        }
    return results_dict 


# ====================================================
# Running the simulation and collecting data 
# ====================================================
ta_levels_to_test = np.arange(600, 800, 40) 
EPT_sets_to_test = {

    "W2": EPTs_w2,

}

all_results = []

print("==========================================")
print("Starting Simulation Runs")
print("==========================================")

for ta_val in ta_levels_to_test:
    for ept_name, ept_list in EPT_sets_to_test.items():
        print(f"\nRunning simulation with: ta = {ta_val}, ca = {default_ca}, EPTs = {ept_name}")
        random.seed(int(ta_val) + len(ept_name))

        run_results = Model(current_ta=ta_val,
                              current_ca=default_ca,
                              current_EPTs=ept_list,
                              current_warmupcount=warmupcount,
                              current_lotcount=lotcount,
                              current_shiftduration=shiftduration)
        
        if run_results: # Check if Model returned results
            run_results['ta'] = ta_val
            run_results['ca'] = default_ca
            run_results['ept_set_name'] = ept_name
            all_results.append(run_results)
        else:
            print(f"Warning: Model run for ta={ta_val}, EPTs={ept_name} returned no results.")
        print("------------------------------------------")

print("\nAll simulation runs completed.")

if all_results:
    results_df = pd.DataFrame(all_results)
    cols_order = ['ept_set_name', 'ta', 'ca', 'mean_flowtime_min', 'throughput_lots_per_shift', 'total_lots_measured', 'simulation_duration_min']
    actual_cols = [col for col in cols_order if col in results_df.columns]
    results_df = results_df[actual_cols]
    
    csv_filename = "simulation_results.csv"
    results_df.to_csv(csv_filename, index=False)
    print(f"\nResults saved to {csv_filename}")
    print("\nSummary Results Table:")
    print(results_df.to_string())
else:
    print("No results to save or plot.")


if all_results and not results_df.empty:
    for ept_name_plot, group in results_df.groupby('ept_set_name'):
        # Plot 1: Mean Flowtime vs. ta for the current EPT Set
        plt.figure(figsize=(10, 6))
        plt.plot(group['ta'], group['mean_flowtime_min'], marker='o', linestyle='-')
        plt.xlabel('Mean Inter-Arrival Time (ta) [minutes]')
        plt.ylabel('Mean Flowtime [minutes]')
        plt.title(f'Mean Flowtime vs. Inter-Arrival Time for {ept_name_plot}')
        plt.grid(True)
        filename_flow = f"flowtime_vs_ta_{ept_name_plot}.png"
        plt.savefig(filename_flow)
        print(f"Plot '{filename_flow}' saved.")
        plt.show()
        
        # Plot 2: Throughput vs. ta for the current EPT Set
        plt.figure(figsize=(10, 6))
        plt.plot(group['ta'], group['throughput_lots_per_shift'], marker='x', linestyle='--')
        plt.xlabel('Mean Inter-Arrival Time (ta) [minutes]')
        plt.ylabel('Throughput [lots per 6h shift]')
        plt.title(f'Throughput vs. Inter-Arrival Time for {ept_name_plot}')
        plt.grid(True)
        filename_thru = f"throughput_vs_ta_{ept_name_plot}.png"
        plt.savefig(filename_thru)
        print(f"Plot '{filename_thru}' saved.")
        plt.show()



Starting Simulation Runs

Running simulation with: ta = 600, ca = 1.0076535045644348, EPTs = W2
Mean flowtime (in minutes): 3894.38; Throughput (lots/6h shift, approx by total time): 0.58
------------------------------------------

Running simulation with: ta = 640, ca = 1.0076535045644348, EPTs = W2
Mean flowtime (in minutes): 2266.91; Throughput (lots/6h shift, approx by total time): 0.54
------------------------------------------

Running simulation with: ta = 680, ca = 1.0076535045644348, EPTs = W2
Mean flowtime (in minutes): 1778.08; Throughput (lots/6h shift, approx by total time): 0.51
------------------------------------------

Running simulation with: ta = 720, ca = 1.0076535045644348, EPTs = W2
Mean flowtime (in minutes): 1499.86; Throughput (lots/6h shift, approx by total time): 0.49
------------------------------------------

Running simulation with: ta = 760, ca = 1.0076535045644348, EPTs = W2
Mean flowtime (in minutes): 1267.33; Throughput (lots/6h shift, approx by total 

<IPython.core.display.Javascript object>

Plot 'throughput_vs_ta_W2.png' saved.


<IPython.core.display.Javascript object>