In [31]:
import pandas as pd
from pandas_ods_reader import read_ods
import numpy as np
import datetime as datetime
import re

# Datenvorverarbeitung

In [32]:
path = "../data/Arbeitsplanung_steuerung_Wickelmaschinen_alt.ods"
sheet_idx = 14
df = read_ods(path, sheet_idx)
df = df.rename(columns=df.iloc[10])
df = df.rename(columns={df.columns[0]:'Nichts'})
df = df.drop('Nichts', axis=1)
df = df.drop(np.arange(12))
df = df.reset_index(drop=True)
df = df[['Fertigungsauf-tragsnummer', 'Teilenummer', 
         'Auftragsmenge',  'Nummer Wickel-rohrmaschine', 'Werkzeug-nummer', 'Rüstzeit für WKZ/Materialwechsel', 'Rüstzeit für Coilwechsel', 'Maschinen-laufzeit', 'Fertigungszeit pro Mengeneinheit']]
df

Unnamed: 0,Fertigungsauf-tragsnummer,Teilenummer,Auftragsmenge,Auftragsmenge.1,Nummer Wickel-rohrmaschine,Werkzeug-nummer,Rüstzeit für WKZ/Materialwechsel,Rüstzeit für Coilwechsel,Maschinen-laufzeit,Fertigungszeit pro Mengeneinheit
0,92021412719.0,60PAP2060,1000.0,0.0,Wickelrohrmaschine 1531,A0 023,15.0,2.0,323.96,0.31
1,92021529395.0,70AA30000,3000.0,34.0,Wickelrohrmaschine 1531,A0 023,15.0,2.0,147.19,3.89
2,92021529919.0,80PAK,3000.0,24.0,Wickelrohrmaschine 1531,A0 023,15.0,2.0,124.49,4.56
3,92021530000.0,90APK004,3000.0,20.0,Wickelrohrmaschine 1531,A0 023,15.0,2.0,104.03,4.45
4,92021539118.0,55PAP3985,480.0,20.0,Wickelrohrmaschine 1535,A0 023,15.0,2.0,570.94,1.16
5,92021539115.0,55PAP1140,300.0,20.0,Wickelrohrmaschine 1535,A0 023,15.0,2.0,75.24,0.2
6,92021640739.0,55PAP0450,500.0,0.0,Wickelrohrmaschine 1536,A0 012,15.0,2.0,53.32,0.08
7,92021640740.0,55PAP0540,300.0,0.0,Wickelrohrmaschine 1537,A0 012,15.0,2.0,42.59,0.09
8,92021641265.0,80APK1401,21.0,0.0,Wickelrohrmaschine 1531,A0 016,15.0,2.0,19.68,0.22
9,92021640738.0,55PAP0060,480.0,0.0,Wickelrohrmaschine 1535,A0 012,15.0,2.0,19.9,0.01


In [22]:
def parse_machine_number(df, column):
    """
    Function extracts the machine number of a expression in the format
    "Wickelrohrmaschine 1234" and gives back only the machines with a
    number expression.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe with a column which contains a string expression
         for the machine
    column: string
        Name of the column that contains information about the machine

    Returns
    ---------
    pd.DataFrame
        Updated dataframe with new columns 'machine' and 'machine_id
    """
    #df = df[df.Nummer Wickel-rohrmaschine.str.startswith('Wickelrohrmaschine')]
    df['machine_id'] = df.apply(lambda row: str(row[column][-4:]).strip(),
                             axis=1)
    return df


In [29]:
df = parse_machine_number(df, 'Nummer Wickel-rohrmaschine')
df

KeyError: 'Nummer Wickel-rohrmaschine'

In [24]:
def calculate_setup_time(tool1, tool2):
    if tool1 == tool2:
        setup_time = 0
    else:
        setup_time = 15
    return setup_time

def calculate_timestamps(df, start, last_tool):
    machines = df['machine_id'].astype(int).unique()
    df = df.assign(starttime=0)
    df = df.assign(endtime=0)
    df = df.assign(setup_time=0)
    # Für jede Maschine 
    for machine in machines:
        df_machine = df[df['machine_id'] == str(machine)]
        timestamp = start
        # Entsprechend der Reihenfolge timestamps berechnen
        for index, row in df_machine.iterrows():
            order_num = row['Fertigungsauf-tragsnummer']
            if timestamp.hour > 18: # Nächster Tag
                timestamp = datetime.datetime(timestamp.year, timestamp.month, timestamp.day+1, 6, 0, 0)
            df.loc[df['Fertigungsauf-tragsnummer'] == order_num, ['starttime']] = timestamp
            tool = row['Werkzeug-nummer']
            setup_time = calculate_setup_time(tool, last_tool)
            df.loc[df['Fertigungsauf-tragsnummer'] == order_num, ['setup_time']] = setup_time
            try:
                prod_time = int(row['Maschinen-laufzeit'])
            except:
                prod_time = 60
            runtime = prod_time + setup_time
            timestamp = timestamp + datetime.timedelta(minutes=runtime)
            order_num = row['Fertigungsauf-tragsnummer']
            df.loc[df['Fertigungsauf-tragsnummer'] == order_num, ['endtime']] = timestamp
            last_tool = tool
    return df
            
df = calculate_timestamps(df, datetime.datetime(2022, 3, 16, 6, 0, 0), 'A0 023')

In [25]:
def preprocess_part_nr(df):
    #df['part'] = df.apply(lambda row: row['Teilenummer'][:row['Teilenummer'].rfind('[A-Z]')],axis=1)
    df['part'] = df.apply(lambda row: re.findall("[0-9]+[A-Z]+", row['Teilenummer'])[-1], axis=1)
    return df

df = preprocess_part_nr(df)

In [30]:
df = df[['part', 'machine_id', 'starttime', 'endtime', 'setup_time']]
df

Unnamed: 0,part,machine_id,starttime,endtime,setup_time
0,60PAP,1531,2022-03-16 06:00:00+00:00,2022-03-16 11:23:00+00:00,0
1,70AA,1531,2022-03-16 11:23:00+00:00,2022-03-16 13:50:00+00:00,0
2,80PAK,1531,2022-03-16 13:50:00+00:00,2022-03-16 15:54:00+00:00,0
3,90APK,1531,2022-03-16 15:54:00+00:00,2022-03-16 17:38:00+00:00,0
4,55PAP,1535,2022-03-16 06:00:00+00:00,2022-03-16 15:45:00+00:00,15
5,55PAP,1535,2022-03-16 15:45:00+00:00,2022-03-16 17:00:00+00:00,0
6,55PAP,1536,2022-03-16 06:00:00+00:00,2022-03-16 07:08:00+00:00,15
7,55PAP,1537,2022-03-16 06:00:00+00:00,2022-03-16 06:57:00+00:00,15
8,80APK,1531,2022-03-16 17:38:00+00:00,2022-03-16 18:12:00+00:00,15
9,55PAP,1535,2022-03-16 17:00:00+00:00,2022-03-16 17:34:00+00:00,15


# Pm4py

In [27]:
import pm4py
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.conversion.log import converter as log_converter

In [28]:
log_df = dataframe_utils.convert_timestamp_columns_in_df(df)
log_df = log_df.sort_values('starttime')
event_log = log_converter.apply(log_df)

KeyError: 'case:concept:name'