# imports and setup

In [None]:
# Jupyter
%matplotlib inline

# Env + basics
import os
import pandas as pd

# Project modules
from pipeline.engine import run_pipeline
from pipeline.reporting import show_run_report
from pipeline.review import ReviewConfig

# API key + model (used by project/config.py)
# Fail fast when the key is missing instead of silently installing the
# "sk-..." placeholder, which would presumably only fail at the first API call.
# Keep the real key out of the notebook: export it in the environment instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting the notebook kernel."
    )

# Load data (without parse_dates)
file_path = r"path_to_file"
# NOTE: on_bad_lines="skip" drops malformed rows without raising or warning.
df = pd.read_csv(file_path, sep=",", on_bad_lines="skip", engine="python").reset_index(drop=True)

# Row count, then the first 12 rows as the cell output.
print(len(df))
df.head(12)

# Experiments – OpenAI cloud LLM

## 1

In [None]:
prompt = "Show the distribution of end schedule deviation for operations labeled mit TA."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 2

In [None]:
prompt = "Display the distribution of cycle time deviation for sawing operations with completion in 2024."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 3

In [None]:
prompt = "Show me a statistical summary of the cycle times for ressource group Drahtschneiden."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 4

In [None]:
prompt = "Display key statistics of cycle times for finished operations."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 5

In [None]:
prompt = "What are average cycle times by ressource group for finished parts?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 6

In [None]:
prompt = "Compare the max lead times for Fräsen Gruppe Kleinteile by operation type."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 7

In [None]:
prompt = "Over time, how is monthly start schedule deviation developing for function check operations?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 8

In [None]:
prompt = "Show me the trend for weekly end schedule deviations of operations in 2025."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 9

In [None]:
prompt = "How is TA Zugang correlating to TA Abgang for Fräsen DM100 operations?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 10

In [None]:
prompt = "Show me how the numerical values in the data are related to each other, finished operations only."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 11

In [None]:
prompt = "What are the main drivers for end-dates being higher or lower? Only operations in 2024."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 12

In [None]:
prompt = "Which drivers affected lead times for assembly operations?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 13

In [None]:
prompt = "Forecast weekly average end-date deviation for the next 6 weeks."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 14

In [None]:
prompt = "Show me how the maximum start deviation will change for the next 6 weeks."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 15

In [None]:
prompt = "What is the expected end-date deviation for a new case given start delay of 10 days and run-through time of 3 days."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 16

In [None]:
prompt = "If start deviation is 3 days and cycle time is 2 days, what would be a good prediction for the end deviation?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 17

In [None]:
prompt = "Predict the lead time deviation if lead time is 3 days and start deviation is -20 days."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 18

In [None]:
prompt = "Classify the likely ta_status if start is 10 days early and resource group ‘Fräsen DM100’."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 19

In [None]:
prompt = "Which ta status is to expect for a operation type 'Kleinbauteile schlichten' with a cycle time of 1?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 20

In [None]:
prompt = "Classify a likely ta status in a situation where the operation startet 20 days early with a 'Funktionsprüfung'."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 21

In [None]:
prompt = "How is the cycle time distributed for Profilschleifen across ta status categories?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 22

In [None]:
prompt = "For Konstruktion operations, what are the main statistics about the end schedule deviation?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 23

In [None]:
prompt = "Which resource groups drive the end-date deviation the most?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 24

In [None]:
prompt = "Forecast the next 16 weeks of weekly average Lead-time deviation."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 25

In [None]:
prompt = "Which resource group should we route to to minimize end deviation?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 26

In [None]:
#prompt = "Which operation type would be best for low cycle times? Use candidate ranking"
prompt = "Rank resource groups from worst to best by expected cycle times (typical conditions)."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

In [None]:
# Count how often each distinct value occurs in the "avg_type" column and
# show the result as the cell output.
avg_type_counts = df["avg_type"].value_counts()
avg_type_counts

## 27

In [None]:
prompt = "I want to improve cycle time (lower is better). What can I do? Show me decision options."

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 28

In [None]:
prompt = "For unfinshed operations: I want to decrease start deviation. What do I need to work on?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 29

In [None]:
prompt = "Could I improve average cycle time if I use '3D CAM' as a substitute for  ‘CAM-Fertigung’?"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        #after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)

## 30

In [None]:
prompt = "What happens to expected end-date deviation if we route work to 'Fräsen Gruppe 5-Achs' instead of ‘Fräsen DM100’? (group compare)"

state, df_out, results = await run_pipeline(
    prompt=prompt,
    df=df,
    review_config=ReviewConfig(
        enabled=True,
        after_steps={"family","type","filters","columns","prepare","aggregate","viz","analyze","verify","finalize"},
        #after_steps={},
        show_step_inputs=True,
    )
)

show_run_report(
    results,
    show_head_df=df_out,
    show_step_inputs=False,   # <- nicht doppelt
    verbose_steps=False,      # <- keine Step-Dumps am Ende
    render_final=True,        # <- Plots + Final text
    final_head_rows=5,
)