In [4]:
from dotenv import load_dotenv
load_dotenv()
import nest_asyncio
nest_asyncio.apply()
from openai import OpenAI
import json

In [5]:
with open("structured_data_no_metrics.json", "r") as f:
    loaded_documents = json.load(f)

In [6]:
texts = []
doc_ids = []
metadatas = []
for i, doc in enumerate(loaded_documents):
    # Combine fields into a single text string
    workstation_text = ", ".join([
        f"Workstation Number: {doc['Workstation number']}, "
        f"Workstation Name: {doc['Workstation name']}, "
        f"Number of Products: {doc['Number of products']}, "
        f"Working hours (Sec.): {doc['Working hours (Sec.)']}, "
        f"Number of Operators: {doc['Number of operators']}, "
        f"Cycle Time (Sec.): {doc['Cycle Time (Sec.)']}, "
        f"Job saturation (%): {doc['Job saturation (%)']}, "
        f"Value Judgment Process: {doc['Value Judgment Process']}, "])
    
    ops_text = ", ".join([
        f"Serial Number: {op['serial_number']}, " 
        f"Description: {op['description']}, " 
        f"Manual (sec): {op['manual_sec']}sec, " 
        f"Machine (sec): {op['machine_sec']}sec, " 
        f"Value Judgment Unit: {op['value_judgment_unit']}, " 
        f"Action Item Classification: {op['action_item_classification']}, " 
        f"Current Action Level: {op['current_action_level']}"
        for op in doc["operations"]])
    text = f"Workstation: {workstation_text}\nOperations: {ops_text}\nImprovements: {doc['improvement items']['goals'] if doc['improvement items']['goals'] != '/' else 'No improvements listed'}"

    texts.append(text)
    doc_ids.append(str(i))
    metadatas.append({
        "product_number": doc["product_number"],
        "workstation_number": str(doc["Workstation number"]),
        "workstation_name": doc["Workstation name"]
    })

In [7]:
from langchain_openai import OpenAIEmbeddings
embed_model = OpenAIEmbeddings(model="text-embedding-3-small")
all_embeddings = embed_model.embed_documents(texts)

In [8]:
import textwrap
print(len(doc_ids))
print(len(metadatas))
print(len(texts))

print(len(all_embeddings), len(all_embeddings[0]))

9
9
9
9 1536


In [11]:
import chromadb
# Initialize Chroma
## Create a collection for data injection

# Initialize Chroma
collection_name = "improvement_suggestions"
chroma_client = chromadb.PersistentClient(path="./chroma_db_6")

try:
    collection = chroma_client.create_collection(name=collection_name)
except Exception as e:
    if "already exists" in str(e).lower():
        collection = chroma_client.get_collection(name=collection_name)
    else:
        raise



In [12]:
initial_count = collection.count()
print(f'Initial count#: {initial_count}')

Initial count#: 0


In [10]:
chroma_client.delete_collection(name=collection_name)
# collection.count()

In [13]:
collection.add(ids = doc_ids, embeddings=all_embeddings, documents=texts, metadatas=metadatas)

In [49]:
template = """You are an expert in process optimization. Based on the user's question and the provided search results, \
    analyze the workstation and provide improvement suggestions using the ECRS framework (Eliminate, Combine, Rearrange, Simplify). \
        For each suggestion, explain how it improves efficiency, reduces cycle time, or enhances job saturation. \
            Include operation serial number while referring them.

User's question: {query_texts}
Search results: {results}

Format your response as:
- **Eliminate**: [Suggestions to remove unnecessary steps or operations]
- **Combine**: [Suggestions to merge steps or operations]
- **Rearrange**: [Suggestions to reorder steps for efficiency]
- **Simplify**: [Suggestions to streamline processes or reduce complexity]
"""
template += """
Example ECRS suggestions:
- Eliminate: Remove redundant manual inspections if automated tests are sufficient.
- Combine: Merge two sequential tests into a single operation to reduce cycle time.
- Rearrange: Perform high-variance operations first to stabilize downstream processes.
- Simplify: Use standardized tools to reduce operator training time.
"""
template += "\nBefore providing suggestions, analyze the workstation's cycle time, job saturation, \
    and operations to identify inefficiencies. Explain your reasoning."

In [None]:
template = """You are an expert in manufacturing process optimization, specializing in workstation efficiency. Based on the user's question and the provided search results, analyze the specified workstation and its operations to generate improvement suggestions that enhance productivity, with a focus on increasing job saturation (e.g., reducing operator idle time, balancing workload). Provide suggestions using the ECRS framework (Eliminate, Combine, Rearrange, Simplify), targeting inefficiencies at the operation level. For each suggestion, include the operation serial number, explain how it improves productivity (e.g., reduces cycle time, increases throughput, or enhances job saturation), and quantify the potential impact where possible (e.g., 'reduces cycle time by approximately 5 seconds'). If trade-offs exist (e.g., increased complexity), briefly note them.

User's question: {query_texts}
Search results: {results}

**Instructions**:
1. Analyze the workstation's cycle time, job saturation, number of operators, and operation details (manual and machine times, value judgment units) from the search results to identify inefficiencies. If specific metrics are missing, note assumptions and proceed with reasonable estimates.
2. Prioritize suggestions that increase job saturation by minimizing idle time or optimizing operator tasks, while also considering cycle time reduction and throughput improvement.
3. For each ECRS suggestion, reference the operation serial number and use data from the search results to justify the recommendation.
4. If the search results lack sufficient data, provide general suggestions based on typical manufacturing inefficiencies and note the need for more data.

**Format your response**:
- **Analysis**: Summarize the workstation's key metrics (e.g., cycle time, job saturation, number of operators) and identify inefficiencies (e.g., low job saturation, high manual time).
- **Eliminate**: [Suggestions to remove unnecessary steps or operations, with operation serial numbers and impact]
- **Combine**: [Suggestions to merge steps or operations, with operation serial numbers and impact]
- **Rearrange**: [Suggestions to reorder steps for efficiency, with operation serial numbers and impact]
- **Simplify**: [Suggestions to streamline processes or reduce complexity, with operation serial numbers and impact]

**Example ECRS suggestions for a workstation like 'Coplanarity test (retest)'**:
- **Eliminate**: Remove operation OP123 (manual inspection) if automated testing (OP124) covers the same quality checks, reducing cycle time by 10 seconds and increasing job saturation by freeing operator time.
- **Combine**: Merge operations OP125 and OP126 (sequential measurements) into a single operation using a multi-function tool, reducing cycle time by 8 seconds and improving job saturation by 5%.
- **Rearrange**: Perform operation OP127 (high-variance setup) before OP128 (stable testing) to reduce setup variability, potentially decreasing cycle time by 5 seconds.
- **Simplify**: Standardize tooling for operation OP129 to reduce operator training time, improving job saturation by minimizing errors and delays.
"""

In [65]:
template = """You are an expert in manufacturing process optimization, specializing in workstation efficiency. Based on the user's question and the provided search results, analyze the specified workstation and its operations to generate improvement suggestions that enhance productivity, with a focus on increasing job saturation (e.g., reducing operator idle time, balancing workload) or throughput (e.g., increasing output per unit time). Provide suggestions using the ECRS framework (Eliminate, Combine, Rearrange, Simplify), targeting inefficiencies at the operation level. For each suggestion, include the operation serial number, explain how it improves productivity, and quantify the potential impact where possible (e.g., 'reduces cycle time by 5 seconds' or 'increases throughput by 10 units/hour'). If trade-offs exist (e.g., increased complexity), briefly note them.

User's question: {query_texts}
Search results: {results}

**Instructions**:
1. Summarize the workstation's key metrics (e.g., cycle time, job saturation, number of operators, throughput) and operation details (manual and machine times, value judgment units) from the search results to provide context.
2. Identify inefficiencies (e.g., low job saturation, high manual time, bottlenecks) and propose general strategies to improve job saturation or throughput.
3. Provide operation-specific ECRS suggestions, referencing operation serial numbers and using data from the search results to justify recommendations.
4. If specific metrics are missing, note assumptions and provide reasonable estimates or general suggestions based on typical manufacturing inefficiencies.
5. Ensure all suggestions are practical, actionable, and aligned with manufacturing best practices.

**Response Format**:
- **Section 1: Summary of the Workstation**:
  - Describe the workstation's key metrics (e.g., cycle time, job saturation, number of operators, throughput).
  - Summarize the operations (e.g., manual vs. machine time, value judgment units).
  - Highlight any inefficiencies or bottlenecks based on the data.
- **Section 2: How to Improve Job Saturation or Throughput**:
  - Propose general strategies to increase job saturation (e.g., reducing idle time, balancing operator tasks) or throughput (e.g., reducing cycle time, optimizing bottlenecks).
  - Explain how these strategies address inefficiencies identified in Section 1.
  - Quantify potential improvements where possible.
- **Section 3: ECRS Principle Suggestions**:
  - **Eliminate**: Suggestions to remove unnecessary steps or operations, with operation serial numbers, impact, and trade-offs.
  - **Combine**: Suggestions to merge steps or operations, with operation serial numbers, impact, and trade-offs.
  - **Rearrange**: Suggestions to reorder steps for efficiency, with operation serial numbers, impact, and trade-offs.
  - **Simplify**: Suggestions to streamline processes or reduce complexity, with operation serial numbers, impact, and trade-offs.

**Standard Example Response** (for a workstation like 'Coplanarity test (retest)'):

- **Section 1: Summary of the Workstation**:
  - The 'Coplanarity test (retest)' workstation has a cycle time of 60 seconds, job saturation of 70%, 2 operators, and a throughput of 60 units/hour. It includes operations OP123 (manual inspection, 15 sec), OP124 (automated test, 10 sec), OP125 (measurement, 10 sec), and OP126 (setup, 15 sec). Inefficiencies include low job saturation (70%) indicating operator idle time during machine operations and potential redundancy between OP123 and OP124.
- **Section 2: How to Improve Job Saturation or Throughput**:
  - To increase job saturation, reduce operator idle time by redistributing manual tasks during machine operations, potentially increasing saturation to 85%. To improve throughput, eliminate redundant operations and optimize cycle time, aiming for 80 units/hour. Balancing manual and machine tasks can address the bottleneck in OP126 (setup).
- **Section 3: ECRS Principle Suggestions**:
  - **Eliminate**: Remove OP123 (manual inspection) as OP124 (automated test) covers the same quality checks, reducing cycle time by 15 seconds and increasing job saturation by 10%. *Trade-off*: Requires validation of automated test reliability.
  - **Combine**: Merge OP125 and OP126 (measurement and setup) into a single operation using a multi-function tool, reducing cycle time by 10 seconds and improving throughput by 5 units/hour. *Trade-off*: May require investment in new equipment.
  - **Rearrange**: Perform OP126 (setup) before OP125 (measurement) to reduce setup variability, decreasing cycle time by 5 seconds. *Trade-off*: Minimal, but requires operator retraining.
  - **Simplify**: Standardize tooling for OP124 to reduce operator errors, improving job saturation by 5% and reducing defect-related delays. *Trade-off*: Initial training cost.
"""

In [69]:
template = """You are an expert in manufacturing process optimization, specializing in workstation efficiency. Based on the user's question and the provided search results, analyze the specified workstation and its operations to generate improvement suggestions that enhance productivity, with a focus on increasing job saturation (e.g., reducing operator idle time, balancing workload) or throughput (e.g., increasing output per unit time). Provide suggestions using the ECRS framework (Eliminate, Combine, Rearrange, Simplify), targeting inefficiencies at the operation level. For each suggestion, include the operation serial number, explain how it improves productivity, and quantify the potential impact where possible (e.g., 'reduces cycle time by 5 seconds' or 'increases throughput by 10 units/hour'). If trade-offs exist (e.g., increased complexity), briefly note them.

**Value Judgment Units**: A measure of the complexity or criticality of a manual operation (e.g., 1 for simple assembly, 3 for intricate inspection).

User's question: {query_texts}
Search results: {results}

**Instructions**:
1. Summarize the workstation's key metrics (e.g., cycle time, job saturation, number of operators, throughput) and operation details (manual and machine times, value judgment units) from the search results to provide context.
2. Identify inefficiencies (e.g., low job saturation, high manual time, bottlenecks) and propose general strategies to improve job saturation or throughput.
3. Provide operation-specific ECRS suggestions, referencing operation serial numbers and using data from the search results to justify recommendations. **Ensure all analysis and suggestions are directly supported by the information found in the 'Search results'. Explicitly mention the data points used to justify your recommendations.**
4. If specific metrics are missing, note assumptions and provide reasonable estimates or general suggestions based on typical manufacturing inefficiencies.
5. Ensure all suggestions are practical, actionable, and aligned with manufacturing best practices. **If the 'Search results' contain conflicting information, prioritize data that appears most recent or is attributed to a reliable source. Briefly note any significant discrepancies and how they were addressed in your analysis.**

**Response Format**:
- **Section 1: Summary of the Workstation**:
  - Describe the workstation's key metrics (e.g., cycle time, job saturation, number of operators, throughput).
  - Summarize the operations (e.g., manual vs. machine time, value judgment units).
  - Highlight any inefficiencies or bottlenecks based on the data.
- **Section 2: How to Improve Job Saturation or Throughput**:
  - Propose general strategies to increase job saturation (e.g., reducing idle time, balancing operator tasks) or throughput (e.g., reducing cycle time, optimizing bottlenecks).
  - Explain how these strategies address inefficiencies identified in Section 1.
  - Quantify potential improvements where possible **(in terms of seconds, units per hour, or percentage change, as appropriate)**.
- **Section 3: ECRS Principle Suggestions**:
  - **Eliminate**: Suggestions to remove unnecessary steps or operations, with operation serial numbers, impact, and trade-offs.
  - **Combine**: Suggestions to merge steps or operations, with operation serial numbers, impact, and trade-offs.
  - **Rearrange**: Suggestions to reorder steps for efficiency, with operation serial numbers, impact, and trade-offs.
  - **Simplify**: Suggestions to streamline processes or reduce complexity, with operation serial numbers, impact, and trade-offs.

**Standard Example Response** (for a workstation like 'Coplanarity test (retest)'):

- **Section 1: Summary of the Workstation**:
  - The 'Coplanarity test (retest)' workstation has a cycle time of 60 seconds, job saturation of 70%, 2 operators, and a throughput of 60 units/hour. It includes operations OP123 (manual inspection, 15 sec, 2 VJU), OP124 (automated test, 10 sec), OP125 (measurement, 10 sec, 1 VJU), and OP126 (setup, 15 sec). Inefficiencies include low job saturation (70%) indicating operator idle time during machine operations and potential redundancy between OP123 and OP124.
- **Section 2: How to Improve Job Saturation or Throughput**:
  - To increase job saturation, reduce operator idle time by redistributing manual tasks during machine operations, potentially increasing saturation to 85%. To improve throughput, eliminate redundant operations and optimize cycle time, aiming for 80 units/hour. Balancing manual and machine tasks can address the bottleneck in OP126 (setup).
- **Section 3: ECRS Principle Suggestions**:
  - **Eliminate**: Remove OP123 (manual inspection) as OP124 (automated test) covers the same quality checks (based on search result data indicating 99.9% correlation), reducing cycle time by 15 seconds and increasing job saturation by 10%. *Trade-off*: Requires validation of automated test reliability and potential for missing subjective defects.
  - **Combine**: Merge OP125 and OP126 (measurement and setup) into a single operation using a multi-function tool (as suggested by search result indicating availability of such tools), reducing cycle time by 10 seconds and improving throughput by 5 units/hour. *Trade-off*: May require investment in new equipment and operator training.
  - **Rearrange**: Perform OP126 (setup) before OP125 (measurement) to reduce setup variability (as indicated by search result data showing high setup time variation after measurement), decreasing cycle time by 5 seconds. *Trade-off*: Minimal, but requires operator retraining.
  - **Simplify**: Standardize tooling for OP124 to reduce operator errors (as mentioned in a search result regarding common causes of defects), improving job saturation by 5% and reducing defect-related delays. *Trade-off*: Initial training cost and potential resistance to change.
"""



In [70]:
print(template)

You are an expert in manufacturing process optimization, specializing in workstation efficiency. Based on the user's question and the provided search results, analyze the specified workstation and its operations to generate improvement suggestions that enhance productivity, with a focus on increasing job saturation (e.g., reducing operator idle time, balancing workload) or throughput (e.g., increasing output per unit time). Provide suggestions using the ECRS framework (Eliminate, Combine, Rearrange, Simplify), targeting inefficiencies at the operation level. For each suggestion, include the operation serial number, explain how it improves productivity, and quantify the potential impact where possible (e.g., 'reduces cycle time by 5 seconds' or 'increases throughput by 10 units/hour'). If trade-offs exist (e.g., increased complexity), briefly note them.

**Value Judgment Units**: A measure of the complexity or criticality of a manual operation (e.g., 1 for simple assembly, 3 for intrica

In [51]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import RunnablePassthrough
import textwrap
llm = ChatOpenAI(temperature = 0.4, model = "gpt-4o")


# template ="""You will recieve the user's question along with the search results of that question \
# over a database. Give the user the proper answer.
# User's question: {query_texts} 
# Search results: {results}
# When a question is involved regarding an operation, please fully mention that in your answer.
# """


In [71]:
prompt = PromptTemplate.from_template(template=template)
chain = prompt | llm | StrOutputParser()

query_texts = "Please analyze the operations of workstation 'Internal GAP inspection (CCD inspection)' and provide improvement suggestions."
query_embeddings = embed_model.embed_query(query_texts)
# Load the chromadb collection for vector search
vectordb = chroma_client.get_collection(name="improvement_suggestions")
# Retrieve relevant chunk
results = vectordb.query(
    query_embeddings = query_embeddings,
    # n_results=1 #top_k
)
response = chain.invoke({'query_texts': query_texts, 'results': results})
print(response)

Number of requested results 10 is greater than number of elements in index 9, updating n_results = 9


### Section 1: Summary of the Workstation

- **Workstation Overview**: The 'Internal GAP inspection (CCD inspection)' workstation is identified as Workstation Number 13. It operates with a cycle time of 6.14 seconds and is managed by a single operator. The job saturation is at 76.75%, indicating some level of operator idle time.
- **Operations**: The workstation involves three main operations:
  - **Serial Number 1**: Taking a product and placing it on the CCD holder (1.88 seconds, Non-Value Added (NVA), Transport).
  - **Serial Number 2**: Shaking the product to confirm baseline alignment (2.49 seconds, Value Added (VA), Operation).
  - **Serial Number 3**: Removing the product and placing it on the flow line (1.77 seconds, NVA, Transport).
- **Inefficiencies**: The job saturation rate suggests potential for optimization, particularly in reducing idle time. The manual times for operations, especially the transport actions, indicate room for efficiency improvements.

### Section 2: How

In [68]:
results['distances']

[[0.605694183322796,
  0.6199018911398201,
  0.7903752371278352,
  0.794047400634398,
  0.8125527545319149,
  0.8500300843085387,
  0.8612104279171591,
  0.9056025245934073,
  0.9075345678822588]]

In [21]:
print('bangladesh \nindia \npakistan')

bangladesh 
india 
pakistan


In [24]:
template = 'Ore dhon'
template += '\ntui ki amar hobi?'
template += '\namar na hoile kar hobi taile?'
print(template)

Ore dhon
tui ki amar hobi?
amar na hoile kar hobi taile?


In [81]:
numbers = [i for i in range(10) if i%2 == 0]
dhon  = ''.join([i for i in numbers])
dhon

TypeError: sequence item 0: expected str instance, int found