In [4]:
from dotenv import load_dotenv
load_dotenv()
import nest_asyncio
nest_asyncio.apply()
from typing import List
from pydantic import BaseModel
import os
import json
import csv

from llama_parse import LlamaParse
from llama_index.core.schema import Document
from llama_index.llms.openai import OpenAI

In [5]:
## Parse the Form Filing File
def parse_file(file_path: str) -> List[Document]:
    llama_parse = LlamaParse(
        api_key=os.environ['LLAMA_CLOUD_API_KEY'],
        result_type='markdown',
        target_pages="0"
    )
    result = llama_parse.load_data(
        file_path,
    )
    return result

In [6]:
# Parsing the report template
documents = parse_file('data/Report_format_2.xlsx')

Started parsing the file under job_id 76d67ecb-7dc0-4289-b06e-43fb469fae13


In [7]:
text = documents[0].text

In [8]:
## Structured Extraction
prompt = f"""
You are an AI assistant specializing in Industrial Engineering problem solving. You've been given an Excel spreadsheet containing items to be improved. \ 
and improvement parameters. Your task is to extract and structure this information in a clear, organized format.

The Excel sheet contains the following:
1. Items to be improved (rows)
2. Description (columns)
3. Improvement parameters and dates (columns)

Input Excel data:
{text}

Please present the extracted and structured information in a clear, easy-to-read format.
"""

In [9]:
## Defining the database
class ReportParameters(BaseModel):
    """Data model for IE problem solving analysis."""
    ItemsToBeImproved: List[str]
    Description: List[str]
    ImprovementParameters: List[str]

In [10]:
## Defining the LLM model
llm = OpenAI(model='gpt-4o-mini')

In [11]:
from llama_index.core.llms import ChatMessage
sllm = llm.as_structured_llm(output_cls=ReportParameters)
input_msg = ChatMessage.from_str(prompt)

In [12]:
## Get the parsed documents as a 
output = sllm.chat([input_msg])
output_obj = output.raw
output_obj

ReportParameters(ItemsToBeImproved=['The saturation of the lower glass point Xiaoli Pill is 65%', 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%', 'Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75%', 'The upper and lower glass bonding saturation is 67.5%', 'Five-in-one front&back saturation53.2%', 'CellstickFPCsaturation61.9%', 'FPCFoam resistance test saturation40.8%', 'The saturation of amplified pills and sports cars is 73.3%', 'pointUVglue,UVsolidify&Check saturation33.3%', 'Electrophoresis tank noteBufferliquid saturation79.2%', 'BufferLiquid injection port sealing and solidification+Water leakage test saturation60.3%', 'Silicone oil saturation 50.7%', 'Silicone oil port sealing + dispensing + curing saturation 58%', 'Set Conn & lower cover + paste QR CODE saturation 41%', 'Lock the lid + label the saturation 81.7%', 'Glass module test saturation 100%', 'Visual inspection of UV glue + pull tape is the bottlen

In [13]:
ItemsToBeImproved = output_obj.ItemsToBeImproved
Description = output_obj.Description
ImprovementParameters = output_obj.ImprovementParameters[1:]

In [14]:
## Config at LLamaIndex
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
# pip install llama-index-indices-managed-llama-cloud
index = LlamaCloudIndex(
  name="objective-wildfowl-2024-11-24", 
  project_name="Default",
  organization_id="2033a7fc-187e-48e4-a172-5079c4ee2bbf",
  api_key=os.environ['LLAMA_CLOUD_API_KEY']
)
query_engine = index.as_query_engine(
    dense_similarity_top_k=10,
    sparse_similarity_top_k=10,
    alpha=0.5,
    enable_reranking=True,
    rerank_top_n=5,
)

In [15]:
## Developing the prompt based on description and generate improvement suggestions accordingly

from tqdm import tqdm
def generate_answers(ItemsToBeImproved: List[str],Description: List[str], ImprovementParameters: List[str]) -> List[str]:
    items_to_be_improved = {}
    for index, item in enumerate(ItemsToBeImproved[:3]):
        items_to_be_improved[item] = {}
        for parameter in tqdm(ImprovementParameters):
            job_saturation = (
                f"Analyze the following text and determine whether the job saturation value mentioned is lower than 95%.\n\n"
                f"Text: \"{Description[index]}\"\n\n"
                f"If a job saturation value is explicitly mentioned, check if it is lower than 95%. If so, respond with 'YES'. "
                f"If it is 95% or higher, respond with 'NO'. If no job saturation value is mentioned, respond with 'NO INFORMATION'."
            )
            input_msg = ChatMessage.from_str(job_saturation)
            output = llm.chat([input_msg])
            answer = output.message.content
            print(Description[index], answer)
            if answer == 'YES':
                query = (
                    f"For the problem '{item}', provide a detailed and concise value or description for the improvement parameter '{parameter}'.\n"
                    f"If the parameter is not applicable or no information is available, respond explicitly with 'NA'.\n\n"
                    f"Ensure your response is clear, contextually relevant, and avoids ambiguity."
                    )
                answer = str(query_engine.query(query))
                items_to_be_improved[item]['Description'] = Description[index]
                items_to_be_improved[item][parameter] = answer
    return items_to_be_improved

In [16]:
answers = generate_answers(ItemsToBeImproved, Description, ImprovementParameters)

  0%|                                                                                                                                                      | 0/9 [00:00<?, ?it/s]

Low job saturation(lower than95%) YES


 11%|███████████████▊                                                                                                                              | 1/9 [00:02<00:19,  2.44s/it]

Low job saturation(lower than95%) YES


 22%|███████████████████████████████▌                                                                                                              | 2/9 [00:04<00:14,  2.09s/it]

Low job saturation(lower than95%) YES


 33%|███████████████████████████████████████████████▎                                                                                              | 3/9 [00:06<00:11,  1.99s/it]

Low job saturation(lower than95%) YES


 44%|███████████████████████████████████████████████████████████████                                                                               | 4/9 [00:07<00:08,  1.79s/it]

Low job saturation(lower than95%) YES


 56%|██████████████████████████████████████████████████████████████████████████████▉                                                               | 5/9 [00:08<00:06,  1.61s/it]

Low job saturation(lower than95%) YES


 67%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                               | 6/9 [00:10<00:04,  1.65s/it]

Low job saturation(lower than95%) YES


 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                               | 7/9 [00:12<00:03,  1.74s/it]

Low job saturation(lower than95%) YES


 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 8/9 [00:14<00:01,  1.76s/it]

Low job saturation(lower than95%) YES


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:16<00:00,  1.78s/it]
  0%|                                                                                                                                                      | 0/9 [00:00<?, ?it/s]

Low job saturation(lower than95%) YES


 11%|███████████████▊                                                                                                                              | 1/9 [00:01<00:15,  1.94s/it]

Low job saturation(lower than95%) YES


 22%|███████████████████████████████▌                                                                                                              | 2/9 [00:03<00:12,  1.76s/it]

Low job saturation(lower than95%) YES


 33%|███████████████████████████████████████████████▎                                                                                              | 3/9 [00:05<00:10,  1.80s/it]

Low job saturation(lower than95%) YES


 44%|███████████████████████████████████████████████████████████████                                                                               | 4/9 [00:07<00:09,  1.97s/it]

Low job saturation(lower than95%) YES


 56%|██████████████████████████████████████████████████████████████████████████████▉                                                               | 5/9 [00:09<00:07,  1.77s/it]

Low job saturation(lower than95%) YES


 67%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                               | 6/9 [00:10<00:05,  1.75s/it]

Low job saturation(lower than95%) YES


 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                               | 7/9 [00:12<00:03,  1.66s/it]

Low job saturation(lower than95%) YES


 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 8/9 [00:14<00:01,  1.83s/it]

Low job saturation(lower than95%) YES


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:16<00:00,  1.79s/it]
  0%|                                                                                                                                                      | 0/9 [00:00<?, ?it/s]

Low job saturation(lower than95%) YES


 11%|███████████████▊                                                                                                                              | 1/9 [00:01<00:14,  1.85s/it]

Low job saturation(lower than95%) YES


 22%|███████████████████████████████▌                                                                                                              | 2/9 [00:06<00:22,  3.23s/it]

Low job saturation(lower than95%) YES


 33%|███████████████████████████████████████████████▎                                                                                              | 3/9 [00:07<00:14,  2.49s/it]

Low job saturation(lower than95%) YES


 44%|███████████████████████████████████████████████████████████████                                                                               | 4/9 [00:10<00:13,  2.71s/it]

Low job saturation(lower than95%) YES


 56%|██████████████████████████████████████████████████████████████████████████████▉                                                               | 5/9 [00:12<00:09,  2.29s/it]

Low job saturation(lower than95%) YES


 67%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                               | 6/9 [00:14<00:07,  2.34s/it]

Low job saturation(lower than95%) YES


 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                               | 7/9 [00:16<00:04,  2.17s/it]

Low job saturation(lower than95%) YES


 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 8/9 [00:18<00:02,  2.18s/it]

Low job saturation(lower than95%) YES


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:20<00:00,  2.27s/it]


In [None]:
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
# Flatten the nested dictionary
flat_data = []
for parameter, metrics in answers.items():
    flat_metrics = flatten_dict(metrics)
    flat_metrics['Items to be improved'] = parameter 
    flat_data.append(flat_metrics)

# Get all unique keys to use as CSV headers
headers = set()
for item in flat_data:
    headers.update(item.keys())

# Sort headers to ensure 'Items to be improved' comes first
headers = sorted(headers)
headers.insert(0, headers.pop(headers.index('Items to be improved')))
headers.insert(1, headers.pop(headers.index('Description')))
headers.insert(2, headers.pop(headers.index('Improvement direction')))
headers.insert(3, headers.pop(headers.index('Person responsible')))
headers.insert(4, headers.pop(headers.index('Expected start date')))
headers.insert(5, headers.pop(headers.index('Actual start date')))
headers.insert(6, headers.pop(headers.index('Expected completion date')))
headers.insert(7, headers.pop(headers.index('Actual completion date')))
headers.insert(8, headers.pop(headers.index('Improve immediately')))
headers.insert(9, headers.pop(headers.index('Confirmation')))
headers.insert(10, headers.pop(headers.index('appendix')))

In [22]:
import os
os.getcwd()
os.listdir('./data')

['BP_Excel.xlsx',
 'Paper-2.pdf',
 'policy.pdf',
 'PPS process.xlsx',
 'Report_format.xlsx',
 'Report_format_2.xlsx',
 'sample_excel.xlsx',
 'sec_10k_analysis_form_filling.xlsx',
 'Series Cover Packer.pptx',
 'Series_Cover_Packer_2.pptx',
 'tasks.xlsx',
 'template_2.xlsx',
 '_10-K-2021-(As-Filed).pdf',
 '~$Series_Cover_Packer_2.pptx']

In [23]:
# Write the report into CSV
with open('./data/Report_format_2_complete.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=headers)
    writer.writeheader()
    for row in flat_data:
        writer.writerow(row)

In [25]:
# Read the CSV and save into Excel File
import pandas as pd
from IPython.core.display import HTML
pd.set_option('display.max_colwidth', 10)
out_df = pd.read_csv("Report_format_2_complete.csv")
out_df.to_excel("./data/Report_format_2_complete.xlsx", index=False)

## Display the 
html = out_df.to_html()
HTML(html)


Unnamed: 0,Items to be improved,Description,Improvement direction,Person responsible,Expected start date,Actual start date,Expected completion date,Actual completion date,Improve immediately,Confirmation,appendix
0,The saturation of the lower glass point Xiaoli Pill is 65%,Low job saturation(lower than95%),"1. Judgment of value and reduce tasks without added value\n2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards\n3. Automated level inspection: simple and automated import\n4. Merge and rearrange new job elements",LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,LiXX
1,Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%,Low job saturation(lower than95%),"The improvement direction for the problem 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%' is as follows: 1. Judgment of value and reduce tasks without added value 2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards 3. Automated level inspection: simple and automated import 4. Merge and rearrange new job elements.",LiXX,10/8/24,10/8/24,Expected completion date: 10/15/24,10/15/24,yes,The improvement parameter 'Confirmation' for the problem 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%' is 'yes'.,LiXX
2,Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75%,Low job saturation(lower than95%),1. Judgment of value and reduce tasks without added value\n2. Inspection of movement quality and human engineering hazards\n3. Automated level inspection\n4. Merge and rearrange new job elements,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,The improvement parameter 'Confirmation' for the problem 'Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75%' is 'yes'.,LiXX


In [None]:
#Reading the exported excel file and develop the  Gantt Chart from there!