In [186]:
from dotenv import load_dotenv
load_dotenv()

True

In [187]:
import nest_asyncio
nest_asyncio.apply()

In [188]:
from typing import List
from pydantic import BaseModel
import os
import json
import csv

from llama_parse import LlamaParse
from llama_index.core.schema import Document
from llama_index.llms.openai import OpenAI
import nest_asyncio

nest_asyncio.apply()

In [338]:
## Parse the Form Filing File
def parse_file(file_path: str) -> List[Document]:
    llama_parse = LlamaParse(
        api_key=os.environ['LLAMA_CLOUD_API_KEY'],
        result_type='markdown',
        target_pages="0"
    )
    result = llama_parse.load_data(
        file_path,
    )
    return result

In [343]:
documents = parse_file('data/Report_format_2.xlsx')

Started parsing the file under job_id 09f778cd-a272-41c7-b8e5-4e044b5bba98


In [344]:
print(f'Doc length: {type(documents)}')
print(len(documents))

Doc length: <class 'list'>
1


In [345]:
print(f'Doc length: {len(documents)}')
print(documents[0].text)

Doc length: 1
|Items to be improved                                                                         |Description|Improvement Parameters|                  |                   |                 |                        |                      |                   |            |        |
|---------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------|------------------|-------------------|-----------------|------------------------|----------------------|-------------------|------------|--------|
|                                                                                             | |Improvement direction |Person responsible|Expected start date|Actual start date|Expected completion date|Acutal c

In [346]:
text = documents[0].text

In [347]:
## Structured Extraction
prompt = f"""
You are an AI assistant specializing in Industrial Engineering problem solving. You've been given an Excel spreadsheet containing items to be improved. \ 
and improvement parameters. Your task is to extract and structure this information in a clear, organized format.

The Excel sheet contains the following:
1. Items to be improved (rows)
2. Description (columns)
3. Improvement parameters and dates (columns)

Input Excel data:
{text}

Please present the extracted and structured information in a clear, easy-to-read format.
"""

In [358]:
class ReportParameters(BaseModel):
    """Data model for IE problem solving analysis."""
    ItemsToBeImproved: List[str]
    Description: List[str]
    ImprovementParameters: List[str]
    # ImprovementPerson: List[str]
    # StartDate: List[str]
    # CompletionDate: List[str]
    # ImproveImmediately: List[bool]
    # Confirmation: List[str]
    # Appendix: List[str]

In [409]:
input_msg = ChatMessage.from_str('What is your name?')
output = llm.chat([input_msg])
output.message.content

'I’m called ChatGPT. How can I assist you today?'

'I’m called ChatGPT. How can I assist you today?'

In [359]:
llm = OpenAI(model='gpt-4o-mini')

In [360]:
from llama_index.core.llms import ChatMessage
sllm = llm.as_structured_llm(output_cls=ReportParameters)
input_msg = ChatMessage.from_str(prompt)

In [361]:
output = sllm.chat([input_msg])
output_obj = output.raw
output_obj

ReportParameters(ItemsToBeImproved=['The saturation of the lower glass point Xiaoli Pill is 65%', 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%', 'Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75%', 'The upper and lower glass bonding saturation is 67.5%', 'Five-in-one front&back saturation53.2%', 'CellstickFPCsaturation61.9%', 'FPCFoam resistance test saturation40.8%', 'The saturation of amplified pills and sports cars is 73.3%', 'pointUVglue,UVsolidify&Check saturation33.3%', 'Electrophoresis tank noteBufferliquid saturation79.2%', 'BufferLiquid injection port sealing and solidification+Water leakage test saturation60.3%', 'Silicone oil saturation 50.7%', 'Silicone oil port sealing + dispensing + curing saturation 58%', 'Set Conn & lower cover + paste QR CODE saturation 41%', 'Lock the lid + label the saturation 81.7%', 'Glass module test saturation 100%', 'Visual inspection of UV glue + pull tape is the bottlen

In [366]:
#print out the items based on the data model
print(len(output_obj.ItemsToBeImproved))
print(len(output_obj.Description))
print(len(output_obj.ImprovementParameters))

22
22
10


In [380]:
ItemsToBeImproved = output_obj.ItemsToBeImproved
Description = output_obj.Description
ImprovementParameters = output_obj.ImprovementParameters[1:]
# print(output_obj.ImprovementDirection)
# print(output_obj.BriefDescription)
# print(output_obj.ImprovementPerson)
# print(output_obj.StartDate)
# print(output_obj.CompletionDate)
# print(output_obj.ImproveImmediately)
# print(output_obj.Confirmation)
# print(output_obj.Appendix)

In [381]:
ImprovementParameters

['Improvement direction',
 'Person responsible',
 'Expected start date',
 'Actual start date',
 'Expected completion date',
 'Acutal completion date',
 'Improve immediately',
 'Confirmation',
 'appendix']

In [368]:
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
# pip install llama-index-indices-managed-llama-cloud
index = LlamaCloudIndex(
  name="objective-wildfowl-2024-11-24", 
  project_name="Default",
  organization_id="2033a7fc-187e-48e4-a172-5079c4ee2bbf",
  api_key=os.environ['LLAMA_CLOUD_API_KEY']
)

In [369]:
query_engine = index.as_query_engine(
    dense_similarity_top_k=10,
    sparse_similarity_top_k=10,
    alpha=0.5,
    enable_reranking=True,
    rerank_top_n=5,
)

In [None]:
# print(output_obj.ItemsToBeImproved)
# print(output_obj.BriefDescription)
# print(output_obj.ImprovementDirection)
# print(output_obj.ImprovementPerson)
# print(output_obj.StartDate)
# print(output_obj.CompletionDate)
# print(output_obj.ImproveImmediately)
# print(output_obj.Confirmation)
# print(output_obj.Appendix)

In [384]:
# ItemsToBeImproved = output_obj.ItemsToBeImproved
# BriefDescription = output_obj.BriefDescription
# ImprovementDirection = output_obj.ImprovementDirection
# ImprovementPerson = output_obj.ImprovementPerson
# StartDate = output_obj.StartDate
# CompletionDate = output_obj.CompletionDate
# ImproveImmediately = output_obj.ImproveImmediately
# Confirmation = output_obj.Confirmation
# Appendix = output_obj.Appendix
ItemsToBeImproved

['The saturation of the lower glass point Xiaoli Pill is 65%',
 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%',
 'Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75%',
 'The upper and lower glass bonding saturation is 67.5%',
 'Five-in-one front&back saturation53.2%',
 'CellstickFPCsaturation61.9%',
 'FPCFoam resistance test saturation40.8%',
 'The saturation of amplified pills and sports cars is 73.3%',
 'pointUVglue,UVsolidify&Check saturation33.3%',
 'Electrophoresis tank noteBufferliquid saturation79.2%',
 'BufferLiquid injection port sealing and solidification+Water leakage test saturation60.3%',
 'Silicone oil saturation 50.7%',
 'Silicone oil port sealing + dispensing + curing saturation 58%',
 'Set Conn & lower cover + paste QR CODE saturation 41%',
 'Lock the lid + label the saturation 81.7%',
 'Glass module test saturation 100%',
 'Visual inspection of UV glue + pull tape is the bottleneck station',
 'Mea

In [401]:
sllm.chat(['What is your name?'])

ValidationError: 1 validation error for LLMChatStartEvent
messages.0
  Input should be a valid dictionary or instance of ChatMessage [type=model_type, input_value='What is your name?', input_type=str]
    For further information visit https://errors.pydantic.dev/2.9/v/model_type

In [410]:
#Generate query and test if it generating them correctly
from tqdm import tqdm
items_to_be_improved = {}

for index, item in enumerate(ItemsToBeImproved):
    des = Description[index]
    job_saturation = f"Is the job saturation value lower than 95% for:'{des}'? Your response should be boolean. Either YES or NO?"
    input_msg = ChatMessage.from_str(job_saturation)
    output = llm.chat([input_msg])
    answer = output.message.content
    
    print(index,',',des,',',answer)
    
    # for (index, parameter) in enumerate(ImprovementParameters):
    #     des = Description[index]
    #     job_saturation = f"Is the job saturation value lower than 95% for:'{des}'? Your response should be boolean. Either YES or NO?"
    #     # query = f"What is the '{parameter}' for '{item}'? If you don't know the answer then say 'NA'"
    #     answer = str(query_engine.query(job_saturation))
    #     # items_to_be_improved[item][parameter] = answer
    #     print(des,',',answer)
    #     index +=1

0 , Low job saturation(lower than95%) , YES
1 , Low job saturation(lower than95%) , YES
2 , Low job saturation(lower than95%) , YES
3 , Low job saturation(lower than95%) , YES
4 , Low job saturation(lower than95%) , YES
5 , Low job saturation(lower than95%) , YES
6 , Low job saturation(lower than95%) , YES
7 , Low job saturation(lower than95%) , YES
8 , Low job saturation(lower than95%) , YES
9 , Low job saturation(lower than95%) , YES
10 , Low job saturation(lower than95%) , YES
11 , Low job saturation(lower than95%) , YES
12 , Low job saturation(lower than95%) , YES
13 , Low job saturation(lower than95%) , YES
14 , Low job saturation(lower than95%) , YES
15 , Job saturation 100% , NO
16 , Bottleneck station (the average C/T of the station is the largest) , YES
17 , Bottleneck station (the average C/T of the station is the largest) , NO
18 , Simultaneous working time of man and machine = manual time (39S) + automatic time (2S) - process standard C/T (41S) = 0S Time of man waiting for 

In [None]:
# items_to_be_improved

{'The saturation of the lower glass point Xiaoli Pill is 65%': {'Description': 'NA',
  'Improvement direction': "The improvement direction for 'The saturation of the lower glass point Xiaoli Pill is 65%' is: 1. Judgment of value and reduce tasks without added value 2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards 3. Automated level inspection: simple and automated import 4. Merge and rearrange new job elements.",
  'Person responsible': 'LiXX',
  'Expected start date': '10/8/24',
  'Actual start date': '10/8/24',
  'Expected completion date': '10/15/24',
  'Actual completion date': '10/15/24',
  'Improve immediately': 'yes',
  'Confirmation': 'LiXX',
  'appendix': 'NA'},
 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%': {'Description': 'The \'Description\' for \'Bottom glass 

In [370]:
from tqdm import tqdm
def generate_answers(ItemsToBeImproved: List[str], ImprovementParameters: List[str]) -> List[str]:
    items_to_be_improved = {}
    for item in ItemsToBeImproved[:5]:
        items_to_be_improved[item] = {}
        for parameter in tqdm(ImprovementParameters):
            query = f"What is the '{parameter}' for '{item}'? If you don't know the answer then say 'NA'"
            answer = str(query_engine.query(query))
            items_to_be_improved[item][parameter] = answer
    return items_to_be_improved

In [162]:
# ItemsToBeImproved = output_obj.ItemsToBeImproved
# ImprovementParameters = output_obj.TrueCause

In [371]:
answers = generate_answers(ItemsToBeImproved, ImprovementParameters)

 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...
 ... (more hidden) ...


In [375]:
answers

{'The saturation of the lower glass point Xiaoli Pill is 65%': {'': 'NA',
  'Improvement direction': "The improvement direction for 'The saturation of the lower glass point Xiaoli Pill is 65%' is: 1. Judgment of value and reduce tasks without added value 2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards 3. Automated level inspection: simple and automated import 4. Merge and rearrange new job elements.",
  'Person responsible': 'LiXX',
  'Expected start date': '10/8/24',
  'Actual start date': '10/8/24',
  'Expected completion date': '10/15/24',
  'Acutal completion date': '10/15/24',
  'Improve immediately': 'yes',
  'Confirmation': 'LiXX',
  'appendix': 'NA'},
 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%': {'': 'NA',
  'Improvement direction': 'Judgment of value and reduce

In [372]:
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)
flat_data = []
for parameter, metrics in answers.items():
    flat_metrics = flatten_dict(metrics)
    flat_data.append(flat_metrics)

In [306]:
# flat_metrics

In [307]:
# flat_data

In [56]:
# answers = json.loads("""
# {'The saturation of the lower glass point Xiaoli Pill is 65%': 'Judgment of value and reduce tasks without added value, Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards, Automated level inspection: simple and automated import, Merge and rearrange new job elements.',
#  'Bottom glass electrophoresis tank+UVFixed baking operation saturation 79.2%': 'The improvement direction for Bottom glass electrophoresis tank+UVFixed baking operation saturation 79.2% is to reduce tasks without added value, inspect movement quality and human engineering hazards, conduct automated level inspection, and merge and rearrange new job elements.',
#  'Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75%': 'The improvement direction for Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75% is to reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards.'}
# """)

In [387]:
flat_data

[{'': 'NA',
  'Improvement direction': "The improvement direction for 'The saturation of the lower glass point Xiaoli Pill is 65%' is: 1. Judgment of value and reduce tasks without added value 2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards 3. Automated level inspection: simple and automated import 4. Merge and rearrange new job elements.",
  'Person responsible': 'LiXX',
  'Expected start date': '10/8/24',
  'Actual start date': '10/8/24',
  'Expected completion date': '10/15/24',
  'Acutal completion date': '10/15/24',
  'Improve immediately': 'yes',
  'Confirmation': 'LiXX',
  'appendix': 'NA',
  'Items to be improved': 'The saturation of the lower glass point Xiaoli Pill is 65%'},
 {'': 'NA',
  'Improvement direction': 'Judgment of value and reduce tasks without added value, Inspection of movement q

In [373]:
def flatten_dict(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, dict):
            items.extend(flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

# Flatten the nested dictionary
flat_data = []
for parameter, metrics in answers.items():
    flat_metrics = flatten_dict(metrics)
    flat_metrics['Items to be improved'] = parameter 
    flat_data.append(flat_metrics)

# Get all unique keys to use as CSV headers
headers = set()
for item in flat_data:
    headers.update(item.keys())

# Sort headers to ensure 'Items to be improved' comes first
headers = sorted(headers)
headers.insert(0, headers.pop(headers.index('Items to be improved')))
headers.insert(1, headers.pop(headers.index('Description')))
headers.insert(2, headers.pop(headers.index('Improvement direction')))
headers.insert(3, headers.pop(headers.index('Person responsible')))
headers.insert(4, headers.pop(headers.index('Expected start date')))
headers.insert(5, headers.pop(headers.index('Actual start date')))
headers.insert(6, headers.pop(headers.index('Expected completion date')))
headers.insert(7, headers.pop(headers.index('Actual completion date')))
headers.insert(8, headers.pop(headers.index('Improve immediately')))
headers.insert(9, headers.pop(headers.index('Confirmation')))
headers.insert(10, headers.pop(headers.index('appendix')))

# Write to CSV
with open('Report_format_2_complete.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=headers)
    writer.writeheader()
    for row in flat_data:
        writer.writerow(row)

ValueError: 'Description' is not in list

In [324]:
import pandas as pd
from IPython.core.display import HTML
pd.set_option('display.max_colwidth', 10)
out_df = pd.read_csv("Report_format_2_complete.csv")
html = out_df.to_html()
HTML(html)

Unnamed: 0,Items to be improved,Description,Improvement direction,Person responsible,Expected start date,Actual start date,Expected completion date,Actual completion date,Improve immediately,Confirmation,appendix
0,The saturation of the lower glass point Xiaoli Pill is 65%,,"The improvement direction for 'The saturation of the lower glass point Xiaoli Pill is 65%' is: 1. Judgment of value and reduce tasks without added value 2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards 3. Automated level inspection: simple and automated import 4. Merge and rearrange new job elements.",LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,
1,Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%,,"The 'Improvement direction' for 'Bottom glass electrophoresis tank+UVFixed baking operation saturation79.2%' is as follows: 1. Judgment of value and reduce tasks without added value 2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards 3. Automated level inspection: simple and automated import 4. Merge and rearrange new job elements.",LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,LiXX
2,Xiaoliwan wax+Paste conductive foam+Lower glass glue frame dispensing operation saturation75%,,"1. Judgment of value and reduce tasks without added value\n2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards\n3. Automated level inspection: simple and automated import\n4. Merge and rearrange new job elements",LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,LiXX
3,The upper and lower glass bonding saturation is 67.5%,,"The improvement direction for 'The upper and lower glass bonding saturation is 67.5%' is as follows: 1. Judgment of value and reduce tasks without added value 2. Inspection of movement quality and human engineering hazards: Reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards 3. Automated level inspection: simple and automated import 4. Merge and rearrange new job elements.",LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,
4,Five-in-one front&back saturation53.2%,,"The 'Improvement direction' for 'Five-in-one front&back saturation53.2%' is to reduce the number of movements, work with both hands at the same time, shorten the distance of movements, and make movements easier; eliminate human engineering hazards.",LiXX,10/8/24,10/8/24,10/15/24,10/15/24,,LiXX,


In [325]:
out_df.to_excel("Report_format_2_complete.xlsx", index=False)

In [None]:
#Creating new duration column for plotting the Gantt Chart

In [326]:
import os
import pandas as pd
file_path = '../llama_parse_al/Report_format_2_complete.csv'
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Items to be improved,Description,Improvement direction,Person responsible,Expected start date,Actual start date,Expected completion date,Actual completion date,Improve immediately,Confirmation,appendix
0,The sa...,,The im...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,
1,Bottom...,,The 'I...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,LiXX
2,Xiaoli...,,1. Jud...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,LiXX
3,The up...,,The im...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,
4,Five-i...,,The 'I...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,,LiXX,


In [327]:
df['duration'] = pd.to_datetime(df['Actual completion date']) - pd.to_datetime(df['Actual start date'])


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [328]:
df.head()

Unnamed: 0,Items to be improved,Description,Improvement direction,Person responsible,Expected start date,Actual start date,Expected completion date,Actual completion date,Improve immediately,Confirmation,appendix,duration
0,The sa...,,The im...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,,7 days
1,Bottom...,,The 'I...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,LiXX,7 days
2,Xiaoli...,,1. Jud...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,LiXX,7 days
3,The up...,,The im...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,yes,LiXX,,7 days
4,Five-i...,,The 'I...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,,LiXX,,7 days


In [329]:
# Position a column to a specified place in pandas
columns = list(df.columns)
columns.remove('duration')
insert_at = columns.index('Actual completion date') + 1
columns.insert(insert_at, 'duration')
df = df[columns]

In [330]:
# Save the excel file after formatting is done
df.to_excel('Report_format_2_complete_input.xlsx', index=False)

In [265]:
df.head()

Unnamed: 0,Items to be improved,Description,Improvement direction,Person responsible,Expected start date,Actual start date,Expected completion date,Actual completion date,duration,Improve immediately,Confirmation,appendix
0,The sa...,,The im...,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,7 days,yes,LiXX,
1,Bottom...,,,LiXX,10/8/24,10/8/24,10/15/24,10/15/24,7 days,yes,LiXX,LiXX


In [331]:
import os
file_path = '../llama_parse_al/Report_format_2_complete_input.xlsx'
os.path.exists(file_path)

True

In [332]:
from openpyxl import load_workbook
from openpyxl.chart import BarChart, Reference
from openpyxl.chart.label import DataLabelList
# Step 1: Load the Excel file
# file_path = "data.xlsx"  # Replace with your Excel file path
workbook = load_workbook(file_path)
sheet = workbook.active  # Use the active sheet (or specify by name: workbook['SheetName'])

# Step 2: Identify the data range (e.g., A1:B5)
# Assuming the first column has categories (e.g., tasks) and the second has values (e.g., counts)
categories = Reference(sheet, min_col=1, min_row=2, max_row=sheet.max_row)  # Task names
values = Reference(sheet, min_col=9, min_row=1, max_row=sheet.max_row)  # Values including header

# Step 3: Create a horizontal bar chart
chart = BarChart()
chart.type = "bar"  # Horizontal bar chart
chart.title = "Timeline Chart"
chart.y_axis.title = "Days"
chart.x_axis.title = "Tasks"
# Add data and categories to the chart
chart.add_data(values, titles_from_data=True)
chart.set_categories(categories)
# Step 5: Add data labels
data_labels = DataLabelList()  # Create a DataLabelList object
data_labels.showVal = True  # Show values on the bars
data_labels.position = 'inBase'
chart.dLbls = data_labels
chart.legend = None

# Step 4: Add the chart to the worksheet
sheet.add_chart(chart, "M2")  # Place the chart in column D, row 2

# Step 5: Save the updated Excel file
workbook.save("updated_data.xlsx")
print("Chart added to 'updated_data.xlsx'")

Chart added to 'updated_data.xlsx'


In [283]:
chart.dLbls

<openpyxl.chart.label.DataLabelList object>
Parameters:
delete=None, dLbl=[], numFmt=None, spPr=None, txPr=None, dLblPos=None, showLegendKey=None, showVal=True, showCatName=None, showSerName=None, showPercent=None, showBubbleSize=None, showLeaderLines=None, separator=None

In [159]:
from openpyxl import load_workbook
workbook = load_workbook("Report_format_2_complete.xlsx")

In [160]:
# Select the active worksheet (or specify the sheet by name)
sheet = workbook.active  # or workbook["SheetName"]

# Insert a new row at the beginning
sheet.insert_rows(1)  # Insert an empty row at the top

# Merge two cells (e.g., A1 and B1)
sheet.merge_cells("E1:F1")

# Write data into the merged cells
sheet["E1"] = "Date"

# Save the workbook
workbook.save("example.xlsx")

print("New row added at the beginning!")


New row added at the beginning!


In [None]:
from openpyxl.chart import 

ImportError: cannot import name 'Ganttchart' from 'openpyxl.chart' (C:\Users\Muhammad Al-Amin\Desktop\Al-Amin\llama_parse_al\env\lib\site-packages\openpyxl\chart\__init__.py)

In [162]:
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
from openpyxl.styles import NamedStyle
from datetime import datetime, timedelta

# Step 1: Create workbook and add data
wb = Workbook()
ws = wb.active
ws.title = "Gantt Chart"

# Define task data
data = [
    ["Task", "Start Date", "Duration (Days)"],
    ["Task 1", datetime(2024, 1, 1), 5],
    ["Task 2", datetime(2024, 1, 6), 3],
    ["Task 3", datetime(2024, 1, 9), 7],
    ["Task 4", datetime(2024, 1, 16), 4],
]

# Add data to worksheet
for row in data:
    ws.append(row)

# Apply date format to start date column
date_style = NamedStyle(name="date_style", number_format="MM/DD/YYYY")
for row in ws.iter_rows(min_row=2, max_row=len(data), min_col=2, max_col=2):
    for cell in row:
        cell.style = date_style

# Step 2: Calculate start offsets
start_date = data[1][1]  # The earliest start date (Task 1 start date)
ws["D1"] = "Start Offset (Days)"
for i in range(2, len(data) + 1):
    ws[f"D{i}"] = f"=B{i} - DATE({start_date.year}, {start_date.month}, {start_date.day})"

# Step 3: Create a BarChart for the Gantt chart
chart = BarChart()
chart.type = "bar"
chart.style = 12
chart.title = "Gantt Chart"
chart.y_axis.title = "Tasks"
chart.x_axis.title = "Days"
chart.barDir = "bar"  # Horizontal bars

# Add data for the chart
duration_values = Reference(ws, min_col=3, min_row=2, max_row=len(data))  # Durations
offset_values = Reference(ws, min_col=4, min_row=2, max_row=len(data))  # Start offsets
chart.add_data(duration_values, titles_from_data=False)

# Set categories (tasks) as labels
categories = Reference(ws, min_col=1, min_row=2, max_row=len(data))
chart.set_categories(categories)

# Set start offset as the base (stacked bar chart)
chart.add_data(offset_values, from_rows=False)

# Step 4: Add the chart to the worksheet
ws.add_chart(chart, "F2")

# Step 5: Save the workbook
wb.save("gantt_chart.xlsx")


In [167]:
from openpyxl import Workbook
from openpyxl.chart import BarChart, Series, Reference

wb = Workbook(write_only=True)
ws = wb.create_sheet()

rows = [
    ('Number', 'Batch 1', 'Batch 2'),
    (2, 10, 30),
    (3, 40, 60),
    (4, 50, 70),
    (5, 20, 10),
    (6, 10, 40),
    (7, 50, 30),
]

for row in rows:
    ws.append(row)

chart1 = BarChart()
chart1.type = "col"
chart1.style = 10
chart1.title = "Bar Chart"
chart1.y_axis.title = 'Test number'
chart1.x_axis.title = 'Sample length (mm)'

data = Reference(ws, min_col=2, min_row=1, max_row=7, max_col=3)
cats = Reference(ws, min_col=1, min_row=2, max_row=7)
chart1.add_data(data, titles_from_data=True)
chart1.set_categories(cats)
chart1.shape = 4
ws.add_chart(chart1, "A10")

from copy import deepcopy

chart2 = deepcopy(chart1)
chart2.style = 11
chart2.type = "bar"
chart2.title = "Horizontal Bar Chart"

ws.add_chart(chart2, "G10")


chart3 = deepcopy(chart1)
chart3.type = "col"
chart3.style = 12
chart3.grouping = "stacked"
chart3.overlap = 100
chart3.title = 'Stacked Chart'

ws.add_chart(chart3, "A27")


chart4 = deepcopy(chart1)
chart4.type = "bar"
chart4.style = 13
chart4.grouping = "percentStacked"
chart4.overlap = 100
chart4.title = 'Percent Stacked Chart'

ws.add_chart(chart4, "G27")

wb.save("bar.xlsx")

In [168]:
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
from datetime import datetime

# Step 1: Create a workbook and worksheet
wb = Workbook()
ws = wb.active
ws.title = "Gantt Chart"

# Step 2: Prepare data (tasks, start dates, durations)
data = [
    ["Task", "Start Date", "Duration (Days)", "Start Offset (Days)"],
    ["Task 1", datetime(2024, 1, 1), 5, 0],  # Offset calculated manually
    ["Task 2", datetime(2024, 1, 6), 3, 5],
    ["Task 3", datetime(2024, 1, 9), 7, 8],
    ["Task 4", datetime(2024, 1, 16), 4, 15],
]

# Add headers and data to the worksheet
for row in data:
    ws.append(row)

# Format date column
for cell in ws["B"]:
    if cell.row != 1:  # Skip the header
        cell.number_format = "MM/DD/YYYY"

# Step 3: Create a stacked bar chart
chart = BarChart()
chart.type = "bar"
chart.style = 10
chart.title = "Gantt Chart"
chart.y_axis.title = "Tasks"
chart.x_axis.title = "Days"
chart.barDir = "bar"  # Use horizontal bars

# Reference durations and offsets for the chart
duration_values = Reference(ws, min_col=3, min_row=2, max_row=len(data))  # Durations
offset_values = Reference(ws, min_col=4, min_row=2, max_row=len(data))  # Start offsets

# Add the data series
chart.add_data(duration_values, titles_from_data=False)  # Duration as the main bar
chart.add_data(offset_values, titles_from_data=False)    # Offset as the base

# Set task names as categories
categories = Reference(ws, min_col=1, min_row=2, max_row=len(data))
chart.set_categories(categories)

# Step 4: Add the chart to the worksheet
ws.add_chart(chart, "F2")

# Step 5: Save the workbook
wb.save("gantt_chart.xlsx")


In [181]:
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
from openpyxl.styles import NamedStyle
from datetime import datetime

# Step 1: Create a workbook and worksheet
wb = Workbook()
ws = wb.active
ws.title = "Gantt Chart"

# Step 2: Add task data (Task Name, Start Date, Duration)
data = [
    ["Task", "Start Date", "Duration (Days)"],
    ["Task 1", datetime(2024, 1, 1), 5],
    ["Task 2", datetime(2024, 1, 6), 3],
    ["Task 3", datetime(2024, 1, 10), 7],
    ["Task 4", datetime(2024, 1, 18), 4],
]

# Add header and data to worksheet
for row in data:
    ws.append(row)

# Apply date format to Start Date column
date_style = NamedStyle(name="date_style", number_format="MM/DD/YYYY")
for row in ws.iter_rows(min_row=2, max_row=len(data), min_col=2, max_col=2):
    for cell in row:
        cell.style = date_style

# Step 3: Calculate "Start Offset" column
start_date = data[1][1]  # Earliest start date
ws["D1"] = "Start Offset (Days)"  # Add header for Start Offset
for i, row in enumerate(data[1:], start=2):
    ws[f"D{i}"] = f"=B{i} - DATE({start_date.year}, {start_date.month}, {start_date.day})"

# Step 4: Create the Gantt chart
chart = BarChart()
chart.type = "bar"
chart.style = 11
chart.title = "Project Timeline"
chart.y_axis.title = "Tasks"
chart.x_axis.title = "Days"
chart.barDir = "bar"  # Horizontal bars

# Add data for the Gantt chart
# Series 1: Start Offset (empty space)
start_offset = Reference(ws, min_col=4, min_row=2, max_row=len(data))
# Series 2: Duration (bar length)
duration = Reference(ws, min_col=3, min_row=2, max_row=len(data))
chart.add_data(duration, titles_from_data=False)

# Set task names as categories
categories = Reference(ws, min_col=1, min_row=2, max_row=len(data))
chart.set_categories(categories)

# Add the chart to the worksheet
ws.add_chart(chart, "F2")

# Step 5: Save the workbook
wb.save("Gantt_Chart_new.xlsx")
print("Gantt chart created successfully!")


Gantt chart created successfully!


In [180]:
import openpyxl
import plotly.express as px

# Load Excel data
workbook = openpyxl.load_workbook('project_data.xlsx')
worksheet = workbook.active

# Extract data
tasks = []
start_dates = []
end_dates = []
for row in worksheet.iter_rows(min_row=2, values_only=True):
    tasks.append(row[0])
    start_dates.append(row[1])
    end_dates.append(row[2])

# Create Gantt chart using Plotly
fig = px.timeline(
    dict(Task=tasks, Start=start_dates, Finish=end_dates),
    x_start="Start",
    x_end="Finish",
    y="Task"
)
fig.update_layout(title_text="Project Gantt Chart")
fig.show()  # Display the chart

In [178]:
import plotly.figure_factory as ff

df = pd.DataFrame([
    dict(Task="Task 1", Start='2024-12-01', Finish='2024-12-05'),
    dict(Task="Task 2", Start='2024-12-05', Finish='2024-12-08'),
    dict(Task="Task 3", Start='2024-12-06', Finish='2024-12-10')
])

fig = ff.create_gantt(df, title='Project Gantt Chart',
                       show_colorbar=True, bar_width=0.5, showgrid_x=False, showgrid_y=False)

fig.show()

In [175]:
    # Example 5: Use a pandas dataframe

from plotly.figure_factory import create_gantt
import pandas as pd

# Make data as a dataframe
df = pd.DataFrame([['Run', '2010-01-01', '2011-02-02', 10],
                    ['Fast', '2011-01-01', '2012-06-05', 55],
                    ['Eat', '2012-01-05', '2013-07-05', 94]],
                   columns=['Task', 'Start', 'Finish', 'Complete'])

# Create a figure with Plotly colorscale
fig = create_gantt(df, colors='Blues', index_col='Complete',
                  show_colorbar=True, bar_width=0.5,
                    showgrid_x=True, showgrid_y=True)
fig.show()

In [185]:
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
from datetime import datetime

# Step 1: Create a workbook and worksheet
wb = Workbook()
ws = wb.active
ws.title = "Gantt Chart"

# Step 2: Prepare data (tasks, start dates, durations)
data = [
    ["Task", "Start Date", "Duration (Days)", "Start Offset (Days)"],
    ["Task 1", datetime(2024, 1, 1), 5, 0],  # Offset calculated manually
    ["Task 2", datetime(2024, 1, 6), 3, 5],
    ["Task 3", datetime(2024, 1, 9), 7, 8],
    ["Task 4", datetime(2024, 1, 16), 4, 15],
]

# Add headers and data to the worksheet
for row in data:
    ws.append(row)

# Format date column
for cell in ws["B"]:
    if cell.row != 1:  # Skip the header
        cell.number_format = "MM/DD/YYYY"

# Step 3: Create a stacked bar chart
chart = BarChart()
chart.type = "bar"
chart.style = 10
chart.overlap=100
chart.grouping = "percentStacked"
chart.title = "Gantt Chart"
chart.y_axis.title = "Tasks"
chart.x_axis.title = "Days"
chart.barDir = "bar"  # Use horizontal bars

# Reference durations and offsets for the chart
duration_values = Reference(ws, min_col=3, min_row=2, max_row=len(data))  # Durations
offset_values = Reference(ws, min_col=4, min_row=2, max_row=len(data))  # Start offsets

# Add the data series
chart.add_data(duration_values, titles_from_data=False)  # Duration as the main bar
chart.add_data(offset_values, titles_from_data=False)    # Offset as the base

# Set task names as categories
categories = Reference(ws, min_col=1, min_row=2, max_row=len(data))
chart.set_categories(categories)

# Step 4: Add the chart to the worksheet
ws.add_chart(chart, "F2")

# Step 5: Save the workbook
wb.save("gantt_chart_old.xlsx")
