<a href="https://colab.research.google.com/github/aknip/Coding-Cheatsheets/blob/main/Python-Excel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# tl;dr

- Use openpyxl: it is the only solution that can read and **update** existing Excel files. Other frameworks can only **create new** Excel files.
- None of the Excel frameworks can evaluate computed values (formulas). The initial computed cell value can be read, but it is not recalculated after the sheet is updated. After saving, the cell value is read back as "empty"!
- For full formula support, a "real" Excel engine is needed (Windows, Mac or a server solution) together with a corresponding framework.


In [None]:
!pip install pandas openpyxl

In [None]:
import json
import os
import textwrap

Secrets (JSON string): ··········


# 1. Create Excel file

In [None]:
import pandas as pd
import openpyxl
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# see https://openpyxl.readthedocs.io/en/2.4/pandas.html

# Build the sample data as a pandas DataFrame.
df = pd.DataFrame({
    'Name': ['Miller', 'Adams', 'Smith'],
    'Prompt': ['Write something', 'Do different things', 'Summarize it'],
})

# Write the DataFrame with openpyxl as the engine. Using the writer as a
# context manager saves the workbook and releases the file handle on exit,
# so no separate save call is needed.
with pd.ExcelWriter('llm_benchmark.xlsx', engine='openpyxl') as writer:
    # index=True also writes the DataFrame index as the first column.
    df.to_excel(writer, sheet_name='Benchmark', index=True)

    # The openpyxl objects behind the writer (these are not XlsxWriter objects).
    workbook = writer.book
    worksheet = writer.sheets['Benchmark']

# 1.1 Style cells and columns

In [None]:
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter

# Open the sheet to style. Everything below changes the in-memory workbook
# only; call styled_workbook.save('llm_benchmark.xlsx') to persist it. That is
# not done here so the following sections keep reading the unstyled data.
styled_workbook = load_workbook(filename='llm_benchmark.xlsx')
ws = styled_workbook['Benchmark']

# Column widths: set a common width in a loop, then override single columns.
for i in range(1, 3):
    ws.column_dimensions[get_column_letter(i)].width = 15
ws.column_dimensions['A'].width = 10
ws.column_dimensions['B'].width = 100

# Date / number format of a single cell.
ws['A3'].number_format = 'DD.MM.YYYY'

# Setting the format on the column dimension was observed to have no effect
# on the cells of that column ("does not work???") -- TODO confirm.
ws.column_dimensions['A'].number_format = '#,##0.00'

# Workaround: loop through all cells of the column and format each one.
all_columns_cells = ws['A1:A{}'.format(ws.max_row)]
for row in all_columns_cells:
    for cell in row:
        cell.number_format = 'DD.MM.YYYY'
# or via function:
def style_column(worksheet, range_string, style_string):
    """Apply a number format to every cell of a range that ends at the last row.

    Args:
        worksheet: Sheet to style. It must provide ``max_row`` and support
            range indexing (``worksheet['A2:A10']``) that yields rows of cells.
        range_string: Range template with one ``{}`` placeholder, which is
            filled with ``worksheet.max_row`` (for example ``'A2:A{}'``).
        style_string: Number format code assigned to each cell's
            ``number_format`` (for example ``'DD.MM.YYYY'``).
    """
    # Resolve the last row from the sheet that was passed in, not from a
    # global variable, so the function works for any worksheet.
    cell_range = range_string.format(worksheet.max_row)
    for row in worksheet[cell_range]:
        for cell in row:
            cell.number_format = style_string
# Format column A from row 2 down to the last row as dates.
style_column(ws, 'A2:A{}', 'DD.MM.YYYY')



# 2. Read Excel file

In [None]:
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook

# Open the existing file. Pass data_only=True to read the stored results of
# formula cells instead of the formulas themselves.
workbook = load_workbook(filename='llm_benchmark.xlsx')
worksheet = workbook['Benchmark']  # alternative: workbook.active

# Map each header in row 1 to its 1-based column number, so columns can be
# addressed by name.
column = {}
for column_index, cell in enumerate(worksheet["1"], start=1):
    column[cell.value] = column_index

# Print every non-empty cell, row by row. Iterating the worksheet already
# yields the cell objects, so no second lookup by coordinates is needed.
for row in worksheet:
    for cell in row:
        if cell.value is not None:
            print(cell.value)

# Get the column number for a header name.
print(column["Prompt"])

# Read data: a cell can be addressed by coordinates or by its address.
cell = worksheet.cell(row=2, column=2)
cell = worksheet["B2"]
print(cell.value)

# Write data. NOTE: this overwrites B2 in the file saved below.
cell.value = "Test"

# Save the Excel file.
workbook.save('llm_benchmark.xlsx')

3
Miller


# 3.1 Read Excel file as dataframe

In [None]:
import pandas as pd
#import xlrd

# Load the 'Benchmark' sheet into a DataFrame.
# Optional read_excel arguments: parse_dates=['date'], dtype={'column_name': float}
# (df.head() shows only the first rows of a larger frame.)
df = pd.read_excel('llm_benchmark.xlsx', sheet_name='Benchmark')

print("Given Dataframe :\n", df)

# Variant 1: walk the index and the two columns in parallel.
print("\nIterating 1:")
for i, name, prompt in zip(df.index, df['Name'], df['Prompt']):
    print(i, name, prompt)

# Variant 2: iterate over (index, row) pairs.
print("\nIterating 2:")
for index, row in df.iterrows():
    name, prompt = row['Name'], row['Prompt']
    print(f"{index}: {name}, {prompt}")

Given Dataframe :
    Unnamed: 0    Name               Prompt
0           0  Miller      Write something
1           1   Adams  Do different things
2           2   Smith         Summarize it

Iterating 1:
0 Miller Write something
1 Adams Do different things
2 Smith Summarize it

Iterating 2:
0: Miller, Write something
1: Adams, Do different things
2: Smith, Summarize it


# 3.2 Update data in DataFrame and write Excel file

In [None]:
# Single-cell updates, each given as (row label, column name, new value).
cell_updates = [
    (1, 'Name', 'Name-Updated'),
    (2, 'Prompt', 'Prompt-Updated'),
]
for row_label, column_name, new_value in cell_updates:
    df.at[row_label, column_name] = new_value

print(df.head())

   Unnamed: 0          Name               Prompt
0           0        Miller      Write something
1           1  Name-Updated  Do different things
2           2         Smith       Prompt-Updated


In [None]:
import pandas as pd
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
workbook = load_workbook(filename='llm_benchmark.xlsx')
# Address the sheet by name (as in the read example) instead of relying on
# whichever sheet happens to be active.
worksheet = workbook['Benchmark']

# Resolve the target columns from the header row instead of hard-coding
# their positions: header text -> 1-based column number.
header_columns = {
    cell.value: position
    for position, cell in enumerate(worksheet["1"], start=1)
}
name_column = header_columns['Name']
prompt_column = header_columns['Prompt']

# Row 1 holds the headers, so DataFrame row i is written to sheet row i + 2.
# This assumes df still has the default 0-based index from read_excel.
first_data_row = 2

print("Updating Excel file:")
for index, row in df.iterrows():
    name = row['Name']
    prompt = row['Prompt']
    print(f"{index}: {name}, {prompt}")
    worksheet.cell(row=index + first_data_row, column=name_column, value=name)
    worksheet.cell(row=index + first_data_row, column=prompt_column, value=prompt)

# Alternative: delete the data rows and append the dataframe to the worksheet.
# Keep deleting row 2 until only the first row (header row) is left:
# while(worksheet.max_row > 1):
#     worksheet.delete_rows(2) # removes the row 2
# Then append the dataframe to the worksheet:
# for row in dataframe_to_rows(df, index=False, header=False):
#     if row != [None]:
#        worksheet.append(row)

# Save the Excel file.
workbook.save('llm_benchmark.xlsx')

Updating Excel file:
0: Miller, Write something
1: Name-Updated, Do different things
2: Smith, Prompt-Updated


# 3.2.1 Update data in DataFrame and write Excel file / Notes

Pandas to existing Excel

How to write data to an arbitrary location in an existing Excel workbook

1. manual, iterating:

https://stackoverflow.com/questions/72669750/how-to-update-an-existing-excel-spreadsheet-with-data-from-dataframe

2. df.to_excel

https://stackoverflow.com/questions/69628517/put-pandas-data-frame-to-existing-excel-sheet

3. General code examples: How to Process Excel Data in Python and Pandas

1 How to Process Excel Files with Python?
2 How to Create an Excel File in Python?
3 Export Pandas Dataframe to Excel Sheet in Python
4 Create an Excel File With Multiple Sheets in Python
5 How to Read Excel Data in Python?
6 Update an Excel File in Python
7 Performing Delete Operations on an Excel File
8 Merge Excel Sheets Into a CSV File in Python
9 Conclusion

https://python.land/data-processing/process-excel-data-in-python-and-pandas
