Version 1.0, 1-10-2025

# **5. Skript: Formatierung**

**Skript läuft in Azure Machine Learning Studio. Empfohlene Compute-Umgebung: 16 Kerne, 64 GB RAM, 400 GB Festplatte (CPU). Kernel: Python 3.10 SDK v2**

**1. Zelle: Installationen von zusätzlichen Libraries**

In [None]:
%pip install openpyxl==3.1.5

**2. Zelle: Imports, define parameters, set hardcoded information**

In [None]:
# 14.3.25: fkt. für HMR
# 11.4.24: formatting articles works
# colors also clusters in Allg. Sheets
# introduces thin borders everywhere


from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Border, Side
from openpyxl.styles import Alignment

# Path of the Excel workbook to format (placeholder; replace with the real path).
input_file = "/YOUR/INPUT/FOLDER/PATH/HERE/FILE.XLSX"
# Output path: "_formatted.xlsx" is appended to the full input path, so the
# original extension stays inside the name (e.g. ".../FILE.XLSX_formatted.xlsx").
output_file = f'{input_file}_formatted.xlsx'

**3. Zelle: Hauptskript**

In [None]:
#from openpyxl.utils import get_column_letter

def color_rows_by_cluster(sheet):
    """Fill every data row with a color chosen by its cluster and add thin borders.

    The cluster is read from the first column of each row, starting at row 2
    (row 1 is skipped). Each distinct cluster value gets the next color from
    ``generate_color`` in order of first appearance; all cells of the row
    receive that solid fill and a thin black border on all four sides.

    Args:
        sheet: openpyxl worksheet to format in place.
    """
    # The border is identical for every cell, so build it once.
    thin_side = Side(style='thin', color='000000')
    border = Border(left=thin_side, right=thin_side, top=thin_side, bottom=thin_side)

    # One fill per distinct cluster value, created on first appearance.
    cluster_fills = {}

    for cells in sheet.iter_rows(min_row=2):
        cluster = cells[0].value  # Assuming "cluster" is the first column
        fill = cluster_fills.get(cluster)
        if fill is None:
            color = generate_color(len(cluster_fills))
            fill = PatternFill(start_color=color, end_color=color, fill_type="solid")
            cluster_fills[cluster] = fill

        for cell in cells:
            cell.fill = fill
            cell.border = border

def generate_color(cluster_index):
    """Return a light hex color (RRGGBB, no '#') for the given cluster index.

    The palette holds eight colors and is cycled, so indices that differ by a
    multiple of eight map to the same color.
    """
    palette = (
        'FFFF99',
        'FFCCCC',
        '99FF99',
        '99FFFF',
        '9999FF',
        'FF99FF',
        'FF9999',
        'CCCCCC',
    )
    slot = cluster_index % len(palette)
    return palette[slot]

# TODO: possibly remove this function
def draw_line_for_artikel(sheet):
    """Draw a thick top border on every row where the "Artikel" value changes.

    The header (row 1) is searched for a cell whose text is exactly
    "Artikel". Walking the rows from top to bottom, each row whose value in
    that column differs from the row above it (the header counts as the row
    above the first data row) gets a border with a thick top edge and thin
    black left, right and bottom edges on all of its cells. Empty cells are
    compared as the empty string.

    Args:
        sheet: openpyxl worksheet to format in place.

    Raises:
        ValueError: if no header cell is exactly "Artikel".
    """
    # Read header from the first row
    header = [str(value) for value in next(sheet.iter_rows(min_row=1, max_row=1, values_only=True))]

    # Find the column index of "Artikel"
    try:
        artikel_col_index = header.index("Artikel")
    except ValueError as err:
        raise ValueError("Column 'Artikel' not found in the sheet header") from err

    # Same border for every separator row, so build it once. The thin bottom
    # side is kept so that assigning this border does not erase the bottom
    # edge of a row that already had a full thin border.
    thin_side = Side(style='thin', color='000000')
    separator_border = Border(top=Side(style='thick'),
                              right=thin_side,
                              left=thin_side,
                              bottom=thin_side)

    prev_artikel = None

    # Rows are 1-based; row 1 is the header and only seeds prev_artikel.
    for row_num, row in enumerate(sheet.iter_rows(min_row=1, max_row=sheet.max_row, values_only=True), start=1):
        value = row[artikel_col_index]
        artikel = "" if value is None else str(value)
        if artikel == prev_artikel:
            continue
        prev_artikel = artikel
        if row_num > 1:
            for cell in sheet[row_num]:
                cell.border = separator_border

def align_text(sheet):
    """Wrap text and align it top-left in every non-empty cell of the sheet."""
    wrapped_top_left = Alignment(wrap_text=True, vertical='top', horizontal='left')
    all_rows = sheet.iter_rows(min_row=1, max_row=sheet.max_row, min_col=1, max_col=sheet.max_column)
    for cells in all_rows:
        for cell in cells:
            if cell.value is None:
                continue  # empty cells keep their current alignment
            cell.alignment = wrapped_top_left

def format_workbook(input_file, output_file):
    """Format every worksheet of an Excel workbook and save it under a new name.

    For each sheet: freeze the first row and the first three columns, wrap and
    top-left align all non-empty cells, and color the data rows by cluster
    with thin borders. Sheets whose header row has at least seven cells and
    contains "Artikel" in one of its text cells additionally get a thick line
    between articles.

    Args:
        input_file: path of the workbook to load.
        output_file: path the formatted workbook is written to.
    """
    wb = load_workbook(filename=input_file, data_only=False)
    for sheet in wb:
        sheet.freeze_panes = sheet['D2']  # Freezes the first row and the first three columns
        align_text(sheet)

        header_cells = sheet[1]
        # Header cells may be empty (None) or numeric; only text cells can
        # contain "Artikel", and testing `in` on a non-string would raise
        # TypeError.
        # NOTE(review): this is a substring match, while draw_line_for_artikel
        # requires a header that is exactly "Artikel" - confirm which is intended.
        has_artikel_column = len(header_cells) >= 7 and any(
            isinstance(cell.value, str) and "Artikel" in cell.value
            for cell in header_cells
        )

        # Row coloring applies to every sheet; the article separator only to
        # sheets with an "Artikel" column.
        color_rows_by_cluster(sheet)
        if has_artikel_column:
            draw_line_for_artikel(sheet)

    wb.save(output_file)

    # NOTE(review): the message says "Translated" although this step only
    # formats; wording left unchanged in case other tooling relies on it.
    print(f"Translated data saved to {output_file}")

# Example usage: format the workbook at input_file and save the result to output_file
format_workbook(input_file, output_file)