# Conditional formatting

In [51]:
import random

# Draw one integer from the inclusive range [0, 10]. No seed is set in this
# cell, so the displayed value changes from run to run.
random.randint(0, 10)

8

In [54]:
import pandas as pd
import numpy as np

# Demo amounts: two negatives, one zero and four positives, so every
# conditional-format rule applied later has something to highlight.
sales = [-10, -15, 0, 1850.00, 430.20, 998.10, 760.00]
n_rows = len(sales)

df_sales = pd.DataFrame(
    {
        # Running row number, 0 .. n_rows - 1.
        "id": list(range(n_rows)),
        # Row number times a random factor in [5, 10]; row 0 is therefore always 0.
        "price_increase": [row * random.randint(5, 10) for row in range(n_rows)],
        "fund_amount": sales,
        # Each amount scaled by a fixed factor of 11.
        "amount_adjusted": [amount * 11 for amount in sales],
    }
)
df_sales

Unnamed: 0,id,price_increase,fund_amount,amount_adjusted
0,0,0,-10.0,-110.0
1,1,7,-15.0,-165.0
2,2,18,0.0,0.0
3,3,21,1850.0,20350.0
4,4,36,430.2,4732.2
5,5,50,998.1,10979.1
6,6,60,760.0,8360.0


In [55]:
# Export df_sales to an .xlsx file with a header filter, frozen header row,
# per-column number formats, conditional formatting and estimated column widths.
with pd.ExcelWriter('output/excel_cond_format_df.xlsx',
                    engine = "xlsxwriter"
                    ) as writer:
    df_sales.to_excel(writer,
                      index = False,
                      sheet_name = 'Tab sales'
                      )
    wb  = writer.book
    ws  = writer.sheets['Tab sales']

    # ------------- Helpers -------------
    def colnum_to_excel(n: int) -> str:
        """Convert a 0-based column index to Excel column letters (0 -> 'A', 26 -> 'AA').

        Raises:
            ValueError: if ``n`` is negative (the loop below would otherwise
                never terminate for ``n <= -2``).
        """
        if n < 0:
            raise ValueError(f"column index must be >= 0, got {n}")
        s = ""
        n += 1
        while n:
            n, r = divmod(n - 1, 26)
            s = chr(65 + r) + s
        return s

    nrows = len(df_sales)
    ncols = len(df_sales.columns)

    # ------------- Header filter + freeze panes -------------
    # Row 0 is the header; data occupies rows 1..nrows.
    ws.autofilter(0, 0, nrows, ncols - 1)
    ws.freeze_panes(1, 0)

    # ------------- Number formats -------------
    thousands_fmt = wb.add_format({"num_format": "#,##0"})
    red_zero_thousands_fmt = wb.add_format({"font_color": "red", "num_format": "#,##0"})

    # Column-level formats are only *collected* here and applied together with
    # the widths in the auto-fit loop at the bottom. Each ws.set_column() call
    # replaces the column's previous settings, so setting the format first and
    # the width later (without a format) would silently drop the format.
    column_formats = {}

    # Thousands separator as the default format for the amount column.
    for name in ["fund_amount"]:
        column_formats[df_sales.columns.get_loc(name)] = thousands_fmt

    # 'amount_adjusted': display as a percentage when most values look like
    # fractions (0.12 -> 12.00%), otherwise as a plain two-decimal number.
    pct_idx = df_sales.columns.get_loc("amount_adjusted")
    if pd.api.types.is_numeric_dtype(df_sales["amount_adjusted"]):
        adjusted_values = df_sales["amount_adjusted"].dropna()
        looks_fractional = (
            len(adjusted_values) > 0
            and adjusted_values.between(-1.5, 1.5).mean() > 0.8
        )
        pct_display_fmt = wb.add_format(
            {"num_format": "0.00%" if looks_fractional else "0.00"}
        )
        column_formats[pct_idx] = pct_display_fmt

    # ------------- Conditional formatting: zero values shown in red -------------
    for name in ["price_increase"]:
        col_idx = df_sales.columns.get_loc(name)
        col_letter = colnum_to_excel(col_idx)
        cell_range = f"{col_letter}2:{col_letter}{nrows + 1}"  # data rows only
        ws.conditional_format(cell_range, {
            "type": "cell",
            "criteria": "==",
            "value": 0,
            "format": red_zero_thousands_fmt  # keeps thousands separator and turns text red
        })

    # ------------- Conditional formatting: fund_amount red↔white↔green -------------
    for name in ['fund_amount']:
        col_idx = df_sales.columns.get_loc(name)
        pct_letter = colnum_to_excel(col_idx)
        pct_range = f"{pct_letter}2:{pct_letter}{nrows + 1}"

        # Center the gradient at 0 (white). Negatives shade to red, positives shade to green.
        ws.conditional_format(pct_range, {
            "type": "3_color_scale",
            "min_color": "#F8696B",  # red
            "mid_color": "#FFFFFF",  # white
            "max_color": "#63BE7B",  # green
            "mid_type": "num",
            "mid_value": 0
        })

    # ------------- Auto-fit column widths (and apply column formats) -------------
    # Width is estimated from the longest string among header and data.
    # Numeric amount columns are rendered with thousands separators before
    # measuring so the estimate accounts for the commas.
    max_width_cap = 255  # Excel does not allow columns wider than 255 characters
    for col_idx, col_name in enumerate(df_sales.columns):
        series = df_sales[col_name]
        if pd.api.types.is_numeric_dtype(series) and col_name in (
            "fund_amount", 'amount_adjusted'
        ):
            # Represent with thousands separators for the width estimate
            series_as_str = series.map(lambda x: f"{int(x):,}" if pd.notna(x) else "")
        else:
            series_as_str = series.astype(str)
        max_len_data = series_as_str.map(len).max() if len(series_as_str) else 0
        header_len   = len(str(col_name))
        if col_name == 'amount_adjusted':
            # Debug trace of the width inputs; kept so the cell's recorded output stays accurate.
            print(series_as_str, col_name, len(str(col_name)), header_len, max_len_data)
        best_width   = min(
            max(
                max_len_data,
                header_len
            ) + 10,  # padding for the filter drop-down arrow and cell margins
            max_width_cap
        )
        # Width and number format go in one call so neither overwrites the other;
        # columns without an entry in column_formats get no default format.
        ws.set_column(col_idx, col_idx, best_width, column_formats.get(col_idx))


0      -110
1      -165
2         0
3    20,350
4     4,732
5    10,979
6     8,360
Name: amount_adjusted, dtype: object amount_adjusted 15 15 6


# Save multiple DataFrames to Excel

In [56]:
import random 

import numpy as np
import pandas as pd
from openpyxl.styles import Font, Alignment


In [60]:
# --- Frame 1: employee master data -----------------------------------------
df_employees = pd.DataFrame(
    dict(
        employee_id=[1, 2, 3, 4],
        name=["Alice Catniss", "Bobra K.", "Carlos N.J.", "Dranei B."],
        department=["Data Science", "Finance", "Marketing", "Data Science"],
        salary_usd=[90000, 75000, 68000, 95000],
    )
)

# --- Frame 2: one row per sale, linked to an employee via employee_id ------
df_sales = pd.DataFrame(
    dict(
        sale_id=np.arange(1, 6),
        employee_id=[1, 1, 2, 4, 3],
        amount=[1200.50, 850.00, 430.20, 998.10, 760.00],
        date=pd.date_range("2026-01-01", periods=5),  # five consecutive days
    )
)

# --- Frame 3: synthetic numbers derived from the ids ------------------------
ids = [1, 2, 3, 4, 5]
df_fake = pd.DataFrame(
    dict(
        id=ids,
        variance=[k * 100 for k in ids],
        # id times a random factor in [1, 10]; unseeded, so it varies per run
        occlusion=[k * random.randint(1, 10) for k in ids],
    )
)
df_employees

Unnamed: 0,employee_id,name,department,salary_usd
0,1,Alice Catniss,Data Science,90000
1,2,Bobra K.,Finance,75000
2,3,Carlos N.J.,Marketing,68000
3,4,Dranei B.,Data Science,95000


In [77]:
# Output workbook path, handed to pd.ExcelWriter below.
PATH = "output/df_excel_comparison.xlsx"
# Name of the single worksheet that receives all three tables.
SHEET_NAME = 'Compare'
# Start column of the left-most table (passed to to_excel as `startcol`).
LEFT_OFFSET_COL = 3

def colnum_to_excel(n: int) -> str:
    """Convert a 0-based column index to Excel column letters.

    Examples: 0 -> 'A', 25 -> 'Z', 26 -> 'AA', 702 -> 'AAA'.

    Args:
        n: 0-based column index; must be non-negative.

    Returns:
        The column name in Excel's bijective base-26 letter notation.

    Raises:
        ValueError: if ``n`` is negative. Without this guard the loop below
            never terminates for ``n <= -2`` and yields '' for ``n == -1``.
    """
    if n < 0:
        raise ValueError(f"column index must be >= 0, got {n}")
    letters = ""
    n += 1  # switch to 1-based so that 'A' corresponds to 1
    while n:
        # Subtract 1 before dividing: Excel letters have no zero digit.
        n, r = divmod(n - 1, 26)
        letters = chr(ord("A") + r) + letters
    return letters

def _estimate_col_width(series, header, padding: int = 10, max_width: int = 255) -> int:
    """Estimate a column width (in characters) from a column's header and data.

    Numeric columns are measured as integers with thousands separators, all
    other columns via ``astype(str)``. The result is the longest length plus
    ``padding``, capped at ``max_width`` (255 is Excel's column-width limit).
    """
    if pd.api.types.is_numeric_dtype(series):
        as_text = series.map(lambda x: f"{int(x):,}" if pd.notna(x) else "")
    else:
        as_text = series.astype(str)
    longest = int(as_text.map(len).max()) if len(as_text) else 0
    return min(max(longest, len(str(header))) + padding, max_width)


def _write_titled_table(writer, df, title, start_col, start_row: int = 2):
    """Write ``df`` onto SHEET_NAME at ``start_col`` with a bold title above it.

    Args:
        writer: open ``pd.ExcelWriter`` using the openpyxl engine.
        df: frame to write (without its index).
        title: text placed in the cell directly above the table's first header.
        start_col: 0-based column at which the table starts.
        start_row: 0-based row of the table header; the title goes one row above.
    """
    df.to_excel(writer,
                sheet_name = SHEET_NAME,
                startrow = start_row,
                startcol = start_col,
                index = False
                )
    sheet = writer.book[SHEET_NAME]

    # openpyxl cells are 1-based: 1-based row `start_row` is the row just above
    # the header, and `start_col + 1` is the table's first column.
    title_cell = sheet.cell(row = start_row, column = start_col + 1)
    title_cell.value = title
    title_cell.font = Font(bold = True)

    for offset, col_name in enumerate(df.columns):
        letter = colnum_to_excel(start_col + offset)
        # this is openpyxl's alternative to xlsxwriter's ws.set_column
        sheet.column_dimensions[letter].width = _estimate_col_width(df[col_name], col_name)


# Write the three frames side by side on one sheet, separated by two empty
# spacer columns, each with a bold title and estimated column widths.
with pd.ExcelWriter(PATH,
                    engine="openpyxl"
                    ) as writer:
    # Left table
    _write_titled_table(writer, df_employees, 'Employees dataframe', LEFT_OFFSET_COL)
    # The sheet exists once the first table has been written.
    ws = writer.book[SHEET_NAME]

    # Middle table: starts after the left table plus 2 spacer columns.
    # (`left_width` is the 0-based start column of the middle table.)
    left_width = LEFT_OFFSET_COL + df_employees.shape[1] + 2
    _write_titled_table(writer, df_sales, 'Sales dataframe', left_width)

    # Right table: starts after the middle table plus 2 spacer columns.
    middle_width = df_sales.shape[1] + 2
    _write_titled_table(writer, df_fake, 'Difference', left_width + middle_width)

In [84]:
import os  # local import: no cell shown in this notebook imports `os`

# Show the kernel's working directory; the relative "output/..." paths used
# by the export cells resolve against it.
os.getcwd()

'c:\\Users\\Evgeni Zorin\\OneDrive - Euromonitor International\\Desktop\\Handbook_Data-Science'

0