In [None]:
import pandas as pd
import numpy as np

In [None]:
def zy_calc_tpm(dt, le, ID="name", le_name="length"):
    """
    Calculate TPM (Transcripts Per Million) from a table of read counts.

    Each count is divided by the length of its transcript, and every sample
    (column) is then rescaled so that its length-normalized values sum to 1e6.

    Args:
        dt (pd.DataFrame): Read counts with transcripts as rows (the index
            holds the transcript IDs) and samples as columns.
        le (pd.DataFrame): Transcript lengths. Transcript IDs are read from
            column `ID`; when `le` has no such column, its existing index is
            used as the transcript IDs (e.g. `le` was already indexed).
        ID (str): Column in `le` that identifies transcript names.
        le_name (str): Column in `le` that contains transcript lengths.

    Returns:
        pd.DataFrame: TPM-normalized data with the same index and columns
            as `dt`.

    Raises:
        KeyError: If `le_name` is not a column of `le`, or if any row label
            of `dt` has no matching entry in `le`.

    Note:
        Lengths are used as given; zero lengths are not checked here and
        will propagate as inf/NaN through the division.
    """
    # Index `le` by transcript ID unless it already is (no `ID` column left).
    if ID in le.columns:
        le = le.set_index(ID)

    lengths = le[le_name]

    # Fail early with an explicit message instead of an opaque lookup error
    # when some transcripts in `dt` have no known length.
    missing = dt.index.difference(lengths.index)
    if len(missing) > 0:
        preview = ", ".join(str(label) for label in missing[:5])
        raise KeyError(
            "{} transcript ID(s) in `dt` have no length in `le` "
            "(e.g. {})".format(len(missing), preview)
        )

    # Reorder/subset the lengths so they line up row-for-row with `dt`.
    lengths = lengths.loc[dt.index]

    # Divide counts by transcript lengths (row-wise).
    dt_norm = dt.div(lengths, axis=0)

    # Scale each sample so its length-normalized values sum to one million.
    tpm = dt_norm.div(dt_norm.sum(axis=0), axis=1) * 1e6

    return tpm