In [16]:
import re

In [17]:
# Sample formulas, written as free text, used as input for variable extraction.
# Variable names may contain spaces, '%' and embedded hyphens ("month-end",
# "Non-Earning", "On-Time"); operators are +, -, *, /, ^ and parentheses.
# NOTE(review): "Convery" and "Refereral" look like misspellings; they are
# reproduced as-is here and will appear verbatim in the extracted names —
# confirm against the upstream definitions before correcting them.
equation_list = [
    "(current month-end discount factor / prior month-end discount factor - 1) * 12",
    "(1 + monthly rate) ^ 12",
    "(Prior Month Ending Loan Count) * Gross Default SMM",
    "(1 - (1 - Curtail SMM)^12)",
    "WAC - (Note Rate - Coupon Rate)",
    "Future Non-Earning FCL Completion Amount * (Foreclosure Convery % + Foreclosure Deed in Lieu %) * (Presale Months + Postsale Months)",
    "Net Default Count * (1-(Recovery% + Payoff% + Modification%))",
    # The minus below has no surrounding spaces ("1-On-Time").
    "Foreclosure Count *(1-On-Time Refereral%)"
]

In [18]:
# Extract the variables from each equation.
# Replace all math operators with a comma delimiter, including +, -, *, /, ^, (, ).
# Note that a hyphen with at least one space before or after it is considered a minus sign.
# Return the list of variables for each equation.

In [19]:
# Operators that always separate operands. The hyphen is handled separately
# because it can also be part of a variable name (e.g. "month-end").
_OPERATOR_RE = re.compile(r"[+*/^()]")

# A hyphen is a minus sign unless it is glued to non-delimiter characters on
# BOTH sides. The negative look-arounds also succeed at the very start / end
# of the string, so a leading unary minus ("-Loss + ...") is recognised too.
_MINUS_RE = re.compile(r"(?<![^\s,])-|-(?![^\s,])")

# Plain numeric constants: "12", "0.5", "12.", ".5".
_NUMBER = r"(?:\d+(?:\.\d*)?|\.\d+)"
# A whole operand that is just a constant, optionally written as a percentage.
_NUMERIC_LITERAL_RE = re.compile(_NUMBER + r"%?")
# A constant glued to a following minus at the start of an operand ("1-X").
_NUMERIC_PREFIX_RE = re.compile(_NUMBER + r"-")


def extract_variables(equation, *, numeric_prefix_is_minus=True):
    """Return the variable names that appear in a free-text equation.

    The equation is split on the operators ``+ * / ^ ( )`` and on every
    hyphen that acts as a minus sign. A hyphen is kept as part of a name only
    when it has non-space, non-operator characters directly on both sides
    (``month-end``, ``Non-Earning``).

    Args:
        equation: The formula text, e.g. ``"WAC - (Note Rate - Coupon Rate)"``.
        numeric_prefix_is_minus: When True (default), an operand that starts
            with a numeric constant immediately followed by a hyphen, such as
            ``1-On-Time Referral%``, is read as ``1 - <name>`` and the
            constant is dropped. This is a heuristic: a name that genuinely
            begins with a number and a hyphen (``30-Day DQ``) would lose its
            prefix, so pass False for inputs that contain such names.

    Returns:
        A list of variable names in order of appearance, whitespace-stripped.
        Numeric constants (integers, decimals, percentages) and empty
        fragments are omitted. Repeated variables are not de-duplicated.
    """
    # Step 1: unambiguous operators become commas.
    delimited = _OPERATOR_RE.sub(",", equation)
    # Step 2: hyphens acting as a minus sign become commas.
    delimited = _MINUS_RE.sub(",", delimited)

    variables = []
    for fragment in delimited.split(","):
        name = fragment.strip()

        if numeric_prefix_is_minus:
            # Peel off constants glued to a minus, e.g. "1-On-Time" -> "On-Time".
            prefix = _NUMERIC_PREFIX_RE.match(name)
            while prefix:
                name = name[prefix.end():].lstrip()
                prefix = _NUMERIC_PREFIX_RE.match(name)

        if not name:
            continue
        # str.isnumeric() alone misses decimals such as "0.5"; keep it so that
        # everything it used to reject is still rejected.
        if name.isnumeric() or _NUMERIC_LITERAL_RE.fullmatch(name):
            continue
        variables.append(name)

    return variables

# Process each equation in the list
# Run the extractor over every sample equation and show each equation
# directly above the variables found in it, with a blank line between entries.
for eq in equation_list:
    variables = extract_variables(eq)
    print(f"Equation: {eq}", f"Variables: {variables}\n", sep="\n")

Equation: (current month-end discount factor / prior month-end discount factor - 1) * 12
Variables: ['current month-end discount factor', 'prior month-end discount factor']

Equation: (1 + monthly rate) ^ 12
Variables: ['monthly rate']

Equation: (Prior Month Ending Loan Count) * Gross Default SMM
Variables: ['Prior Month Ending Loan Count', 'Gross Default SMM']

Equation: (1 - (1 - Curtail SMM)^12)
Variables: ['Curtail SMM']

Equation: WAC - (Note Rate - Coupon Rate)
Variables: ['WAC', 'Note Rate', 'Coupon Rate']

Equation: Future Non-Earning FCL Completion Amount * (Foreclosure Convery % + Foreclosure Deed in Lieu %) * (Presale Months + Postsale Months)
Variables: ['Future Non-Earning FCL Completion Amount', 'Foreclosure Convery %', 'Foreclosure Deed in Lieu %', 'Presale Months', 'Postsale Months']

Equation: Net Default Count * (1-(Recovery% + Payoff% + Modification%))
Variables: ['Net Default Count', 'Recovery%', 'Payoff%', 'Modification%']

Equation: Foreclosure Count *(1-On-Time 