# Symbolic Regression with NeSymReS

In this part of the homework, you will explore the implementation and evaluation of Neural Symbolic Regression that Scales (NeSymReS), an approach to symbolic regression that leverages neural networks for predicting mathematical equations from data. Symbolic regression combines machine learning and symbolic computation to discover interpretable mathematical expressions that best describe given data. As we discussed in class, NeSymReS achieves this by integrating a transformer-based neural network for sequence generation with the BFGS optimization algorithm to refine coefficients in predicted equations. For details, see **Neural Symbolic Regression That Scales**.  [[`arXiv`](https://arxiv.org/pdf/2106.06427.pdf)] 


The following libraries will be necessary for your implementation. Ensure you have them installed before proceeding.

In [None]:
import omegaconf
import json
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
import time
from sympy import lambdify 
import sympy as sp
from generator import Generator, add_additive_constants, add_multiplicative_constants
from data import *
from scipy.optimize import minimize
from models import SetEncoder

**Task 1: Compute the symbolic loss**
* Evaluate the predicted equation for each data point and calculate the difference from the target.

**Task 2: Reconstruct optimized expression**
* Replace placeholders with optimized constants.

In [None]:
class BeamHypotheses:
    """
    Bounded store for the best finished sequences found by beam search.

    Scores are length-normalised when a sequence is added, and at most
    ``n_hyp`` entries are retained.
    """

    def __init__(self, n_hyp, max_len, length_penalty, early_stopping):
        """
        Create an empty store.

        Args:
            n_hyp (int): Capacity, i.e. how many hypotheses are retained.
            max_len (int): Maximum sequence length (stored only).
            length_penalty (float): Exponent applied to the sequence length
                when a score is normalised.
            early_stopping (bool): Early-stopping flag (stored only).
        """
        self.hypotheses = []
        self.worst_score = float("inf")
        self.n_hyp = n_hyp
        self.max_len = max_len
        self.length_penalty = length_penalty
        self.early_stopping = early_stopping

    def add(self, hypothesis, score):
        """
        Offer a finished sequence to the store.

        The sequence is kept when there is still room, or when its normalised
        score beats the worst score currently held; in the latter case the
        lowest-scoring entry is evicted.

        Args:
            hypothesis (list): The hypothesis sequence.
            score (float): Raw (un-normalised) score of the sequence.
        """
        normalized_score = score / (len(hypothesis) ** self.length_penalty)

        has_room = len(self.hypotheses) < self.n_hyp
        if not has_room and not (normalized_score > self.worst_score):
            return

        self.hypotheses.append((normalized_score, hypothesis))

        if len(self.hypotheses) <= self.n_hyp:
            # Still within capacity: only the running minimum needs updating
            self.worst_score = min(normalized_score, self.worst_score)
            return

        # Over capacity: order ascending by score and evict the lowest entry
        self.hypotheses.sort(key=lambda entry: entry[0])
        del self.hypotheses[0]
        self.worst_score = self.hypotheses[0][0]
    
class TimedFun:
    """
    Wrap a callable so that it refuses to run once a time budget is spent.

    The clock starts at the first call of :meth:`fun`. The most recent
    argument vector and result are kept on the instance as ``x`` and
    ``fun_value``.
    """

    def __init__(self, fun, stop_after=10):
        """
        Args:
            fun (callable): Function to wrap; it is called as ``fun(*x, *args)``.
            stop_after (float): Time budget in seconds, measured from the
                first call.
        """
        self.fun_in = fun
        self.started = False
        self.stop_after = stop_after

    def fun(self, x, *args):
        """
        Evaluate the wrapped function at ``x``.

        Args:
            x: Iterable of positional arguments, unpacked into the wrapped call.
            *args: Extra positional arguments appended after ``x``.

        Returns:
            The value returned by the wrapped function.

        Raises:
            ValueError: If the time budget was already exhausted when called.
        """
        if self.started is not False:
            elapsed = abs(time.time() - self.started)
            if elapsed >= self.stop_after:
                raise ValueError("Time is over.")
        else:
            # First call: start the clock
            self.started = time.time()

        self.fun_value = self.fun_in(*x, *args)
        self.x = x
        return self.fun_value
    
def bfgs_fcn(pred_str, X, y, cfg):
    """
    Optimize coefficients of a predicted equation using BFGS.

    The predicted token sequence is decoded to an infix skeleton whose
    constants are named ``c0``, ``c1``, ...; a symbolic mean-squared-error
    loss is built over the data points and minimised from several random
    starting points. The restart with the lowest final loss is returned.

    Args:
        pred_str: Predicted token sequence in prefix order. It must support
            slicing and ``.tolist()``; the first token is dropped before
            decoding (beam_search writes the BOS token there).
        X (torch.Tensor): Input variables (shape: [batch_size, num_points, num_variables]).
            The tensor is cloned, so the caller's data is not modified.
        y (torch.Tensor): Target values (shape: [batch_size, num_points]); squeezed on entry.
        cfg: Configuration object. Attributes read here: ``id2word``,
            ``total_variables``, ``total_coefficients``, ``rewrite_functions``,
            ``una_ops`` and ``bfgs`` (``add_coefficients_if_not_existing``,
            ``n_restarts``, ``stop_time``).

    Returns:
        best_func: Expression of the best restart with constants substituted
            (its exact type depends on how the reconstruction TODO is filled in).
        best_coeffs (list): Coefficients recorded for the best restart
            (an empty list if that restart's optimisation raised).
        best_loss (float): Loss associated with the best coefficients.
        expr (str): Equation structure with ``c0``, ``c1``, ... placeholders for coefficients.
    """
    # Prepare input data
    y = y.squeeze()
    X = X.clone()

    # Replace unused dimensions with 1 to avoid numerical issues
    # (a variable counts as unused when its column is zero at every point)
    bool_dim = (X == 0).all(axis=1).squeeze()
    X[:, :, bool_dim] = 1

    # Decode predicted structure
    pred_str = pred_str[1:].tolist()
    raw = de_tokenize(pred_str, cfg.id2word)

    # Add missing coefficients if needed
    if cfg.bfgs.add_coefficients_if_not_existing and 'constant' not in raw:
        print("No constants in predicted expression. Adding them.")
        variables = {x: sp.Symbol(x, real=True, nonzero=True) for x in cfg.total_variables}
        infix = Generator.prefix_to_infix(raw, coefficients=cfg.total_coefficients, variables=cfg.total_variables)
        s = Generator.infix_to_sympy(infix, variables, cfg.rewrite_functions)
        placeholder = {x: sp.Symbol(x, real=True, nonzero=True) for x in ["cm", "ca"]}
        s = add_multiplicative_constants(s, placeholder["cm"], unary_operators=cfg.una_ops)
        s = add_additive_constants(s, placeholder, unary_operators=cfg.una_ops)
        # The placeholder symbols get arbitrary numeric values here; presumably
        # constants_to_placeholder then turns every number back into a
        # "constant" token -- TODO confirm against the helper in `data`.
        s = s.subs(placeholder["cm"], 0.43).subs(placeholder["ca"], 0.421)
        s_simplified = constants_to_placeholder(s, symbol="constant")
        prefix = Generator.sympy_to_prefix(s_simplified)
        candidate = Generator.prefix_to_infix(prefix, coefficients=["constant"], variables=cfg.total_variables)
    else:
        candidate = Generator.prefix_to_infix(raw, coefficients=["constant"], variables=cfg.total_variables)

    # Replace "constant" placeholders with unique symbols
    # (first fill any `{constant}` format field with the literal word, then
    # rename the occurrences left to right as c0, c1, ...)
    candidate = candidate.format(constant="constant")
    expr = candidate
    for i in range(candidate.count("constant")):
        expr = expr.replace("constant", f"c{i}", 1)

    print("Constructing BFGS loss...")

    # Compute the symbolic loss by evaluating the predicted equation
    # for each data point and calculating the difference from the target.
    diffs = []
    for i in range(X.shape[1]):
        curr_expr = expr
        for idx, var in enumerate(cfg.total_variables):
            # TODO: Evaluate symbolic expression
            # Hint: Use sympy's `subs` to substitute variable values into the expression.
            curr_expr = ...
        # TODO: Save the difference between the symbolic evaluation and actual values
        diff = ...
        diffs.append(diff)

    # Mean squared error over all points; still symbolic in the constants c0, c1, ...
    loss = np.mean(np.square(diffs))

    print("Loss constructed. Starting BFGS optimization...")

    # Optimize coefficients using BFGS
    F_loss = []
    consts_ = []
    funcs = []
    symbols = {i: sp.Symbol(f'c{i}') for i in range(candidate.count("constant"))}

    for _ in range(cfg.bfgs.n_restarts):
        x0 = np.random.randn(len(symbols))  # Initial guess for coefficients
        s = list(symbols.values())
        # NOTE(review): `s` and `loss` do not change between restarts, so the
        # lambdified loss could be built once before the loop.
        fun_timed = TimedFun(fun=sp.lambdify(s, loss, modules=['numpy']), stop_after=cfg.bfgs.stop_time)

        # Run BFGS optimization
        # NOTE(review): the result object returned by `minimize` is discarded;
        # the constants are taken from `fun_timed.x`, i.e. the last point the
        # loss was evaluated at, which need not be the optimiser's final
        # iterate -- confirm this is intended.
        try:
            minimize(fun_timed.fun, x0, method='BFGS')
            consts_.append(fun_timed.x)
        except Exception as e:
            print(f"BFGS optimization failed: {e}")
            # NOTE(review): an empty list is recorded here, yet the
            # reconstruction below still reads `fun_timed.x`; that attribute
            # does not exist if the very first evaluation raised.
            consts_.append([])

        # Reconstruct optimized expression
        # Hint: `s` is a list of SymPy symbols representing the placeholders (e.g., `c0`, `c1`, ...).
        #       This loop iterates over each symbol in `s` and replaces it with the corresponding optimized value
        #       from `fun_timed.x[i]`.
        # Example:
        # If `expr` = "c0 + c1*x", then `s = [c0, c1]` and `fun_timed.x = [2.5, 1.8]`.
        # The loop will replace:
        #   - `c0` with `2.5`
        #   - `c1` with `1.8`
        # Final result: "2.5 + 1.8*x"
        final = expr
        for i, sym in enumerate(s):
            # TODO: Reconstruct the optimized equation by replacing placeholders with optimized constants.
            # Hint: `fun_timed.x[i]` contains the optimized value for the i-th constant in the equation.
            #       Use sympy's `replace` method to substitute each constant symbol with its optimized value.
            final = ...

        funcs.append(final)

        # Evaluate final loss
        # (numerically, on all points at once, using the CPU copies of X and y)
        values = {var: X[:, :, idx].cpu() for idx, var in enumerate(cfg.total_variables)}
        y_found = sp.lambdify(",".join(cfg.total_variables), final)(**values).squeeze(0)
        final_loss = np.mean(np.square(y_found - y.cpu()).numpy())
        F_loss.append(final_loss)

    # Select the best solution
    # (np.nanargmin raises ValueError when every loss is NaN or the list is empty)
    try:
        k_best = np.nanargmin(F_loss)
    except ValueError:
        print("All-Nan slice encountered. Selecting default.")
        k_best = 0

    return funcs[k_best], consts_[k_best], F_loss[k_best], expr

**Task 3: Complete the implementation of beam search for symbolic regression**
* Implement beam search scoring and selection logic

In [None]:
class Model(pl.LightningModule):
    """
        Module for symbolic regression using a transformer-based architecture.

        This model encodes input data with a set encoder, decodes equations using a transformer decoder, 
        and predicts symbolic expressions. It supports auto-regressive generation and beam search for inference.
    """
    def __init__(self, cfg):
        """
        Build the set encoder, the embeddings, the transformer decoder and
        the output projection.

        Args:
            cfg: Architecture configuration. Attributes read here are
                ``trg_pad_idx``, ``output_dim``, ``dim_hidden``,
                ``length_eq``, ``num_heads``, ``dec_pf_dim``, ``dropout``
                and ``dec_layers``; the whole object is also passed to
                ``SetEncoder`` and kept as ``self.cfg``.
        """
        super().__init__()

        # Sub-modules are created in a fixed order and under fixed attribute
        # names: both determine the parameter names stored in checkpoints.
        self.enc = SetEncoder(cfg)
        self.trg_pad_idx = cfg.trg_pad_idx

        hidden_dim = cfg.dim_hidden
        vocab_size = cfg.output_dim

        self.tok_embedding = nn.Embedding(vocab_size, hidden_dim)
        self.pos_embedding = nn.Embedding(cfg.length_eq, hidden_dim)
        self.decoder_transfomer = nn.TransformerDecoder(
            nn.TransformerDecoderLayer(
                d_model=hidden_dim,
                nhead=cfg.num_heads,
                dim_feedforward=cfg.dec_pf_dim,
                dropout=cfg.dropout,
            ),
            num_layers=cfg.dec_layers,
        )
        self.fc_out = nn.Linear(hidden_dim, vocab_size)

        self.cfg = cfg
        self.criterion = nn.CrossEntropyLoss(ignore_index=0)
        self.dropout = nn.Dropout(cfg.dropout)
        # Best equation(s) found by the most recent fitfunc call
        self.eq = None

    def make_trg_mask(self, trg):
        """
        Build the padding mask and the causal (look-ahead) mask for ``trg``.

        Both masks are first filled as float tensors (0.0 = may attend,
        -inf = blocked) and then converted with ``type_as(trg)``, so the
        returned tensors carry the dtype and device of ``trg``.

        NOTE(review): when ``trg`` is an integer tensor (beam_search passes a
        ``torch.long`` buffer) the -inf entries cannot survive that cast as a
        true infinity; callers appear to rely only on zero vs. non-zero
        (``.bool()``) or on a very large negative value -- confirm before
        changing the cast.

        Args:
            trg (torch.Tensor): Target sequence tensor of shape (batch_size, trg_len).

        Returns:
            tuple:
                - trg_pad_mask (torch.Tensor): Shape (batch_size, trg_len);
                  positions equal to ``self.trg_pad_idx`` are blocked.
                - mask (torch.Tensor): Shape (trg_len, trg_len); position
                  ``(i, j)`` is blocked when ``j > i``, so each step sees only
                  itself and earlier steps.
        """
        blocked = float("-inf")

        # Padding mask: start from "attend everywhere", then block the pads
        is_pad = trg == self.trg_pad_idx
        trg_pad_mask = (
            torch.zeros_like(trg, dtype=torch.float)
            .masked_fill(is_pad, blocked)
            .type_as(trg)
        )

        # Causal mask: the lower triangle (diagonal included) stays visible
        trg_len = trg.shape[1]
        visible = torch.tril(torch.ones(trg_len, trg_len)) == 1
        mask = (
            torch.zeros(trg_len, trg_len)
            .masked_fill(~visible, blocked)
            .type_as(trg)
        )
        return trg_pad_mask, mask
    
    def beam_search(self, enc_src, cfg_params):
        """
        Perform beam search to generate symbolic expressions.

        All working tensors are allocated on ``enc_src.device`` so the search
        runs on whichever device the encoder output lives on.

        Args:
            enc_src (torch.Tensor): Encoded source tensor of shape
                                    (beam_size, seq_length, hidden_dim),
                                    output from the encoder.
            cfg_params: Configuration parameters for beam search. Attributes
                read here:
                - beam_size (int): Number of beams to maintain during search.
                - word2id (dict): Mapping of vocabulary words to IDs; the ID
                  of ``"F"`` marks a finished equation.
                The maximum equation length is taken from ``self.cfg.length_eq``.

        Returns:
            BeamHypotheses: An object containing the n-best hypotheses (generated equations)
                            and their respective scores.
        """
        beam_size = cfg_params.beam_size
        max_len = self.cfg.length_eq
        device = enc_src.device

        # Initialize beam search variables
        generated = torch.zeros([beam_size, max_len], dtype=torch.long, device=device)
        generated[:, 0] = 1  # Start-of-sequence (BOS) token
        generated_hyps = BeamHypotheses(beam_size, max_len, 1.0, 1)

        # Cumulative scores are log-probabilities, hence a float tensor
        beam_scores = torch.zeros(beam_size, dtype=torch.float, device=device)
        beam_scores[1:] = -1e9  # Assign low scores to all beams except the first
        cur_len = 1

        # Beam search loop
        while cur_len < max_len:
            # Generate target masks
            generated_mask1, generated_mask2 = self.make_trg_mask(generated[:, :cur_len])

            # Compute positional and token embeddings
            pos = self.pos_embedding(
                torch.arange(0, cur_len, device=device)
                .unsqueeze(0)
                .repeat(generated.shape[0], 1)
                .type_as(generated)
            )
            te = self.tok_embedding(generated[:, :cur_len])
            trg_ = self.dropout(te + pos)

            # Decoder forward pass
            # NOTE: The decoder processes the input for ALL beams simultaneously.
            # Each beam has its own sequence (`generated`) and the decoder predicts
            # the next token for every beam at the same time.
            output = self.decoder_transfomer(
                trg_.permute(1, 0, 2), # Shape: (cur_len, beam_size, hidden_dim)
                enc_src.permute(1, 0, 2), # Shape: (seq_len, beam_size, hidden_dim)
                generated_mask2.float(), # Attention mask
                tgt_key_padding_mask=generated_mask1.bool(), # Padding mask
            )
            # Output logits for the next token prediction for all beams
            output = self.fc_out(output)
            output = output.permute(1, 0, 2).contiguous() # Shape: (beam_size, cur_len, vocab_size)

            # The last axis is the vocabulary (the width of `fc_out`), not the
            # maximum equation length.
            assert output[:, -1:, :].shape == (beam_size, 1, self.cfg.output_dim)

            # Extract scores for the current step (last token in the sequence)
            scores = F.log_softmax(output[:, -1:, :], dim=-1).squeeze(1)

            vocab_size = scores.shape[-1]

            # Update scores and find next candidates
            # TODO: Combine the scores of the current step with the beam scores.
            # Each beam's prediction score is added to its cumulative score.
            _scores = ... # (beam_size, vocab_size)

            # TODO: Flatten the combined scores for easier top-k selection.
            # We flatten because we need to select top-k tokens across ALL beams, not just within each beam.
            _scores = ... # (beam_size * vocab_size)

            # TODO: Use `torch.topk` to select the top `2 * beam_size` scores.
            # Why 2 * beam_size? This ensures diversity and prevents early pruning.
            # You will filter out redundant candidates in the next step.
            next_scores, next_words = ...

            next_sent_beam = []

            # Process each candidate for the next step
            for idx, value in zip(next_words, next_scores):
                # A flat index encodes (beam, token) as beam * vocab_size + token
                beam_id = idx // vocab_size
                word_id = idx % vocab_size

                # Add completed hypotheses
                if word_id == cfg_params.word2id["F"] or cur_len + 1 == max_len:
                    generated_hyps.add(
                        generated[beam_id, :cur_len].clone().cpu(),
                        value.item(),
                    )
                else:
                    next_sent_beam.append((value, word_id, beam_id))

                # Limit to beam size
                if len(next_sent_beam) == beam_size:
                    break

            # Update beam variables for the next step
            if not next_sent_beam:
                # Every candidate finished: continue with padding-only beams
                next_sent_beam = [(0, self.trg_pad_idx, 0)] * beam_size

            assert len(next_sent_beam) == beam_size

            beam_scores = torch.tensor([x[0] for x in next_sent_beam], device=device)
            beam_words = torch.tensor([x[1] for x in next_sent_beam], device=device)
            beam_idx = torch.tensor([x[2] for x in next_sent_beam], device=device)
            # Re-order the token buffer so row k continues the beam it came from
            generated = generated[beam_idx, :]
            generated[:, cur_len] = beam_words

            cur_len += 1

        return generated_hyps

    def fitfunc(self, X, y, cfg_params):
        """
        Perform symbolic regression using beam search and BFGS optimization.

        The inputs are copied onto the device that holds the model parameters,
        so the method works whether or not the model was moved to a GPU.

        Side effects: sets ``cfg_params.id2word[3]`` to ``"constant"`` and
        stores the best prediction list in ``self.eq``.

        Args:
            X (numpy.ndarray or torch.Tensor): Input data of shape
                [Number_of_points, Number_of_features].
            y (numpy.ndarray or torch.Tensor): Target data of shape [Number_of_points].
            cfg_params: Configuration parameters for beam search and BFGS.

        Returns:
            dict: With keys
                - ``all_bfgs_preds`` (list[str]): One equation per hypothesis.
                - ``all_bfgs_loss``: List of the matching losses, or the float
                  ``nan`` when every loss is NaN (or no hypothesis was found).
                - ``best_bfgs_preds`` (list[str]): The lowest-loss equation,
                  or an empty list in the all-NaN case.
                - ``best_bfgs_loss``: One-element list with the lowest loss,
                  or ``None`` in the all-NaN case.
        """
        device = next(self.parameters()).device

        # Prepare input tensors (copied, so the caller's data is never aliased)
        y = y[:, None]
        X = torch.as_tensor(X).detach().clone().to(device).unsqueeze(0)

        # Pad input to match required dimensions
        # (the encoder input holds dim_input - 1 feature columns plus y)
        if X.shape[2] < self.cfg.dim_input - 1:
            pad = torch.zeros(
                1, X.shape[1], self.cfg.dim_input - X.shape[2] - 1, device=device
            )
            X = torch.cat((X, pad), dim=2)
        y = torch.as_tensor(y).detach().clone().to(device).unsqueeze(0)

        with torch.no_grad():
            # Combine inputs and targets for encoding
            encoder_input = torch.cat((X, y), dim=2)
            enc_src = self.enc(encoder_input)

            # Expand encoder outputs for beam search
            # (the single encoded dataset is repeated once per beam)
            src_enc = enc_src
            shape_enc_src = (cfg_params.beam_size,) + src_enc.shape[1:]
            enc_src = (
                src_enc.unsqueeze(1)
                .expand((1, cfg_params.beam_size) + src_enc.shape[1:])
                .contiguous()
                .view(shape_enc_src)
            )

            # Beam search
            generated_hyps = self.beam_search(enc_src, cfg_params)

            # BFGS optimization for the best hypotheses
            best_preds_bfgs, best_L_bfgs, L_bfgs, P_bfgs = [], [], [], []
            cfg_params.id2word[3] = "constant"

            ranked = sorted(generated_hyps.hypotheses, key=lambda x: x[0], reverse=True)
            for _score, ww in ranked:
                pred_w_c, _constants, loss_bfgs, _skeleton = bfgs_fcn(ww, X, y, cfg_params)
                P_bfgs.append(str(pred_w_c))
                L_bfgs.append(loss_bfgs)

            if all(np.isnan(np.array(L_bfgs))):
                # Also reached when beam search produced no hypothesis at all
                print("Warning: All losses are NaN.")
                L_bfgs = float("nan")
                best_L_bfgs = None
            else:
                best_preds_bfgs.append(P_bfgs[np.nanargmin(L_bfgs)])
                best_L_bfgs.append(np.nanmin(L_bfgs))

            # Return the output dictionary
            output = {
                "all_bfgs_preds": P_bfgs,
                "all_bfgs_loss": L_bfgs,
                "best_bfgs_preds": best_preds_bfgs,
                "best_bfgs_loss": best_L_bfgs,
            }
            self.eq = output["best_bfgs_preds"]
            return output

Next, we give you ready-to-go code for loading the pre-trained models and testing your code. The code sets up a pipeline for testing and comparing the performance of two models pre-trained on synthetic datasets generated from mathematical equations.

We offer two models, "10M" and "100M". Both are trained with the same parameter configuration in config/config.yaml (which contains details of how the models are trained). The "10M" model is trained on 10 million datasets and the "100M" model is trained on 100 million datasets.
* Link to 100M: [[`Link`](https://drive.google.com/file/d/1JfVBCkLc2iz9JZ72y2LI6y0_c01ShTRq/view?usp=sharing)]
* Link to 10M: [[`Link`](https://drive.google.com/file/d/1LsS08VqhgGaq8E_4VZJ7g_GJS5lEhy68/view?usp=sharing)]

In [None]:
def generate_test_data(eq, n_points, variable_range, eq_variables, total_variables):
    """
    Generate test data for the given equation.

    Every variable in ``total_variables`` gets a column in ``X``. Columns of
    variables that are not listed in ``eq_variables`` are set to zero,
    whatever their position, so an equation may use any subset of the
    variables (e.g. only ``x_2``).

    Args:
        eq (str): Target equation as a string (e.g., "x1*sin(x1) + 0.5").
        n_points (int): Number of test data points.
        variable_range (tuple): Min and max range for variables (e.g., (-10, 10)).
        eq_variables (list): List of variable names for specific equation (e.g., ["x1", "x2"]).
        total_variables (list): List of variable names from config file.

    Returns:
        X (torch.Tensor): Input variables, float32, shape (n_points, len(total_variables)).
        y (np.array): Target outputs, shape (n_points,). They are computed
            from the float64 samples before ``X`` is cast to float32.
    """
    low, high = variable_range[0], variable_range[1]
    X = np.random.uniform(low, high, size=(n_points, len(total_variables)))

    # Zero the columns of unused variables by name rather than by position
    used = set(eq_variables)
    unused = np.array([var not in used for var in total_variables], dtype=bool)
    X[:, unused] = 0

    X_dict = {var: X[:, idx] for idx, var in enumerate(total_variables)}
    y = lambdify(total_variables, eq)(**X_dict)
    # A constant equation evaluates to a scalar; expand it to one value per point
    y = np.broadcast_to(np.asarray(y), (n_points,)).copy()
    return torch.tensor(X, dtype=torch.float32), y

def evaluate_model(model, X, y, params_fit):
    """
    Fit ``model`` to ``(X, y)``, print the best equation and report its MSE.

    Args:
        model: Pre-trained model for evaluation; its ``fitfunc`` is called.
        X (torch.Tensor): Input variables, one column per entry of
            ``params_fit.total_variables``.
        y (np.array): Target outputs.
        params_fit: Configuration for the beam search.

    Returns:
        tuple: ``(predicted_eq, mse)`` -- the best predicted equation and the
            Mean Squared Error of its predictions on ``X``.
    """
    # Run model to predict equation
    fit_result = model.fitfunc(X, y, cfg_params=params_fit)
    predicted_eq = fit_result['best_bfgs_preds'][0]
    print(f"Predicted equation: {predicted_eq}")

    # Turn the predicted equation into a numeric function and evaluate it
    variable_names = params_fit.total_variables
    predictor = lambdify(variable_names, predicted_eq)
    columns = {name: X[:, col].numpy() for col, name in enumerate(variable_names)}
    y_pred = predictor(**columns)

    # Calculate Mean Squared Error
    residual = y - y_pred
    mse = np.mean(residual ** 2)
    print(f"Mean Squared Error: {mse}")
    return predicted_eq, mse

# Load config files, models, and their parameters
# eq_setting supplies the vocabulary (word2id / id2word), the operator lists
# and the variable / coefficient names used to build `cfg_params` below.
with open('./config/eq_setting.json', 'r') as json_file:
    eq_setting = json.load(json_file)

cfg = omegaconf.OmegaConf.load("./config/config.yaml")

# Load the pre-trained models
# Both checkpoints are loaded with the same architecture section of the config.
model_10M = Model.load_from_checkpoint("./weights/10M.ckpt", cfg=cfg.architecture)
model_100M = Model.load_from_checkpoint("./weights/100M.ckpt", cfg=cfg.architecture)
# Switch to evaluation mode for inference
model_10M.eval()
model_100M.eval()

# Move both models to the GPU when one is available
if torch.cuda.is_available():
    model_10M.cuda()
    model_100M.cuda()

# Build the BFGS settings from the `inference.bfgs` section of the YAML config
bfgs = BFGSParams(
        activated= cfg.inference.bfgs.activated,
        n_restarts=cfg.inference.bfgs.n_restarts,
        add_coefficients_if_not_existing=cfg.inference.bfgs.add_coefficients_if_not_existing,
        normalization_o=cfg.inference.bfgs.normalization_o,
        idx_remove=cfg.inference.bfgs.idx_remove,
        normalization_type=cfg.inference.bfgs.normalization_type,
        stop_time=cfg.inference.bfgs.stop_time,
    )

# Configuration for beam search
# JSON object keys are strings, so the id2word keys are converted back to int.
cfg_params = FitParams(
    word2id=eq_setting["word2id"],
    id2word={int(k): v for k, v in eq_setting["id2word"].items()},
    una_ops=eq_setting["una_ops"],
    bin_ops=eq_setting["bin_ops"],
    total_variables=list(eq_setting["total_variables"]),
    total_coefficients=list(eq_setting["total_coefficients"]),
    rewrite_functions=list(eq_setting["rewrite_functions"]),
    bfgs=bfgs,
    beam_size=cfg.inference.beam_size
)

Use the functions to generate the test data and predict an equation.

In [None]:
# Sample 500 points of a one-variable equation on [-5, 5]; the x_2 and x_3
# columns are generated as zeros because only x_1 is listed as used.
X, y = generate_test_data("x_1*sin(x_1) + 1/20 * cos(x_1)", 500, (-5, 5), ["x_1"], ["x_1", "x_2", "x_3"])
# Fit the 10M model and report the recovered equation with its error
predicted_eq, mse_10M = evaluate_model(model_10M, X, y, cfg_params)
print("\nPredicted equation", predicted_eq)
print("MSE", mse_10M)

## Part 2: Systematic evaluation of pre-trained SR models

Next, we will define several test cases and evaluate the two models. 

In [None]:
# Define test cases
# Each case gives the target equation, the number of sampled points, the
# sampling range shared by all variables, and the variables the equation uses.
test_cases = [
    {"eq": "x_1 * sin(x_1)", "n_points": 500, "variable_range": (-10, 10), "variables": ["x_1"]},
    {"eq": "x_1 * exp(x_2) + 0.1", "n_points": 500, "variable_range": (-3, 3), "variables": ["x_1", "x_2"]},
    {"eq": "x_1**3 - x_2**2 + 0.1 * x_3**2", "n_points": 500, "variable_range": (-4, 4), "variables": ["x_1", "x_2", "x_3"]},
    {"eq": "exp(-x_1) * sin(x_2) + log(abs(x_3) + 1)", "n_points": 500, "variable_range": (-2, 2), "variables": ["x_1", "x_2", "x_3"]},
    {"eq": "tan(x_1) / (1 + x_2**2) + x_3**2", "n_points": 500, "variable_range": (-1, 1), "variables": ["x_1", "x_2", "x_3"]},
]

# Evaluate models
# Results are collected per model, one entry per test case, in case order.
results = {"10M": [], "100M": []}

for case in test_cases:
    print(f"\nEvaluating for equation: {case['eq']}")

    # Generate test data
    # (the same sample is used for both models so their errors are comparable)
    X, y = generate_test_data(case["eq"], case["n_points"], case["variable_range"], case["variables"], eq_setting["total_variables"])

    # Evaluate 10M model
    print("\n10M Model Results:")
    predicted_eq, mse_10M = evaluate_model(model_10M, X, y, cfg_params)
    results["10M"].append({"eq": case["eq"], 'pred_eq': predicted_eq, "mse": mse_10M})

    # Evaluate 100M model
    print("\n100M Model Results:")
    predicted_eq, mse_100M = evaluate_model(model_100M, X, y, cfg_params)
    results["100M"].append({"eq": case["eq"], 'pred_eq': predicted_eq, "mse": mse_100M})

# Print results summary
print("\n--- Results Summary ---")
for model_name, model_results in results.items():
    print(f"\nModel: {model_name}")
    for result in model_results:
        print(f"Equation: {result['eq']} | Pred Eq.: {result['pred_eq']} | MSE: {result['mse']:.6f}")

**Task 4: Comment on the last results**
* Compare methods quantitatively and qualitatively.

## Part 3: Exploring the limitations of NeSymReS

**Task 5: Break the code**
* Your goal is to design equations that cause the model to fail in some way (e.g., high errors, runtime issues, or incorrect predictions). You are encouraged to vary the complexity of equations, variable range, and number of variables. 
* Analyze why the failure occurs and suggest potential improvements. 

In [None]:
# TODO: Break the code!