# **Generating Mathematical Expressions using Sympy:**
# **1. Introduction to Python Libraries for Symbolic Mathematics:**


SymPy is a Python library for symbolic mathematics. It aims to become a full-featured computer algebra system while keeping the code as simple as possible to understand. SymPy is written entirely in Python.



Features of SymPy:

Symbolic computation

Algebra

Calculus

Solving equations

Discrete mathematics



Installation:

To install SymPy, use the following command:

pip install sympy

In [1]:
import sympy as sp
import random
import pandas as pd
import re
import json
import uuid
from sympy import symbols
import numpy as np
import inspect


# Logarithmic Expression Generation

In [2]:
def generate_expression_logrithmic(order):
    """Build a random logarithmic expression of the form ``a*log(b*x) + c``.

    Args:
        order: Unused; accepted only so existing callers keep working.

    Returns:
        A SymPy expression in the symbol ``x`` where ``a``, ``b`` and ``c``
        are each drawn uniformly from the integers 1..10.
    """
    # Build the symbol through ``sp.Symbol`` instead of the bare ``symbols``
    # name: this file later rebinds ``symbols`` to a tuple of Symbol objects,
    # after which ``symbols('x')`` is no longer callable.
    x = sp.Symbol('x')
    a = random.randint(1, 10)
    b = random.randint(1, 10)
    c = random.randint(1, 10)
    expr = a * sp.log(b * x) + c
    return expr


In [7]:
# Each run produces a different random expression.
order = random.randint(1, 6)
log_expr = generate_expression_logrithmic(order)
latex_log = sp.latex(log_expr)
# Show the SymPy form first, then its LaTeX rendering, one per line.
print(log_expr, latex_log, sep="\n")


2*log(x) + 9
2 \log{\left(x \right)} + 9


In [5]:
# Collect ten random logarithmic expressions together with their LaTeX form.
data1 = []
for _ in range(10):
    order = random.randint(1, 6)
    log_expr = generate_expression_logrithmic(order)
    latex_log = sp.latex(log_expr)
    record = {"sympy_exp": log_expr, "latex_expression": latex_log}
    data1.append(record)
print(data1)


[{'sympy_exp': 7*log(7*x) + 2, 'latex_expression': '7 \\log{\\left(7 x \\right)} + 2'}, {'sympy_exp': 8*log(7*x) + 3, 'latex_expression': '8 \\log{\\left(7 x \\right)} + 3'}, {'sympy_exp': 4*log(3*x) + 4, 'latex_expression': '4 \\log{\\left(3 x \\right)} + 4'}, {'sympy_exp': 8*log(x) + 10, 'latex_expression': '8 \\log{\\left(x \\right)} + 10'}, {'sympy_exp': 9*log(8*x) + 6, 'latex_expression': '9 \\log{\\left(8 x \\right)} + 6'}, {'sympy_exp': 10*log(3*x) + 6, 'latex_expression': '10 \\log{\\left(3 x \\right)} + 6'}, {'sympy_exp': 10*log(9*x) + 7, 'latex_expression': '10 \\log{\\left(9 x \\right)} + 7'}, {'sympy_exp': 4*log(4*x) + 6, 'latex_expression': '4 \\log{\\left(4 x \\right)} + 6'}, {'sympy_exp': 9*log(9*x) + 9, 'latex_expression': '9 \\log{\\left(9 x \\right)} + 9'}, {'sympy_exp': 5*log(5*x) + 8, 'latex_expression': '5 \\log{\\left(5 x \\right)} + 8'}]


# **Converting Mathematical Expressions to Code and Creating Advanced Equations**

1. Introduction to Converting Mathematical Expressions to Python Code
SymPy allows users to define and manipulate mathematical expressions symbolically and convert them into executable Python code.

# Generate 100 Multivariable Expressions
*   Use the python code to generate 100 random multivariable expressions.
*   Print each generated expression.
*   Convert each generated expression into executable Python code.


*   Save all generated expressions and corresponding codes into a .json file.

*   Ensure the file `multivariable.json` is correctly saved.

*   Submit the file for evaluation.

In [8]:
# Pool of symbols the expression generators below sample from.
# NOTE: this rebinds the module-level name `symbols`, which until now referred
# to the function imported via `from sympy import symbols`. From here on
# `symbols` is the collection of Symbol objects, so `symbols('x')` can no
# longer be called.
symbols = sp.symbols('x y z a b c')

# Detect functions to add specific imports
def detect_sympy_functions(expr):
    """Return the set of function classes that occur inside ``expr``.

    Every sub-expression matched by ``expr.find(sp.Function)`` contributes its
    concrete type, so each distinct function class appears once.
    """
    return {type(node) for node in expr.find(sp.Function)}

def expr_to_code(expr, variables):
    """Render ``expr`` as Python source text preceded by import lines.

    Args:
        expr: SymPy expression to convert.
        variables: Iterable of symbols that become the parameters of the
            generated function, in the given order.

    Returns:
        A string consisting of ``import numpy as np``, an optional
        ``from sympy import ...`` line naming the function classes detected
        in ``expr``, and the source of the function produced by
        ``sp.lambdify(variables, expr, "numpy")``.
    """
    variables = tuple(variables)

    # The detected classes arrive as a set, whose iteration order is not
    # stable between runs; sorting the names keeps the emitted import line
    # (and therefore the saved JSON) reproducible.
    function_names = sorted({func.__name__ for func in detect_sympy_functions(expr)})

    imports = "import numpy as np\n"
    if function_names:
        imports += "from sympy import " + ", ".join(function_names) + "\n"

    # NOTE(review): the header imports SymPy names while the function body is
    # generated for the NumPy backend; whether every name used in the
    # generated body resolves from this header has not been verified here.
    code = sp.lambdify(variables, expr, "numpy")
    source_code = inspect.getsource(code)
    return imports + source_code

In [9]:
#############Generating Multivariable Expressions#############

# Define possible symbols
def generate_multivariable_expression():
    """Build a random multivariable polynomial-style expression.

    Returns:
        A pair ``(expr, chosen_symbols)``: the generated expression and the
        list of symbols that were sampled for it.
    """
    # Sample between two and all of the available symbols.
    chosen_symbols = random.sample(symbols, random.randint(2, len(symbols)))
    term_count = random.randint(1, 6)  # number of monomial terms to add up

    def draw_coefficients():
        return [random.randint(0, 5) for _ in range(term_count)]

    # Redraw until at least one coefficient is non-zero.
    coefficients = draw_coefficients()
    while not any(coefficients):
        coefficients = draw_coefficients()

    constant = random.randint(0, 5)

    # Each term is its coefficient times a product of the chosen symbols,
    # every symbol raised to its own random power in 0..3.
    expr = 0
    for coeff in coefficients:
        monomial = sp.Mul(*(s ** random.randint(0, 3) for s in chosen_symbols))
        expr = expr + coeff * monomial
    expr = expr + constant
    return expr, chosen_symbols


In [10]:
# Generate 100 multivariable expressions together with their generated code.
multi_exprs = []
for _ in range(100):
    expr, variables = generate_multivariable_expression()
    code_source = expr_to_code(expr, variables)
    multi_exprs.append({
        "expression": str(expr),
        "code": code_source
    })

# Save to a .json file. The encoding is stated explicitly so the output does
# not depend on the platform's default locale encoding.
filename = "multivariable.json"
with open(filename, 'w', encoding="utf-8") as file:
    json.dump(multi_exprs, file, indent=4)

# Confirm the save, as the other generation cells do.
print(f"Expressions saved to {filename}")

In [11]:
# Test sample generated code
def _lambdifygenerated(c, x, y):
      return c**3*x*y**3 + 3
_lambdifygenerated(1,7,2)

59

# Generate 100 Trigonometric Expressions
*   Use the python code to generate 100 random Trigonometric expressions.

*   Print each generated expression.
*   Convert each generated expression into executable Python code.


*   Save all generated expressions and corresponding codes into a .json file.

*   Ensure the file `trigonometric.json` is correctly saved.

*   Submit the file for evaluation.

In [12]:
# Trigonometric functions a term may be wrapped in
trig_functions = [sp.sin, sp.cos, sp.tan, sp.cot, sp.sec, sp.csc]


def generate_trig_expression():
    """Build a random sum of trigonometric terms plus a constant.

    Returns:
        A pair ``(expr, chosen_symbols)``: the generated expression and the
        list of symbols that were sampled for it.
    """
    # Sample between two and all of the available symbols.
    chosen_symbols = random.sample(symbols, random.randint(2, len(symbols)))
    term_count = random.randint(1, 6)  # number of trigonometric terms

    def draw_coefficients():
        return [random.randint(0, 5) for _ in range(term_count)]

    # Redraw until at least one coefficient is non-zero.
    coefficients = draw_coefficients()
    while not any(coefficients):
        coefficients = draw_coefficients()

    constant = random.randint(0, 5)

    # Each term applies a randomly chosen trig function to a product of the
    # chosen symbols, every symbol raised to its own random power in 1..3.
    expr = 0
    for coeff in coefficients:
        func = random.choice(trig_functions)
        argument = sp.Mul(*(s ** random.randint(1, 3) for s in chosen_symbols))
        expr = expr + coeff * func(argument)
    expr = expr + constant
    return expr, chosen_symbols


In [13]:
# Generate 100 trigonometric expressions together with their generated code.
trig_exprs = []
for _ in range(100):
    expr, variables = generate_trig_expression()
    code_source = expr_to_code(expr, variables)
    trig_exprs.append({
        "expression": str(expr),
        "code": code_source,
    })

# Save to a .json file. The encoding is stated explicitly so the output does
# not depend on the platform's default locale encoding.
filename = "trigonometric.json"
with open(filename, 'w', encoding="utf-8") as file:
    json.dump(trig_exprs, file, indent=4)

print(f"Expressions saved to {filename}")

Expressions saved to trigonometric.json


# Generate 100 Geometric Expressions
*   Use the Python code to generate 100 random Geometric expressions.

*   Print each generated expression.
*   Convert each generated expression into executable Python code.


*   Save all generated expressions and corresponding codes into a .json file.

*   Ensure the file `geometric.json` is correctly saved.

*   Submit the file for evaluation.

In [14]:
# Functions a term of a "geometric" expression may be wrapped in
geo_functions = [sp.sqrt, sp.Abs]


def generate_geometric_expression():
    """Build a random sum of square-root / absolute-value terms plus a constant.

    Returns:
        A pair ``(expr, chosen_symbols)``: the generated expression and the
        list of symbols that were sampled for it.
    """
    num_symbols = random.randint(2, len(symbols))  # Randomly choose how many symbols to use in the expression
    chosen_symbols = random.sample(symbols, num_symbols)
    order = random.randint(1, 3)  # Number of terms in the expression

    # Coefficients are drawn from 1..5, so none of them can be zero and no
    # "all coefficients are zero" retry is needed.
    coefficients = [random.randint(1, 5) for _ in range(order)]

    constant = random.randint(1, 5)
    # Each term applies a randomly chosen function to a product of the chosen
    # symbols, every symbol raised to its own random power in 1..3.
    expr = sum(coeff * random.choice(geo_functions)(sp.Mul(*[s**random.randint(1, 3) for s in chosen_symbols])) for coeff in coefficients) + constant
    return expr, chosen_symbols

In [15]:
# Generate 100 geometric expressions together with their generated code.
geo_exprs = []
for _ in range(100):
    expr, variables = generate_geometric_expression()
    code_source = expr_to_code(expr, variables)
    geo_exprs.append({
        "expression": str(expr),
        "code": code_source
    })

# Save to a .json file. The encoding is stated explicitly so the output does
# not depend on the platform's default locale encoding.
filename = "geometric.json"
with open(filename, 'w', encoding="utf-8") as file:
    json.dump(geo_exprs, file, indent=4)

print(f"Expressions saved to {filename}")

Expressions saved to geometric.json


# Generate 100 Linear Diophantine Expressions
*   Use the Python code to generate 100 random Linear Diophantine expressions.

*   Print each generated expression.
*   Convert each generated expression into executable Python code.


*   Save all generated expressions and corresponding codes into a .json file.

*   Ensure the file `linear_diophantine.json` is correctly saved.

*   Submit the file for evaluation.

In [16]:
def generate_linear_diophantine_expression():
    """Build a random linear expression with positive integer coefficients.

    Returns:
        A pair ``(expr, chosen_symbols)``: a sum of ``coefficient * symbol``
        terms plus a constant, and the list of symbols that were sampled.
    """
    # Sample between two and all of the available symbols.
    chosen_symbols = random.sample(symbols, random.randint(2, len(symbols)))

    # One coefficient in 1..10 per chosen symbol, then the constant term.
    coefficients = [random.randint(1, 10) for _ in chosen_symbols]
    constant = random.randint(1, 10)

    expr = 0
    for coeff, s in zip(coefficients, chosen_symbols):
        expr = expr + coeff * s
    expr = expr + constant
    return expr, chosen_symbols

In [17]:
# Generate 100 linear Diophantine expressions together with their generated code.
dioph_exprs = []
for _ in range(100):
    expr, variables = generate_linear_diophantine_expression()
    code_source = expr_to_code(expr, variables)
    dioph_exprs.append({
        "expression": str(expr),
        "code": code_source,
    })

# Save to a .json file. The encoding is stated explicitly so the output does
# not depend on the platform's default locale encoding.
filename = "linear_diophantine.json"
with open(filename, 'w', encoding="utf-8") as file:
    json.dump(dioph_exprs, file, indent=4)

print(f"Expressions saved to {filename}")

Expressions saved to linear_diophantine.json


# Generate 100 Summation Expressions
*   Use the Python code to generate 100 random Summation expressions.

*   Print each generated expression.
*   Convert each generated expression into executable Python code.


*   Save all generated expressions and corresponding codes into a .json file.

*   Ensure the file `summation.json` is correctly saved.

*   Submit the file for evaluation.

In [18]:
def generate_summation_expression():
    """Build a random sum of one to three trigonometric terms plus a constant.

    Despite its name, this does not construct a symbolic ``Sum``; it adds up
    terms drawn from ``trig_functions``, with non-zero coefficients and
    constant.

    Returns:
        A pair ``(expr, chosen_symbols)``: the generated expression and the
        list of symbols that were sampled for it.
    """
    num_symbols = random.randint(2, len(symbols))  # Randomly choose how many symbols to use in the expression
    chosen_symbols = random.sample(symbols, num_symbols)
    order = random.randint(1, 3)  # Number of terms in the expression

    # Coefficients are drawn from 1..5, so none of them can be zero and no
    # "all coefficients are zero" retry is needed.
    coefficients = [random.randint(1, 5) for _ in range(order)]

    constant = random.randint(1, 5)
    # Each term applies a randomly chosen function to a product of the chosen
    # symbols, every symbol raised to its own random power in 1..3.
    expr = sum(coeff * random.choice(trig_functions)(sp.Mul(*[s**random.randint(1, 3) for s in chosen_symbols])) for coeff in coefficients) + constant
    return expr, chosen_symbols

In [19]:
# Generate 100 summation expressions together with their generated code.
sum_exprs = []
for _ in range(100):
    expr, variables = generate_summation_expression()
    code_source = expr_to_code(expr, variables)
    sum_exprs.append({
        "expression": str(expr),
        "code": code_source
    })

# Save to a .json file. The encoding is stated explicitly so the output does
# not depend on the platform's default locale encoding.
filename = "summation.json"
with open(filename, 'w', encoding="utf-8") as file:
    json.dump(sum_exprs, file, indent=4)

print(f"Expressions saved to {filename}")

Expressions saved to summation.json


# **Add Test Cases for All the Above Created Examples**

Create test cases for the expressions created above and their respective code (Multivariable, Trigonometric, Geometric, Diophantine, Summation).




In [20]:
def generate_test_cases(expr, variables, num_cases=5):
    """Evaluate ``expr`` at random points and record inputs and outputs.

    Args:
        expr: Expression object exposing ``subs``; the substituted result
            must be convertible with ``float``.
        variables: Iterable of the variables to assign values to.
        num_cases: Number of test cases to produce.

    Returns:
        A list of ``{"input": {...}, "output": float}`` dicts. ``input`` maps
        each variable's string form to the value used, drawn from [0, 10).
    """
    test_cases = []
    for _ in range(num_cases):
        # Substitute with the variable objects themselves. String keys would
        # be re-parsed by ``subs`` and need not resolve back to the same
        # object (for example a symbol created with assumptions).
        values = {var: np.random.rand() * 10 for var in variables}
        sympy_result = float(expr.subs(values))
        test_cases.append({
            # Names are stringified only for the stored record.
            "input": {str(var): value for var, value in values.items()},
            "output": sympy_result,
        })
    return test_cases

## **Required Dataset Format**

In [None]:
#Data Schema
[
    {
        "task_id": "",
        "sympy_exp": "",
        "latex_exp": "",
        "solution": "",
        "simplified_solution": "",
        "synthetic": true,
        "domain": "",
        "test_cases": "",
        "complexity": "",
        "equation_type": "",
        "output_type": "real | fun | complex| inequality"
    }
]

In [21]:
def determine_complexity(expr):
    """Score the complexity of ``expr`` on an integer scale from 1 to 5.

    The raw score adds up: the number of free symbols, the total polynomial
    degree, the number of non-symbol nodes in the expression tree, twice the
    number of distinct function classes, and the largest operation count
    among the top-level arguments. The raw score is then scaled against a
    fixed ceiling of 100 and clamped to 1..5.

    Args:
        expr: SymPy expression to score.

    Returns:
        An ``int`` between 1 and 5 inclusive.
    """
    # Initialize complexity score
    complexity = 0

    # Number of variables
    num_vars = len(expr.free_symbols)
    complexity += num_vars

    # Degree of the expression. A constant has no generators to build a Poly
    # from, so its degree is taken as 0 instead of letting Poly raise.
    degree = sp.Poly(expr).total_degree() if num_vars else 0
    complexity += degree

    # Number of non-symbol nodes (operations, functions and numbers)
    num_operations = len([arg for arg in sp.preorder_traversal(expr) if isinstance(arg, sp.Basic) and not isinstance(arg, sp.Symbol)])
    complexity += num_operations

    # Types of functions used
    sympy_functions = detect_sympy_functions(expr)
    complexity += len(sympy_functions) * 2  # Assign higher weight to functions

    # Largest operation count among the top-level arguments
    nested_depth = max([arg.count_ops() for arg in expr.args], default=0)
    complexity += nested_depth

    # Normalize complexity to be between 1 and 5
    max_possible_complexity = 100  # Arbitrary large value to cover most cases, adjust as necessary
    normalized_complexity = min(5, max(1, int(5 * (complexity / max_possible_complexity))))

    # Return the final complexity score
    return normalized_complexity


In [23]:
def create_expression_entry(expr, variables, domain, equation_type, output_type):
    """Assemble one dataset record for ``expr``.

    Args:
        expr: SymPy expression the record describes.
        variables: Symbols used as parameters of the generated code and as
            inputs of the test cases.
        domain: Value stored under ``"domain"``.
        equation_type: Value stored under ``"equation_type"``.
        output_type: Value stored under ``"output_type"``.

    Returns:
        A dict with the string, LaTeX and simplified forms of the expression,
        its generated code, random test cases, and the given labels.
        ``"complexity"`` is fixed at 2.
    """
    entry = {
        "task_id": str(uuid.uuid4())[:8],  # first 8 characters of a UUID4
        "sympy_exp": str(expr),
        "latex_exp": sp.latex(expr),
        "solution": expr_to_code(expr, variables),
        "simplified_solution": str(sp.simplify(expr)),
        "synthetic": True,
        "domain": domain,
        "test_cases": generate_test_cases(expr, variables),
        "complexity": 2,
        "equation_type": equation_type,
        "output_type": output_type,
    }
    return entry

# Generate and save expressions
# One row per expression family: (generator, domain label, equation type).
# The families are processed in this order, 100 expressions each.
expression_specs = [
    (generate_multivariable_expression, "Mathematics_Algebra", "algebraic"),
    (generate_trig_expression, "Mathematics_Trigonometry", "trigonometric"),
    (generate_geometric_expression, "Mathematics_Geometry", "geometric"),
    (generate_linear_diophantine_expression, "Mathematics_Algebra", "diophantine"),
    (generate_summation_expression, "Mathematics_Algebra", "summation"),
]

expressions = []
for generator, domain, equation_type in expression_specs:
    for _ in range(100):
        expr, variables = generator()
        expressions.append(create_expression_entry(expr, variables, domain, equation_type, "real"))

# Save to a .json file. The encoding is stated explicitly so the output does
# not depend on the platform's default locale encoding.
filename = "expressions_with_tests.json"
with open(filename, 'w', encoding="utf-8") as file:
    json.dump(expressions, file, indent=4)

print(f"Expressions saved to {filename}")

Expressions saved to expressions_with_tests.json
