# Aicua_SR - Symbolic Regression Training on GPU

Train PySR models on Kaggle GPU for faster formula discovery.

## Setup
1. Enable GPU: Settings → Accelerator → GPU P100/T4
2. Clone Aicua_SR repo from GitHub
3. Run training cells

In [None]:
# Clone the Aicua_SR repository and switch into it, so that the relative
# paths used by later cells (scripts/..., data/generated/...) resolve.
!git clone https://github.com/Aicua/Aicua_SR.git
%cd Aicua_SR

In [None]:
# Install dependencies: pysr, pandas and numpy are imported by later cells.
# NOTE(review): sympy and scikit-learn are not imported directly anywhere in
# this notebook -- presumably needed by PySR or the repo scripts; confirm.
!pip install pysr pandas numpy sympy scikit-learn

In [None]:
# Report which CUDA device (if any) PyTorch can see
import torch

cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    # Device 0 is the only device queried
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {device_props.total_memory / 1e9:.2f} GB")

In [None]:
# Generate the datasets by running the repository's generator scripts.
# NOTE(review): both scripts are invoked unconditionally from this cell;
# whether they skip work when the output already exists depends on the
# scripts themselves -- confirm before relying on "only if missing".
!python scripts/generate_spline_dataset.py
!python scripts/generate_cot_dataset.py

In [None]:
# Read the generated petal spline dataset and print a short summary
import pandas as pd
import numpy as np

df = pd.read_csv('data/generated/petal_spline_dataset.csv')

column_names = df.columns.tolist()
print(f"Dataset shape: {df.shape}")
print(f"Columns: {column_names}")

# Last expression of the cell: the notebook renders the first five rows
df.head(5)

In [None]:
# Fit a PySR symbolic-regression model for one petal spline control point
from pysr import PySRRegressor

# Input columns; `features` and `X` are reused when fitting the other targets
features = ['base_size', 'layer_idx', 'petal_idx', 'opening_degree']
X = df[features].values

# Start with cp3_y, which this notebook treats as the petal height and the
# most important parameter
y = df['cp3_y'].values

# Search settings gathered in one place so they are easy to tweak.
# NOTE(review): nothing in this cell selects a GPU for PySR -- confirm that
# the accelerator actually speeds up the search before relying on it.
search_settings = {
    "niterations": 100,  # raise for a more thorough search
    "binary_operators": ["+", "-", "*", "/"],
    "unary_operators": ["sqrt", "square", "sin", "cos"],
    "populations": 20,
    "population_size": 50,
    "maxsize": 25,
    "ncyclesperiteration": 200,
    "parsimony": 0.001,  # complexity penalty: prefer simpler formulas
    "model_selection": "best",
    "verbosity": 1,
}
model = PySRRegressor(**search_settings)

print("Training SR model for cp3_y (petal height)...")
model.fit(X, y, variable_names=features)

best_expression = model.sympy()
print(f"\nBest equation for cp3_y:")
print(best_expression)

In [None]:
# Print the fitted model (its text form lists the equations it found)
print("All equations (sorted by complexity):")
print(model)

# The selected equation again, rendered as LaTeX
latex_expression = model.latex()
print(f"\nLatex: {latex_expression}")

In [None]:
# Fit one PySR model per control-point target and collect the best formulas
targets = [
    'cp1_x', 'cp1_y', 'cp2_x', 'cp2_y', 'cp3_x', 'cp3_y',
    'cp4_x', 'cp4_y', 'cp5_x', 'cp5_y', 'extrude_depth',
]
banner = '=' * 60

# Same settings for every target: kept small so the demo runs quickly,
# increase niterations for production use
quick_settings = {
    "niterations": 50,
    "binary_operators": ["+", "-", "*", "/"],
    "unary_operators": ["sqrt", "square"],
    "populations": 10,
    "population_size": 30,
    "maxsize": 20,
    "parsimony": 0.002,
    "model_selection": "best",
    "verbosity": 0,
}

# target name -> best formula as a string
discovered_formulas = {}

for target in targets:
    print(f"\n{banner}")
    print(f"Training model for: {target}")
    print(banner)

    # A fresh regressor per target; `model` and `y` end up holding the last one
    y = df[target].values
    model = PySRRegressor(**quick_settings)
    model.fit(X, y, variable_names=features)

    formula = str(model.sympy())
    discovered_formulas[target] = formula

    print(f"✓ {target} = {formula}")

# Final recap of everything that was found
print(f"\n{banner}")
print("All discovered formulas:")
print(banner)
for name, expression in discovered_formulas.items():
    print(f"{name} = {expression}")

In [None]:
# Persist the target -> formula mapping as pretty-printed JSON
import json

formulas_json = json.dumps(discovered_formulas, indent=2)
with open('data/generated/sr_discovered_formulas.json', 'w') as json_file:
    json_file.write(formulas_json)

print("✓ Saved formulas to data/generated/sr_discovered_formulas.json")

In [None]:
# Generate Python code from discovered formulas
def formula_to_python(formula_str, target_name):
    """Convert a SymPy-style formula string into Python function source.

    Feature names are rewritten to the positional aliases ``x0``..``x3`` and
    recognised math function names are qualified with ``math.``. Rewriting is
    done one whole identifier at a time, so a name that merely contains
    another (``asin`` contains ``sin``) is never corrupted, and names that are
    already qualified (``math.sqrt``) are left untouched.

    Args:
        formula_str: Formula text, e.g. ``str(model.sympy())``.
        target_name: Name of the predicted quantity; the generated function
            is called ``compute_<target_name>``.

    Returns:
        Source code (str) defining one function. The code refers to ``math``,
        so the module it is written into must ``import math``.
    """
    import re  # local import keeps this cell runnable on its own

    variable_aliases = {
        'base_size': 'x0',
        'layer_idx': 'x1',
        'petal_idx': 'x2',
        'opening_degree': 'x3',
    }
    # Function names spelled the same way in SymPy output and in ``math``
    math_functions = {
        'sqrt', 'exp', 'log',
        'sin', 'cos', 'tan',
        'asin', 'acos', 'atan',
        'sinh', 'cosh', 'tanh',
    }
    # SymPy spellings that differ from their Python equivalent
    sympy_aliases = {'Abs': 'abs'}

    def _rewrite(match):
        name = match.group(0)
        if name in variable_aliases:
            return variable_aliases[name]
        if name in math_functions:
            return f"math.{name}"
        return sympy_aliases.get(name, name)

    # The lookbehind skips attribute accesses (``math.sqrt``) and the
    # exponent marker inside float literals such as ``1.5e-3``.
    code = re.sub(r'(?<![\w.])[A-Za-z_]\w*', _rewrite, formula_str)

    func_code = f"""
def compute_{target_name}(base_size, layer_idx, petal_idx, opening_degree):
    \"\"\"SR-discovered formula for {target_name}.\"\"\" 
    x0, x1, x2, x3 = base_size, layer_idx, petal_idx, opening_degree
    return {code}
"""
    return func_code

# Assemble the generated module: a fixed header followed by one
# compute_<target> function per discovered formula
module_header = '''#!/usr/bin/env python3
"""Auto-generated SR formulas for petal spline control points.

Generated by Kaggle GPU training.
"""
import math

'''

# Build the body in one join instead of repeated string concatenation
module_code = module_header + "".join(
    formula_to_python(formula, target) + "\n"
    for target, formula in discovered_formulas.items()
)

# Save to file. Python reads source files as UTF-8, so write UTF-8 explicitly
# rather than depending on the platform's default encoding.
with open('data/generated/petal_spline_formulas.py', 'w', encoding='utf-8') as f:
    f.write(module_code)

print("✓ Generated Python module: data/generated/petal_spline_formulas.py")
print("\nGenerated code preview:")
print(module_code[:1000])

In [None]:
# Smoke-test the generated module with one sample input
import importlib
import sys

# Make the generated module importable; guard the insert so re-running the
# cell does not keep growing sys.path
generated_dir = 'data/generated'
if generated_dir not in sys.path:
    sys.path.insert(0, generated_dir)

# The module file is written during this session, so refresh the import
# system's caches, and reload so a re-run after regenerating the file picks
# up the new formulas instead of the copy cached in sys.modules.
importlib.invalidate_caches()
import petal_spline_formulas as psf
psf = importlib.reload(psf)

# Test with sample input
base_size = 2.0
layer_idx = 1
petal_idx = 0
opening_degree = 0.8

print(f"Test input: base_size={base_size}, layer_idx={layer_idx}, petal_idx={petal_idx}, opening_degree={opening_degree}")
print()

# Evaluate every generated compute_<target> function on the sample input
for target in targets:
    func = getattr(psf, f'compute_{target}')
    value = func(base_size, layer_idx, petal_idx, opening_degree)
    print(f"{target} = {value:.4f}")

In [None]:
# Commit and push back to GitHub (optional)
# Requires setting up Git credentials

# !git config user.name "Kaggle Training"
# !git config user.email "training@kaggle.com"
# !git add data/generated/
# !git commit -m "feat: Add SR-discovered formulas from Kaggle GPU training"
# !git push

## Next Steps

1. **Increase iterations**: set `niterations` to somewhere between 200 and 500 for better formulas
2. **Add more operators**: `"exp", "log", "pow"` for complex patterns
3. **Train CoT models**: Predict optimal CP count from shape features
4. **Export to Aicua_SR**: Download formulas and commit to GitHub