# Aicua_SR - Symbolic Regression Training on GPU

Train PySR models on Kaggle GPU for faster formula discovery.

## Setup
1. Enable GPU: Settings → Accelerator → GPU P100/T4
2. Clone Aicua_SR repo from GitHub
3. Run training cells

In [None]:
# Clone the Aicua_SR repository and make it the working directory.
# Later cells use repo-relative paths (scripts/..., data/...), so the %cd
# must run before them.
!git clone https://github.com/Aicua/Aicua_SR.git
%cd Aicua_SR

In [None]:
# Install the Python packages used by the cells below.
# NOTE(review): torch is imported by the GPU check cell but is not installed
# here — presumably it is preinstalled on the Kaggle image; confirm.
!pip install pysr pandas numpy sympy scikit-learn

In [None]:
# Report whether PyTorch can see a CUDA device and, if so, describe device 0
import torch

cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {device_props.total_memory / 1e9:.2f} GB")

In [None]:
# Generate datasets (v2/v4 - latest versions) by running the repository's
# generator scripts.
# NOTE(review): later cells read data/processed/petal_spline_v2.csv and
# data/processed/bone_rigging_v4.csv — presumably written by the first two
# scripts; confirm the output paths against the scripts themselves.
!python scripts/generate_petal_spline_v2.py
!python scripts/generate_bone_rigging_v4.py
!python scripts/generate_cot_dataset.py

In [None]:
# Read the petal spline v2 table (middle-wide shape, ultra-thin) and print a summary
import pandas as pd
import numpy as np

df = pd.read_csv('data/processed/petal_spline_v2.csv')

# Per-row widths: cp2..cp4 span (middle) and cp1..cp5 span (base)
mid_width = df['cp4_x'] - df['cp2_x']
base_width = df['cp5_x'] - df['cp1_x']

summary_lines = [
    f"Dataset shape: {df.shape}",
    f"Columns: {list(df.columns)}",
    f"\nDataset statistics:",
    f"  base_size range: {df['base_size'].min():.2f} - {df['base_size'].max():.2f}",
    f"  petal_height (cp3_y) range: {df['cp3_y'].min():.4f} - {df['cp3_y'].max():.4f}",
    f"  Mid/Base ratio: {(mid_width / base_width).mean():.2f} (should be ~2.0)",
    f"  Thickness/Height ratio: {(df['extrude_depth'] / df['cp3_y']).mean():.4f} (ultra-thin)",
]
print("\n".join(summary_lines))

# Last expression of the cell: the notebook renders the first rows
df.head()

In [None]:
# Real PySR Training - Petal Spline Control Points (V2)
from pysr import PySRRegressor

# Features (v2 naming); `features` and `X` are reused by the per-target loop
features = ['base_size', 'layer_index', 'petal_index', 'opening_degree']
X = df[features].values

# Train model for cp3_y (petal height) - most important parameter
y = df['cp3_y'].values

model = PySRRegressor(
    niterations=100,  # Increase for better results (at the cost of runtime)
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["sqrt", "square", "sin", "cos"],
    populations=20,
    population_size=50,
    maxsize=25,
    # Current spelling of the option; `ncyclesperiteration` is the deprecated
    # name and only triggers a warning on recent PySR releases.
    ncycles_per_iteration=200,
    parsimony=0.001,  # Prefer simpler formulas
    model_selection="best",
    verbosity=1,
)

print("Training SR model for cp3_y (petal height)...")
model.fit(X, y, variable_names=features)

print(f"\nBest equation for cp3_y:")
print(model.sympy())

In [None]:
# Show the fitted model's equation table, then the selected equation as LaTeX
print("All equations (sorted by complexity):", model, sep="\n")

# Latex format
print("\nLatex: " + f"{model.latex()}")

In [None]:
# Fit a separate PySR model for every control-point column (V2)
targets = [
    'cp1_x', 'cp1_y', 'cp2_x', 'cp2_y', 'cp3_x', 'cp3_y',
    'cp4_x', 'cp4_y', 'cp5_x', 'cp5_y', 'extrude_depth',
]

# Search settings shared by every per-target model
search_settings = dict(
    niterations=50,  # Faster for demo, increase for production
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["sqrt", "square"],
    populations=10,
    population_size=30,
    maxsize=20,
    parsimony=0.002,
    model_selection="best",
    verbosity=0,
)

divider = '=' * 60
discovered_formulas = {}

for target in targets:
    print(f"\n{divider}")
    print(f"Training model for: {target}")
    print(f"{divider}")

    # Same feature matrix X for every target; only the regressand changes
    y = df[target].values

    model = PySRRegressor(**search_settings)
    model.fit(X, y, variable_names=features)

    # Keep the selected equation as a plain string, keyed by column name
    formula = str(model.sympy())
    discovered_formulas[target] = formula

    print(f"✓ {target} = {formula}")

# Recap of everything found
print(f"\n{divider}")
print("All discovered formulas for petal_spline_v2:")
print(f"{divider}")
for target, formula in discovered_formulas.items():
    print(f"{target} = {formula}")

In [None]:
# Save discovered formulas to JSON
import json
import os

formulas_path = 'data/generated/sr_discovered_formulas.json'

# Create the output directory if it is missing so the write cannot fail with
# FileNotFoundError.
os.makedirs(os.path.dirname(formulas_path), exist_ok=True)

# Explicit encoding keeps the file identical regardless of the platform default
with open(formulas_path, 'w', encoding='utf-8') as f:
    json.dump(discovered_formulas, f, indent=2)

print("✓ Saved formulas to data/generated/sr_discovered_formulas.json")

In [None]:
# Generate Python code from discovered formulas (V2)
def formula_to_python(formula_str, target_name):
    """Convert a SymPy formula string into Python source for one function.

    Args:
        formula_str: Formula text written in terms of the v2 feature names
            (base_size, layer_index, petal_index, opening_degree).
        target_name: Suffix of the generated function, which is named
            ``compute_<target_name>``.

    Returns:
        Source code of a function taking the four features and returning the
        evaluated formula. The generated code expects ``math`` to be imported
        by the module it is placed in.

    Only whole identifiers are rewritten. Function calls whose name exists in
    the ``math`` module get a ``math.`` prefix, ``square(...)`` becomes an
    immediately-applied lambda, and ``Abs(...)`` becomes the builtin ``abs``.
    Names that are already qualified (``math.sqrt``) or unknown are left as is.
    """
    import math
    import re

    # Replace variable names (v2 naming); \b keeps longer identifiers intact
    variable_aliases = {
        'base_size': 'x0',
        'layer_index': 'x1',
        'petal_index': 'x2',
        'opening_degree': 'x3',
    }
    variable_pattern = r'\b(' + '|'.join(map(re.escape, variable_aliases)) + r')\b'
    code = re.sub(
        variable_pattern,
        lambda match: variable_aliases[match.group(1)],
        formula_str,
    )

    def _rewrite_call(match):
        name = match.group(1)
        if name == 'square':
            # Parenthesised so that `square(arg)` really calls the lambda
            return '(lambda x: x**2)'
        if name == 'Abs':
            return 'abs'
        if hasattr(math, name):
            return 'math.' + name
        return name

    # An identifier directly followed by "(" is a call. The lookbehind skips
    # names that are part of a longer identifier or already attribute-qualified.
    code = re.sub(r'(?<![\w.])([A-Za-z_]\w*)(?=\s*\()', _rewrite_call, code)

    func_code = f"""
def compute_{target_name}(base_size, layer_index, petal_index, opening_degree):
    \"\"\"SR-discovered formula for {target_name} (V2 middle-wide shape).\"\"\" 
    x0, x1, x2, x3 = base_size, layer_index, petal_index, opening_degree
    return {code}
"""
    return func_code

# Generate Python module: fixed header followed by one function per target
import os

module_header = '''#!/usr/bin/env python3
"""Auto-generated SR formulas for petal spline V2 control points.

Generated by Kaggle GPU training.
Features: middle-wide shape, ultra-thin thickness
"""
import math

'''

# Build the body in one pass instead of growing the string with += in a loop
module_code = module_header + "".join(
    formula_to_python(formula, target) + "\n"
    for target, formula in discovered_formulas.items()
)

# Save to file; create the output directory if it is missing so the write
# cannot fail with FileNotFoundError.
module_path = 'data/generated/petal_spline_v2_formulas.py'
os.makedirs(os.path.dirname(module_path), exist_ok=True)
with open(module_path, 'w', encoding='utf-8') as f:
    f.write(module_code)

print("✓ Generated Python module: data/generated/petal_spline_v2_formulas.py")
print("\nGenerated code preview:")
print(module_code[:1500])

In [None]:
# Test generated formulas (V2)
import importlib
import sys

# Make the generated module importable without stacking duplicate entries
# on sys.path every time this cell is re-run.
if 'data/generated' not in sys.path:
    sys.path.insert(0, 'data/generated')

# The module file is (re)written during this session. A plain `import` would
# keep serving the first version cached in sys.modules, so drop the finder
# caches and reload to pick up the file currently on disk.
importlib.invalidate_caches()
import petal_spline_v2_formulas as psf
psf = importlib.reload(psf)

# Test with sample input (larger base_size for V2)
base_size = 5.0
layer_index = 1  # 0-based
petal_index = 0
opening_degree = 0.8

print(f"Test input: base_size={base_size}, layer_index={layer_index}, petal_index={petal_index}, opening_degree={opening_degree}")
print()

# Evaluate every generated compute_<target> function on the sample input
for target in targets:
    func = getattr(psf, f'compute_{target}')
    value = func(base_size, layer_index, petal_index, opening_degree)
    print(f"{target} = {value:.6f}")

# Verify middle-wide shape
cp2_x = psf.compute_cp2_x(base_size, layer_index, petal_index, opening_degree)
cp4_x = psf.compute_cp4_x(base_size, layer_index, petal_index, opening_degree)
cp1_x = psf.compute_cp1_x(base_size, layer_index, petal_index, opening_degree)
cp5_x = psf.compute_cp5_x(base_size, layer_index, petal_index, opening_degree)

mid_width = cp4_x - cp2_x
base_width = cp5_x - cp1_x
print(f"\nMiddle-wide verification:")
print(f"  Mid width: {mid_width:.4f}")
print(f"  Base width: {base_width:.4f}")
if base_width:
    print(f"  Ratio: {mid_width/base_width:.2f} (should be ~2.0)")
else:
    # A degenerate formula pair can give a zero base width; report it
    # instead of aborting the cell with ZeroDivisionError.
    print("  Ratio: undefined (base width is 0)")

In [None]:
# Bone rigging V4: fit formulas for the 4-bone branching structure
section_rule = "=" * 60
print(section_rule)
print("BONE RIGGING V4 - 4 Bones Branching Structure")
print(section_rule)

bone_df = pd.read_csv('data/processed/bone_rigging_v4.csv')
print(f"Dataset shape: {bone_df.shape}")
# The printout treats the first five columns as inputs and the rest as outputs
print(f"Features: {list(bone_df.columns[:5])}")
print(f"Targets: {list(bone_df.columns[5:])}")

# Input columns for bone rigging
bone_features = ['petal_height', 'petal_width', 'opening_degree', 'layer_index', 'curvature_intensity']
bone_X = bone_df[bone_features].values

# Key bone parameters to fit
# (root_end_y, middle_end_y, left_end_x, left_end_y, right_end_x)
key_bone_targets = ['bone_root_end_y', 'bone_middle_end_y', 'bone_left_end_x', 'bone_left_end_y', 'bone_right_end_x']

# Search settings shared by every bone model
bone_search_settings = dict(
    niterations=30,
    binary_operators=["+", "-", "*", "/"],
    unary_operators=["sqrt"],
    populations=10,
    population_size=30,
    maxsize=15,
    parsimony=0.003,
    model_selection="best",
    verbosity=0,
)

bone_formulas = {}

for target in key_bone_targets:
    print(f"\nTraining: {target}")
    y = bone_df[target].values

    model = PySRRegressor(**bone_search_settings)
    model.fit(bone_X, y, variable_names=bone_features)

    # Store the selected equation as text, keyed by target column
    formula = str(model.sympy())
    bone_formulas[target] = formula
    print(f"✓ {target} = {formula}")

# Recap of all bone formulas
print(f"\n{section_rule}")
print("Bone Rigging V4 Formulas (4 bones branching):")
print(f"{section_rule}")
for target, formula in bone_formulas.items():
    print(f"{target} = {formula}")

In [None]:
# Commit and push back to GitHub (optional)
# Requires setting up Git credentials

# !git config user.name "Kaggle Training"
# !git config user.email "training@kaggle.com"
# !git add data/generated/
# !git commit -m "feat: Add SR-discovered formulas from Kaggle GPU training"
# !git push

## Next Steps

1. **Increase iterations**: set `niterations` to a value between 200 and 500 for better formulas
2. **Add more operators**: `"exp", "log", "pow"` for complex patterns
3. **Train CoT models**: Predict optimal CP count from shape features
4. **Export to Aicua_SR**: Download formulas and commit to GitHub