# Edge Cases and Special Scenarios

This notebook demonstrates how Folio handles edge cases:

- Categorical inputs (recording observations)
- Boundary values (inputs at bounds)
- Empty projects (no observations)
- Minimal data (few observations)
- Deleting observations
- Input validation errors
- Narrow bounds
- High-dimensional inputs
- Minimization objectives
- Random recommender

In [None]:
import tempfile
import numpy as np

from folio.api import Folio
from folio.core.config import RecommenderConfig, TargetConfig
from folio.core.schema import InputSpec, OutputSpec
from folio.exceptions import InvalidInputError, InvalidOutputError, ProjectNotFoundError

In [None]:
# Create the demo database inside a private temporary directory.
# tempfile.mktemp() is deprecated: it only returns a name, so another process
# could create that path before we do. mkdtemp() atomically creates a fresh
# directory, and the database path inside it does not exist yet -- the same
# starting condition the mktemp() name provided.
import os

db_path = os.path.join(tempfile.mkdtemp(prefix="folio_demo_"), "edge_cases.db")
folio = Folio(db_path=db_path)

## 1. Categorical Inputs

Folio supports categorical variables for recording observations (e.g., solvent choice, catalyst type).

**Note**: Bayesian optimization (BO) suggestions currently cover continuous inputs only. For mixed spaces,
record observations with their categorical inputs; suggestions are returned for the continuous dimensions only.

In [None]:
# Project mixing one categorical input (solvent) with one continuous input
# (concentration, in M); the single output "yield" is maximized.
solvent_input = InputSpec("solvent", "categorical", levels=["water", "ethanol", "dmso", "thf"])
concentration_input = InputSpec("concentration", "continuous", bounds=(0.1, 1.0), units="M")
yield_output = OutputSpec("yield", units="%")
yield_target = TargetConfig(objective="yield", objective_mode="maximize")

folio.create_project(
    name="solvent_screening",
    inputs=[solvent_input, concentration_input],
    outputs=[yield_output],
    target_configs=[yield_target],
)
print("Created project with categorical input")

In [None]:
# Record one run per solvent, all at the same 0.5 concentration.
solvent_yields = [("water", 45.0), ("ethanol", 72.0), ("dmso", 68.0)]
for solvent_name, measured_yield in solvent_yields:
    folio.add_observation(
        project_name="solvent_screening",
        inputs={"solvent": solvent_name, "concentration": 0.5},
        outputs={"yield": measured_yield},
    )
print("Added observations with categorical solvent values")

In [None]:
# Request suggestions and keep the first one; only the continuous
# "concentration" value is read from it and reported.
solvent_suggestions = folio.suggest("solvent_screening")
suggestion = solvent_suggestions[0]
print(f"Suggested concentration: {suggestion['concentration']:.2f} M")
print("(Categorical 'solvent' must be chosen by the experimenter)")

## 2. Boundary Values

Values exactly at the bounds are valid.

In [None]:
# Single continuous input "x" bounded to [0.0, 100.0].
bounded_x = InputSpec("x", "continuous", bounds=(0.0, 100.0))
folio.create_project(
    name="boundary_test",
    inputs=[bounded_x],
    outputs=[OutputSpec("y")],
    target_configs=[TargetConfig(objective="y", objective_mode="maximize")],
)

# Lower bound, upper bound, then an interior point -- in that order.
for x_value, y_value in [(0.0, 10.0), (100.0, 50.0), (50.0, 80.0)]:
    folio.add_observation("boundary_test", inputs={"x": x_value}, outputs={"y": y_value})
print("Added observations at boundary values (0.0 and 100.0)")

# Read the stored observations back and list them.
boundary_observations = folio.get_observations("boundary_test")
for o in boundary_observations:
    print(f"  x={o.inputs['x']}, y={o.outputs['y']}")

## 3. Empty Project (No Observations)

Folio returns random suggestions when no data exists.

In [None]:
# Two continuous inputs sharing the same [0.0, 10.0] range; no observations
# are ever added to this project.
xy_inputs = [InputSpec(axis, "continuous", bounds=(0.0, 10.0)) for axis in ("x", "y")]
folio.create_project(
    name="empty_project",
    inputs=xy_inputs,
    outputs=[OutputSpec("result")],
    target_configs=[TargetConfig(objective="result", objective_mode="maximize")],
)

# Ask for a suggestion while the project still has no data.
empty_suggestions = folio.suggest("empty_project")
suggestion = empty_suggestions[0]
print(f"Suggestion for empty project: x={suggestion['x']:.2f}, y={suggestion['y']:.2f}")
print("(Random sample within bounds)")

## 4. Minimal Data (Below n_initial)

With fewer observations than `n_initial` (default 5), Folio uses random sampling
to explore before fitting the Gaussian process (GP) model.

In [None]:
# Project whose recommender is configured with n_initial=5.
initial_phase_config = RecommenderConfig(n_initial=5)
folio.create_project(
    name="minimal_data",
    inputs=[InputSpec("x", "continuous", bounds=(0.0, 10.0))],
    outputs=[OutputSpec("y")],
    target_configs=[TargetConfig(objective="y", objective_mode="maximize")],
    recommender_config=initial_phase_config,
)

# Store just two observations, fewer than the configured n_initial of 5.
for x_value, y_value in [(2.0, 30.0), (8.0, 70.0)]:
    folio.add_observation("minimal_data", inputs={"x": x_value}, outputs={"y": y_value})

print("Only 2 observations (n_initial=5), so suggestions are random:")
# Three independent suggest() calls, printing the first suggestion of each.
for i in range(3):
    s = folio.suggest("minimal_data")[0]
    print(f"  Suggestion {i+1}: x={s['x']:.2f}")

## 5. Deleting Observations

Remove incorrect or outlier observations.

In [None]:
# Project used to demonstrate observation deletion.
folio.create_project(
    name="delete_test",
    inputs=[InputSpec("x", "continuous", bounds=(0.0, 10.0))],
    outputs=[OutputSpec("y")],
    target_configs=[TargetConfig(objective="y", objective_mode="maximize")],
)

# Keyword arguments for each add_observation call; only the last row carries
# a note, which the deletion cell later searches for.
delete_test_rows = [
    {"inputs": {"x": 1.0}, "outputs": {"y": 10.0}},
    {"inputs": {"x": 5.0}, "outputs": {"y": 50.0}},
    {"inputs": {"x": 9.0}, "outputs": {"y": 90.0}, "notes": "This one has an error"},
]
for row_kwargs in delete_test_rows:
    folio.add_observation("delete_test", **row_kwargs)

print(f"Observations before deletion: {len(folio.get_observations('delete_test'))}")

In [None]:
# Walk the stored observations and delete each one whose note mentions "error".
for obs in folio.get_observations("delete_test"):
    # Skip observations without a note, or whose note lacks the marker word.
    if not obs.notes or "error" not in obs.notes:
        continue
    print(f"Deleting observation id={obs.id}: {obs.notes}")
    folio.delete_observation(obs.id)

print(f"Observations after deletion: {len(folio.get_observations('delete_test'))}")

## 6. Input Validation Errors

Folio validates inputs against the schema and provides clear error messages.

In [None]:
# Schema exercised by the validation cells: a bounded continuous input plus a
# two-level categorical input, with a single "yield" output.
temperature_input = InputSpec("temperature", "continuous", bounds=(20.0, 100.0))
solvent_choice = InputSpec("solvent", "categorical", levels=["water", "ethanol"])

folio.create_project(
    name="validation_test",
    inputs=[temperature_input, solvent_choice],
    outputs=[OutputSpec("yield")],
    target_configs=[TargetConfig(objective="yield", objective_mode="maximize")],
)

In [None]:
# Out-of-range value: temperature 150.0 exceeds the declared upper bound of 100.0.
out_of_range_inputs = {"temperature": 150.0, "solvent": "water"}
try:
    folio.add_observation(
        "validation_test",
        inputs=out_of_range_inputs,
        outputs={"yield": 50.0},
    )
except InvalidInputError as e:
    print(f"Caught InvalidInputError: {e}")

In [None]:
# Unknown categorical level: "acetone" is not among the declared solvent levels.
unknown_level_inputs = {"temperature": 50.0, "solvent": "acetone"}
try:
    folio.add_observation(
        "validation_test",
        inputs=unknown_level_inputs,
        outputs={"yield": 50.0},
    )
except InvalidInputError as e:
    print(f"Caught InvalidInputError: {e}")

In [None]:
# Incomplete inputs: the required 'solvent' key is left out on purpose.
incomplete_inputs = {"temperature": 50.0}
try:
    folio.add_observation(
        "validation_test",
        inputs=incomplete_inputs,
        outputs={"yield": 50.0},
    )
except InvalidInputError as e:
    print(f"Caught InvalidInputError: {e}")

In [None]:
# Incomplete outputs: valid inputs, but the 'yield' output is omitted.
valid_inputs = {"temperature": 50.0, "solvent": "water"}
no_outputs = {}
try:
    folio.add_observation(
        "validation_test",
        inputs=valid_inputs,
        outputs=no_outputs,
    )
except InvalidOutputError as e:
    print(f"Caught InvalidOutputError: {e}")

In [None]:
# Unknown project: no project with this name was created in this notebook.
missing_project_name = "nonexistent_project"
try:
    folio.suggest(missing_project_name)
except ProjectNotFoundError as e:
    print(f"Caught ProjectNotFoundError: {e}")

## 7. Narrow Bounds

Very tight parameter ranges still work correctly.

In [None]:
# Input range only 0.1 wide.
lower_bound, upper_bound = 0.0, 0.1
folio.create_project(
    name="narrow_bounds",
    inputs=[InputSpec("x", "continuous", bounds=(lower_bound, upper_bound))],
    outputs=[OutputSpec("y")],
    target_configs=[TargetConfig(objective="y", objective_mode="maximize")],
)

# Three observations spread inside the narrow range.
for x_value, y_value in [(0.02, 1.0), (0.05, 2.0), (0.08, 1.5)]:
    folio.add_observation("narrow_bounds", inputs={"x": x_value}, outputs={"y": y_value})

narrow_suggestions = folio.suggest("narrow_bounds")
suggestion = narrow_suggestions[0]
print(f"Narrow bounds [0, 0.1]: suggested x={suggestion['x']:.4f}")
# Sanity check for the demo: the suggested x must fall inside the declared range.
assert lower_bound <= suggestion["x"] <= upper_bound, "Suggestion outside bounds!"

## 8. Many Inputs (High Dimensional)

Folio handles higher-dimensional optimization spaces.

In [None]:
# Create a 5D optimization problem
folio.create_project(
    name="high_dim",
    inputs=[
        InputSpec("x1", "continuous", bounds=(0.0, 1.0)),
        InputSpec("x2", "continuous", bounds=(0.0, 1.0)),
        InputSpec("x3", "continuous", bounds=(0.0, 1.0)),
        InputSpec("x4", "continuous", bounds=(0.0, 1.0)),
        InputSpec("x5", "continuous", bounds=(0.0, 1.0)),
    ],
    outputs=[OutputSpec("y")],
    target_configs=[TargetConfig(objective="y", objective_mode="maximize")],
)

# Add some random observations
np.random.seed(42)
for _ in range(8):
    inputs = {f"x{i}": np.random.uniform(0, 1) for i in range(1, 6)}
    y = sum(inputs.values())  # Simple sum as objective
    folio.add_observation("high_dim", inputs=inputs, outputs={"y": y})

suggestion = folio.suggest("high_dim")[0]
print("5D optimization suggestion:")
for key, val in suggestion.items():
    print(f"  {key}: {val:.3f}")

## 9. Minimization Objective

Folio supports minimization as well as maximization.

In [None]:
# Project whose single objective, "cost", is minimized rather than maximized.
cost_target = TargetConfig(objective="cost", objective_mode="minimize")
folio.create_project(
    name="minimize_cost",
    inputs=[InputSpec("x", "continuous", bounds=(0.0, 10.0))],
    outputs=[OutputSpec("cost", units="$")],
    target_configs=[cost_target],
)

# Cost readings at five x values; the lowest recorded cost (2.0) is at x=6.
cost_readings = [(2.0, 20.0), (4.0, 8.0), (6.0, 2.0), (8.0, 10.0), (10.0, 25.0)]
for x_value, cost_value in cost_readings:
    folio.add_observation("minimize_cost", inputs={"x": x_value}, outputs={"cost": cost_value})

# The demo expects the suggestion to land near the lowest recorded cost.
cost_suggestions = folio.suggest("minimize_cost")
suggestion = cost_suggestions[0]
print(f"Minimization: suggested x={suggestion['x']:.2f} (minimum is around x=6)")

## 10. Random Recommender

Use pure random sampling instead of Bayesian optimization.

In [None]:
# Project configured with the "random" recommender type.
random_config = RecommenderConfig(type="random")
folio.create_project(
    name="random_sampling",
    inputs=[InputSpec("x", "continuous", bounds=(0.0, 10.0))],
    outputs=[OutputSpec("y")],
    target_configs=[TargetConfig(objective="y", objective_mode="maximize")],
    recommender_config=random_config,
)

# Two nearby observations; the demo's premise is that a random recommender
# does not take them into account.
for x_value, y_value in [(5.0, 100.0), (5.1, 99.0)]:
    folio.add_observation("random_sampling", inputs={"x": x_value}, outputs={"y": y_value})

print("Random recommender (ignores data):")
# Three independent suggest() calls, printing the first suggestion of each.
for i in range(3):
    s = folio.suggest("random_sampling")[0]
    print(f"  Suggestion {i+1}: x={s['x']:.2f}")

## Cleanup

In [None]:
# Remove every project that list_projects() reports, then show what remains.
for listed_project in folio.list_projects():
    folio.delete_project(listed_project)

print(f"Remaining projects: {folio.list_projects()}")
print("Demo complete!")