In [1]:
!pip install scikit-learn numpy



In [2]:
import json
import random
import numpy as np
import math
import re
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# ------------------------------
# 1. Load JSON Data
# ------------------------------
# JSON text is UTF-8 by specification; decode it explicitly so characters such
# as '¾' in quantity strings are read correctly regardless of the platform's
# default locale encoding.
with open("paneer_recipes.json", "r", encoding="utf-8") as f:
    recipes = json.load(f)

# ------------------------------
# 2. Helper: Parse Quantities
# ------------------------------
def parse_quantity(value):
    """
    Extract a numeric quantity from a recipe value.

    Numbers (int/float) are returned as floats. For strings, the LAST decimal
    number found is returned, since the trailing figure is often the
    grams/ml amount:
    '1¾ nos. / 140 grams' → 140.0
    '2 nos.' → 2.0
    '1.5 cup' → 1.5

    Returns None when the value is neither a number nor a string containing
    at least one digit.

    Note: vulgar-fraction characters such as '¾' are not interpreted; only
    plain digits with an optional decimal part are recognised, so a string
    like '1¾ nos.' yields 1.0.
    """
    if isinstance(value, (int, float)):
        return float(value)

    if isinstance(value, str):
        # Every match must contain a digit. A looser character class such as
        # [\d.]+ would also match the bare "." in abbreviations like "nos."
        # and make float() raise ValueError.
        numbers = re.findall(r"\d+(?:\.\d+)?|\.\d+", value)
        if numbers:
            return float(numbers[-1])  # take last number (often grams/ml)

    return None  # if no number found

# ------------------------------
# 3. Scaling Approaches
# ------------------------------
def linear_scaling(s1, q1, s2, q2, s):
    """Estimate the quantity at serving size ``s`` on the straight line
    through the known points (s1, q1) and (s2, q2).

    Works for both interpolation and extrapolation. Raises ZeroDivisionError
    when s1 == s2, because the line is then undefined.
    """
    quantity_delta = q2 - q1
    distance_from_first = s - s1
    size_delta = s2 - s1
    return q1 + quantity_delta * distance_from_first / size_delta

def proportional_scaling(s1, q1, s2, q2, s):
    """Scale proportionally: multiply ``s`` by the mean per-serving quantity
    observed at the two known points (s1, q1) and (s2, q2).

    Raises ZeroDivisionError if either known serving size is zero.
    """
    per_serving = (q1 / s1, q2 / s2)
    mean_rate = (per_serving[0] + per_serving[1]) / 2
    return mean_rate * s

def power_law_scaling(s1, q1, s2, q2, s):
    """Power law scaling: q(s) = a * s^b

    The exponent ``b`` and coefficient ``a`` are fitted exactly through the
    two known points (s1, q1) and (s2, q2).

    Returns the predicted quantity at serving size ``s``, or None when the
    model cannot be fitted or evaluated:
      * s1 == s2 (the exponent is undefined),
      * a known quantity is not positive (its logarithm is undefined),
      * a known serving size is not positive, or ``s`` is negative,
      * ``s`` is 0 while the fitted exponent is negative.
    """
    if s1 == s2 or q1 <= 0 or q2 <= 0:
        return None
    # Without this guard a non-positive size raises from math.log / division,
    # or makes ``**`` return a complex number for a fractional exponent.
    if s1 <= 0 or s2 <= 0 or s < 0:
        return None
    b = math.log(q2 / q1) / math.log(s2 / s1)
    a = q1 / (s1 ** b)
    try:
        return a * (s ** b)
    except ZeroDivisionError:
        # s == 0 with a negative exponent: the curve diverges at zero.
        return None

# ------------------------------
# 4. Evaluation Metrics
# ------------------------------
def evaluate(y_true, y_pred):
    """Return a dict of metrics comparing predictions with ground truth.

    Args:
        y_true: sequence of ground-truth values.
        y_pred: sequence of predicted values, aligned with ``y_true``.

    Returns:
        Dict with keys "MAE", "RMSE", "MAPE" (in percent) and "R2".

    MAPE is averaged only over samples whose true value is non-zero, because
    the relative error is undefined when the true value is 0 (dividing would
    produce inf/nan and a RuntimeWarning). If every true value is 0, MAPE is
    NaN.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    nonzero = actual != 0
    if nonzero.any():
        relative_errors = np.abs(
            (actual[nonzero] - predicted[nonzero]) / actual[nonzero]
        )
        mape = np.mean(relative_errors) * 100
    else:
        mape = float("nan")

    r2 = r2_score(y_true, y_pred)
    return {"MAE": mae, "RMSE": rmse, "MAPE": mape, "R2": r2}

# ------------------------------
# 5. Evaluation Loop
# ------------------------------
def run_evaluation(recipes, seed=None):
    """Compare the three scaling approaches on held-out serving sizes.

    For each recipe, two serving sizes are picked at random as the "known"
    points. For every ingredient listed under the recipe's first serving
    size, each scaling approach predicts the quantity at all remaining
    serving sizes, and the predictions are scored with ``evaluate``.

    Args:
        recipes: mapping of recipe name -> {serving-size key -> {ingredient ->
            quantity}}. Serving-size keys must be convertible with ``int``.
        seed: optional seed for choosing the known sizes. With the default
            None, the module-level ``random`` state is used, so results vary
            between runs unless the caller seeds ``random`` itself.

    Returns:
        List of dicts with keys "recipe", "ingredient", "known_sizes",
        "linear_metrics", "proportional_metrics" and "powerlaw_metrics".
        An ingredient is omitted when a known quantity cannot be parsed or
        when no held-out size yields a prediction from all three approaches.
    """
    # A dedicated generator makes seeded runs reproducible without disturbing
    # the global random state.
    sampler = random if seed is None else random.Random(seed)
    results = []

    for recipe, servings in recipes.items():
        # Remember the original key for each integer size: str(int(key)) does
        # not round-trip for keys such as "02" or " 2".
        key_by_size = {int(key): key for key in servings}
        serving_sizes = list(key_by_size)

        # Randomly pick 2 known sizes
        if len(serving_sizes) < 2:
            continue
        s_known = sampler.sample(serving_sizes, 2)
        s_unknown = [s for s in serving_sizes if s not in s_known]

        known_first = servings[key_by_size[s_known[0]]]
        known_second = servings[key_by_size[s_known[1]]]

        for ingredient in servings[key_by_size[serving_sizes[0]]]:
            # .get(): an ingredient absent at another size parses to None and
            # is skipped instead of raising KeyError.
            q1 = parse_quantity(known_first.get(ingredient))
            q2 = parse_quantity(known_second.get(ingredient))

            if q1 is None or q2 is None:
                continue  # skip if invalid

            y_true = []
            y_pred_linear, y_pred_prop, y_pred_power = [], [], []

            for s in s_unknown:
                true_val = parse_quantity(
                    servings[key_by_size[s]].get(ingredient)
                )
                if true_val is None:
                    continue

                # Only the power law can decline to predict. Check it before
                # recording anything so the truth and prediction lists always
                # stay aligned.
                pred_power = power_law_scaling(
                    s_known[0], q1, s_known[1], q2, s
                )
                if pred_power is None:
                    continue

                y_true.append(true_val)
                y_pred_linear.append(
                    linear_scaling(s_known[0], q1, s_known[1], q2, s)
                )
                y_pred_prop.append(
                    proportional_scaling(s_known[0], q1, s_known[1], q2, s)
                )
                y_pred_power.append(pred_power)

            if not y_true:
                continue

            results.append({
                "recipe": recipe,
                "ingredient": ingredient,
                "known_sizes": s_known,
                "linear_metrics": evaluate(y_true, y_pred_linear),
                "proportional_metrics": evaluate(y_true, y_pred_prop),
                "powerlaw_metrics": evaluate(y_true, y_pred_power),
            })

    return results

# ------------------------------
# 6. Run and Report
# ------------------------------
if __name__ == "__main__":
    final_results = run_evaluation(recipes)

    # Preview: print the metrics of the first five evaluated ingredients.
    metric_sections = (
        ("Linear:", "linear_metrics"),
        ("Proportional:", "proportional_metrics"),
        ("Power Law:", "powerlaw_metrics"),
    )
    for res in final_results[:5]:
        print(f"\nRecipe: {res['recipe']} | Ingredient: {res['ingredient']}")
        print(f"Known sizes: {res['known_sizes']}")
        for heading, metrics_key in metric_sections:
            print(heading, res[metrics_key])



Recipe: palak_paneer | Ingredient: Onion
Known sizes: [1, 4]
Linear: {'MAE': 14.999999999999993, 'RMSE': np.float64(15.365907428821473), 'MAPE': np.float64(16.08734402852049), 'R2': -0.51111111111111}
Proportional: {'MAE': 52.5, 'RMSE': np.float64(55.339859052946636), 'MAPE': np.float64(52.406417112299465), 'R2': -18.6}
Power Law: {'MAE': 20.1577249342426, 'RMSE': np.float64(20.537137802438924), 'MAPE': np.float64(21.545407847487542), 'R2': -1.6993537863447417}

Recipe: palak_paneer | Ingredient: Garlic
Known sizes: [1, 4]
Linear: {'MAE': 0.8149999999999995, 'RMSE': np.float64(1.1525840533340717), 'MAPE': np.float64(4.900781719783521), 'R2': 0.5321082337609744}
Proportional: {'MAE': 2.3774999999999995, 'RMSE': np.float64(3.105239523772683), 'MAPE': np.float64(12.08001352976548), 'R2': -2.3961776541133566}
Power Law: {'MAE': 0.7447703582191227, 'RMSE': np.float64(0.7484219599353742), 'MAPE': np.float64(4.138572390338368), 'R2': 0.8027153782762879}

Recipe: palak_paneer | Ingredient: Gr