In [None]:
# Install dependencies
!pip install pandas numpy scikit-learn --quiet

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# -----------------------------
# Step 1: Example historical data
# -----------------------------
# Stand-in for what would normally be loaded from Supabase
# (past quotes, CAD features, hours logged).
# Each position across the lists below describes one past job:
#   tonnage    - tons
#   complexity - score on a 1-10 scale
#   num_parts  - number of parts
#   hours      - labor hours (the regression target)
historical_data = pd.DataFrame({
    'tonnage': [5.0, 8.0, 2.5, 12.0, 6.0],
    'complexity': [3, 5, 2, 7, 4],
    'num_parts': [10, 15, 5, 20, 12],
    'hours': [40, 70, 18, 120, 55],
})

# Inputs are every column except the target; the target is the hours column
X = historical_data.drop(columns='hours')
y = historical_data.loc[:, 'hours']

# -----------------------------
# Step 2: Train a simple regression model
# -----------------------------
# Linear regression of logged hours on the job features.
# fit() hands back the fitted estimator, so construction and training chain.
model = LinearRegression().fit(X, y)

# -----------------------------
# Step 3: Predict labor hours for a new CAD upload
# -----------------------------
# Example input (from CAD parser + complexity estimator)
new_job = pd.DataFrame([{
    'tonnage': 7.0,
    'complexity': 5,
    'num_parts': 14
}])

# Select the training columns explicitly, in training order: scikit-learn
# validates feature names/order at predict time, so this keeps prediction
# working if the upstream parser emits the keys in a different order or adds
# extra fields, and raises a clear KeyError if a required feature is missing.
# A linear model is unbounded and can go negative for very small jobs; labor
# hours cannot be negative, so clamp at zero before the value is used for
# costing. np.maximum keeps the result a NumPy float scalar.
predicted_hours = np.maximum(model.predict(new_job[X.columns])[0], 0.0)
print(f"Predicted labor hours for this job: {predicted_hours:.1f} hours")

# -----------------------------
# Step 4 (Optional): Integration
# -----------------------------
# You can feed this predicted_hours into your cost calculation module:
# labor_cost = predicted_hours * hourly_rate
# total_cost = material_cost + labor_cost
