# Solution: The Unscaled Lasso

This is the answer key for `drill_11_unscaled_lasso.ipynb`.

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Pin NumPy's global RNG so every Restart & Run All draws the same synthetic data.
np.random.seed(seed=42)

In [None]:
# Synthetic customer data whose four predictors sit on very different scales
n = 500

# The draws consume the global RNG in a fixed order (tenure, spend, orders,
# premium flag, then noise); reordering them would change the dataset.
tenure_days = np.random.uniform(30, 1000, n)
monthly_spend = np.random.uniform(20, 200, n)
orders = np.random.poisson(5, n)
is_premium = np.random.binomial(1, 0.3, n)

feature_cols = ['tenure_days', 'monthly_spend', 'orders', 'is_premium']

# Ground-truth effect of one raw unit of each feature on lifetime value,
# listed in the same order as feature_cols
true_coefs = dict(zip(feature_cols, (0.5, 2.0, 30.0, 100.0)))
features = dict(zip(feature_cols, (tenure_days, monthly_spend, orders, is_premium)))

# Target = linear signal (accumulated feature by feature) + Gaussian noise (sd 50)
signal = sum(true_coefs[name] * values for name, values in features.items())
ltv = signal + np.random.normal(0, 50, n)

df = pd.DataFrame({**features, 'ltv': ltv})

X, y = df[feature_cols], df['ltv']

# Hold out 30% of the rows for testing; the fixed random_state keeps the split reproducible.
holdout_split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = holdout_split

In [None]:
# BUGGY (kept on purpose for the drill): Lasso fit on raw, unscaled features.
# The L1 penalty acts on raw coefficient magnitudes, and those depend on each
# feature's units, so the shrinkage is spread unevenly across features.
lasso_unscaled = Lasso(alpha=1.0)
lasso_unscaled.fit(X_train, y_train)

print("=== Unscaled Lasso (WRONG) ===")
for name, coef in zip(feature_cols, lasso_unscaled.coef_):
    # Lasso sets eliminated coefficients to exactly 0, so test for that directly.
    # A magnitude cut-off such as 0.01 is itself scale-dependent: in raw units a
    # tiny coefficient on a wide-range feature can still carry a large effect.
    status = "ZEROED" if coef == 0 else f"{coef:.3f}"
    print(f"  {name:<15}: {status}")

## SOLUTION: Scale Before Lasso

In [None]:
# Step 1: Standardize the features. The scaler learns its statistics from the
# training split only and is then reused, unchanged, on the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 2: Fit Lasso on the scaled data, letting 5-fold CV choose alpha
lasso_scaled = LassoCV(cv=5).fit(X_train_scaled, y_train)

print("=== Fixed Lasso Results (Scaled) ===")
print(f"\nOptimal alpha: {lasso_scaled.alpha_:.4f}")
print("\nCoefficients:")
for idx, name in enumerate(feature_cols):
    coef = lasso_scaled.coef_[idx]
    if abs(coef) < 0.01:
        status = "ZEROED"
    else:
        status = f"{coef:.3f}"
    print(f"  {name:<15}: {status}")

In [None]:
# Compare held-out performance of the two fits
r2_unscaled = r2_score(y_test, lasso_unscaled.predict(X_test))
r2_scaled = r2_score(y_test, lasso_scaled.predict(X_test_scaled))

# Count the features each model actually kept. Lasso sets eliminated
# coefficients to exactly 0, so count non-zeros; a magnitude cut-off such as
# 0.01 would be scale-dependent for the fit done in raw feature units.
n_selected_unscaled = np.count_nonzero(lasso_unscaled.coef_)
n_selected_scaled = np.count_nonzero(lasso_scaled.coef_)

print("=== Performance Comparison ===")
print(f"\nUnscaled Lasso:")
print(f"  R²: {r2_unscaled:.3f}")
print(f"  Features selected: {n_selected_unscaled}")
print(f"\nScaled Lasso:")
print(f"  R²: {r2_scaled:.3f}")
print(f"  Features selected: {n_selected_scaled}")

In [None]:
# Self-check
tenure_idx = feature_cols.index('tenure_days')
tenure_coef = lasso_scaled.coef_[tenure_idx]
assert abs(tenure_coef) > 0.01, "tenure_days should not be zeroed after scaling!"

# Standardizing changes how the L1 penalty is shared across features; it does
# not by itself guarantee a higher R². The two fits can land within noise of
# each other on the test split, so require "not meaningfully worse" rather than
# a strict improvement that could fail spuriously.
R2_TOLERANCE = 0.01
assert r2_scaled >= r2_unscaled - R2_TOLERANCE, "Scaled Lasso should not have meaningfully worse R²"

# Only claim an improvement when there actually is one.
r2_verdict = "improved" if r2_scaled > r2_unscaled else "comparable"

print("✓ Lasso fixed!")
print(f"✓ tenure_days now has coefficient: {tenure_coef:.3f}")
print(f"✓ R² {r2_verdict}: {r2_unscaled:.3f} → {r2_scaled:.3f}")

## Sample Postmortem

### What happened:
- Lasso wrongly eliminated `tenure_days` as "unimportant" despite it being a strong predictor.

### Root cause:
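- Lasso penalizes the sum of |coefficients|, but a coefficient's size depends on the units of its feature. A feature with a large range (like tenure_days: 30-1000) needs only a small per-unit coefficient, so it looks negligible and is easily written off as zeroed, while a small-range feature (like is_premium: 0-1) needs a large coefficient and absorbs most of the penalty. The coefficient size doesn't reflect feature importance when scales differ.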

### How to prevent:
- **Always scale before Lasso/Ridge/ElasticNet.** Use StandardScaler to make features comparable.
- **Interpret coefficients on scaled data.** After scaling, larger |coefficient| = more important.
- **Decision trees don't need scaling** — they're scale-invariant.