## 7. UNDERFITTING

### Definition
**The model is too simple to capture the underlying patterns in the data.** As a result, it performs poorly on both the training data and the test data.

```
Underfitting = Model is too simple
               Misses key patterns in data
```

### Code Example:


In [None]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Nonlinear data: y = x² + noise
rng = np.random.default_rng(42)  # fixed seed so the figure and scores are reproducible
X = np.linspace(-5, 5, 100).reshape(-1, 1)
y = (X.ravel() ** 2) + rng.normal(0, 5, 100)

# Hold out 30% of the points so the scores shown on the plots are measured
# rather than hard-coded. An underfit model scores poorly on BOTH splits.
shuffled = rng.permutation(len(X))
n_test = int(0.3 * len(X))
test_idx, train_idx = shuffled[:n_test], shuffled[n_test:]


def _fit_and_plot(ax, degree, title, line_style):
    """Fit a polynomial regression of the given degree and draw it on `ax`.

    The model is fitted on the training split only. The axis shows all data
    points, the fitted curve, and the measured train/test R² scores (R² is
    what `LinearRegression.score` reports; "accuracy" is a classification
    metric and does not apply here).

    Parameters:
        ax: matplotlib axis to draw on.
        degree: polynomial degree passed to `PolynomialFeatures`.
        title: title of the subplot.
        line_style: matplotlib format string for the fitted curve.

    Returns:
        Tuple `(poly, X_poly, model)`: the feature transformer, the
        transformed full data set, and the fitted regressor.
    """
    poly = PolynomialFeatures(degree)
    X_poly = poly.fit_transform(X)
    model = LinearRegression()
    model.fit(X_poly[train_idx], y[train_idx])

    train_r2 = model.score(X_poly[train_idx], y[train_idx])
    test_r2 = model.score(X_poly[test_idx], y[test_idx])

    ax.scatter(X, y, alpha=0.5)
    ax.plot(X, model.predict(X_poly), line_style, linewidth=2)
    ax.set_title(title)
    ax.set_ylabel(f'R² (Train): {train_r2:.2f}\nR² (Test): {test_r2:.2f}')
    return poly, X_poly, model


# Test different model complexities
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Underfit: Degree 1 (straight line)
poly1, X1, model1 = _fit_and_plot(axes[0], 1, 'Underfitted (Degree 1)', 'r-')

# Good fit: Degree 2
poly2, X2, model2 = _fit_and_plot(axes[1], 2, 'Good Fit (Degree 2)', 'g-')

# Overfit: Degree 10
# NOTE: the train/test gap here is whatever the data produces; with this many
# points it may be modest rather than dramatic.
poly10, X10, model10 = _fit_and_plot(axes[2], 10, 'Overfitted (Degree 10)', 'r-')

plt.tight_layout()
plt.show()


### Causes of Underfitting and Their Fixes:


In [None]:
# 1. Model too simple
#    Solution: Use more complex model

# 2. Insufficient training
#    Solution: Train longer, more epochs

# 3. Poor features
#    Solution: Feature engineering, add features

# 4. Too much regularization
#    Solution: Reduce regularization strength

# 5. Wrong algorithm
#    Solution: Try different algorithm

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Solution 1: More complex model
# NOTE: LinearRegression is a regressor while the other two are classifiers.
# They illustrate increasing model capacity; they are not drop-in
# replacements for one another on the same task.
simple = LinearRegression()            # Underfitting risk
medium = RandomForestClassifier()      # Better
# Named `complex_model` because `complex` would shadow the Python builtin.
complex_model = MLPClassifier()        # Most complex

# Solution 2: Feature engineering
def create_features(X, degree=2):
    """Add polynomial features to X.

    Parameters:
        X: 2-D array-like of input features, passed unchanged to
            `PolynomialFeatures.fit_transform`.
        degree: maximum polynomial degree of the generated terms.
            Defaults to 2, the value this helper previously hard-coded.

    Returns:
        The transformed feature matrix from `PolynomialFeatures(degree)`.

    Note:
        The fitted transformer is discarded, so this helper suits one-off
        expansion. To transform separate train and test sets consistently,
        keep a single `PolynomialFeatures` instance and reuse it.
    """
    # Local import keeps the helper self-contained when the cell is run alone.
    from sklearn.preprocessing import PolynomialFeatures
    poly = PolynomialFeatures(degree)
    return poly.fit_transform(X)

# Solution 3: Reduce regularization
from sklearn.linear_model import Ridge

# A larger alpha means stronger regularization. Compare a light penalty with
# a heavy one that can push the model into underfitting.
weak_reg = Ridge(alpha=0.1)     # Light penalty: better
strong_reg = Ridge(alpha=100)   # Heavy penalty: too much regularization

# Solution 4: Train longer
# scikit-learn's fit() takes no `epochs` argument (that is the Keras-style API).
# Iterative estimators such as MLPClassifier set the training budget through
# the `max_iter` constructor parameter instead.
short_training = MLPClassifier(max_iter=50)     # May stop before converging
long_training = MLPClassifier(max_iter=1000)    # Allows more training iterations


---
