[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EMQA_learning_curves/EMQA_learning_curves.ipynb)

# EMQA_learning_curves

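Learning curves on simulated data: training and validation loss over 100 epochs for (A) a good fit, with an early-stopping marker, and (B) overfitting, where validation loss rises after epoch 30 while training loss keeps falling.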

**Output:** `ml_learning_curves.pdf`


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Install a blanket "ignore" filter so no warning is printed into the
# notebook output by the cells that follow.
warnings.filterwarnings(action='ignore')

# Shared figure theme: transparent backgrounds, no grid, no top/right spines.
PLOT_STYLE = {
    # backgrounds (on screen and in saved files)
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # axes decoration
    'axes.grid': False,
    'axes.spines.top': False,
    'axes.spines.right': False,
    # default text size and canvas size
    'font.size': 11,
    'figure.figsize': (12, 6),
}
plt.rcParams.update(PLOT_STYLE)

# Named hex colours used by the plots; look up by name, e.g. COLORS['green'].
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Save ``fig`` to ``name`` and print a confirmation line.

    Parameters
    ----------
    fig : object
        Anything exposing a ``savefig(path, **kwargs)`` method; the notebook
        passes the figure returned by ``plt.subplots``.
    name : str
        Output path, forwarded unchanged to ``fig.savefig``.

    The figure is written with a tight bounding box, a transparent
    background and 300 dpi. Returns ``None``.
    """
    export_options = {
        'bbox_inches': 'tight',
        'transparent': True,
        'dpi': 300,
    }
    fig.savefig(name, **export_options)
    print(f"Saved: {name}")


In [None]:
# Seed NumPy's legacy global RNG so the simulated noise is identical on every
# run. The four np.random.normal draws below consume that stream in order
# (train_good, val_good, train_overfit, val_overfit); reordering them changes
# the curves.
np.random.seed(42)

epochs = np.arange(1, 101)  # epochs 1..100 inclusive

# --- (A) Good fit ---
# Both losses decay exponentially towards a floor (0.15 train, 0.20 validation)
# with a little Gaussian noise on top.
train_good = 1.0 * np.exp(-0.04 * epochs) + 0.15 + np.random.normal(0, 0.008, len(epochs))
val_good = 1.1 * np.exp(-0.035 * epochs) + 0.20 + np.random.normal(0, 0.012, len(epochs))

# Early stopping point
es_epoch = 65

# --- (B) Overfitting ---
# Last epoch before the validation loss starts to climb.
overfit_start = 30

train_overfit = 1.0 * np.exp(-0.05 * epochs) + 0.05 + np.random.normal(0, 0.006, len(epochs))
# Validation first decreases then increases
val_overfit_base = 1.1 * np.exp(-0.04 * epochs) + 0.20
# Clamp (epochs - overfit_start) at 0 *before* raising to the fractional power.
# np.where evaluates both branches for every element, so the earlier
# np.where(epochs > 30, 0.005 * (epochs - 30)**1.1, 0) form computed
# negative ** 1.1 for the first epochs, producing NaN intermediates and a
# RuntimeWarning. The clamped form yields exactly the same values
# (0 up to overfit_start, 0.005 * k**1.1 afterwards) without either.
val_overfit_rise = 0.005 * np.maximum(epochs - overfit_start, 0) ** 1.1
val_overfit = val_overfit_base + val_overfit_rise + np.random.normal(0, 0.01, len(epochs))

# Two side-by-side panels: left = (A) good fit, right = (B) overfitting.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
ax, ax2 = axes

# Styling shared by both panels.
curve_style = {'lw': 2}
title_style = {'fontsize': 13, 'fontweight': 'bold'}
legend_style = {'loc': 'upper center', 'bbox_to_anchor': (0.5, -0.10), 'frameon': False}

# Loss curves are drawn first on each panel so they lead the legend entries.
for panel, train_curve, val_curve in (
    (ax, train_good, val_good),
    (ax2, train_overfit, val_overfit),
):
    panel.plot(epochs, train_curve, color=COLORS['green'], label='Train Loss', **curve_style)
    panel.plot(epochs, val_curve, color=COLORS['orange'], label='Validation Loss', **curve_style)

# (A) Good fit: mark the early-stopping epoch.
ax.axvline(
    es_epoch,
    color=COLORS['red'],
    ls='--',
    lw=1.5,
    label=f'Early Stopping (epoch {es_epoch})',
)

# (B) Overfitting: shade the train/validation gap from the divergence epoch on.
overfit_start = 30
mask = epochs >= overfit_start
ax2.fill_between(
    epochs[mask],
    train_overfit[mask],
    val_overfit[mask],
    color=COLORS['red'],
    alpha=0.2,
    label='Overfit Gap',
)
ax2.axvline(overfit_start, color=COLORS['gray'], ls=':', lw=1, label='Divergence Point')

# Titles, axis labels and below-axes legends (one legend column per entry).
for panel, heading, legend_cols in (
    (ax, '(A) Good Fit', 3),
    (ax2, '(B) Overfitting', 4),
):
    panel.set_title(heading, **title_style)
    panel.set_xlabel('Epoch')
    panel.set_ylabel('Loss')
    panel.legend(ncol=legend_cols, **legend_style)

fig.suptitle('Learning Curves: Good Fit vs Overfitting', fontsize=15, fontweight='bold', y=1.02)
fig.tight_layout()
save_fig(fig, 'ml_learning_curves.pdf')
plt.show()
