[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/EMQA/blob/main/EMQA_train_test_split/EMQA_train_test_split.ipynb)

# EMQA_train_test_split

Schematic of two chronological data-splitting schemes for time series: (A) a 70/30 train/test split and (B) a 70/15/15 train/validation/test split.

**Output:** `ml_train_test_split.pdf`


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Global matplotlib defaults for every figure produced below.

# Transparent backgrounds, both on screen and in saved files.
plt.rcParams['figure.facecolor'] = 'none'
plt.rcParams['axes.facecolor'] = 'none'
plt.rcParams['savefig.facecolor'] = 'none'
plt.rcParams['savefig.transparent'] = True

# Minimal axes decoration: no grid, no top/right spines.
plt.rcParams['axes.grid'] = False
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False

# Default text size and canvas size (width, height in inches).
plt.rcParams['font.size'] = 11
plt.rcParams['figure.figsize'] = (12, 6)

# Named hex colors shared by the plots in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#CD0000',
    green='#2E7D32',
    orange='#E67E22',
    purple='#8E44AD',
    gray='#808080',
    cyan='#00BCD4',
    amber='#B5853F',
)

def save_fig(fig, name):
    """Save *fig* to the path *name* and print a confirmation line.

    The figure is written with a tight bounding box, a transparent
    background and a resolution of 300 dpi.
    """
    export_options = {
        'bbox_inches': 'tight',
        'transparent': True,
        'dpi': 300,
    }
    fig.savefig(name, **export_options)
    print(f"Saved: {name}")


In [None]:
import matplotlib.patches as mpatches
from matplotlib.patches import Rectangle, FancyArrowPatch

def _draw_split_panel(ax, segments, title, bar_height=0.5, y_center=0.5):
    """Draw one horizontal data-split bar with a time arrow on *ax*.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on; its ticks and spines are hidden.
    segments : iterable of (name, share, color, fontsize)
        Consecutive pieces of the bar, left to right. ``share`` is the
        segment width as a fraction of the x-range [0, 1]; the label is
        derived from it as ``"<name> (<share as percent>)"`` so the text
        can never disagree with the drawn width.
    title : str
        Panel title.
    bar_height, y_center : float
        Height of the bar and its vertical centre, in data coordinates.
    """
    segments = list(segments)
    bottom = y_center - bar_height / 2

    # Left edge of each segment is the running sum of the preceding shares.
    lefts = []
    edge = 0.0
    for _, share, _, _ in segments:
        lefts.append(edge)
        edge += share

    # All rectangles first, then all labels (same artist order as drawing
    # each panel by hand).
    for left, (_, share, color, _) in zip(lefts, segments):
        ax.add_patch(Rectangle((left, bottom), share, bar_height,
                               facecolor=color, alpha=0.7,
                               edgecolor='white', lw=2))
    for left, (name, share, _, fontsize) in zip(lefts, segments):
        ax.text(left + share / 2, y_center, f'{name} ({share:.0%})',
                ha='center', va='center',
                fontsize=fontsize, fontweight='bold', color='white')

    ax.set_xlim(-0.02, 1.02)
    ax.set_ylim(0, 1)
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Time arrow running left to right near the bottom of the panel; the
    # 'Time' caption sits just below the y-range (y = -0.05 with ylim 0..1).
    ax.annotate('', xy=(1.0, 0.05), xytext=(0.0, 0.05),
                arrowprops=dict(arrowstyle='->', color=COLORS['gray'], lw=1.5))
    ax.text(0.5, -0.05, 'Time', ha='center', va='top',
            fontsize=11, color=COLORS['gray'])


fig, axes = plt.subplots(2, 1, figsize=(14, 5), gridspec_kw={'height_ratios': [1, 1]})

# Common settings
bar_height = 0.5
y_center = 0.5

# --- (A) Simple 70/30 train/test split ---
ax = axes[0]
_draw_split_panel(
    ax,
    [
        ('Train', 0.70, COLORS['green'], 14),
        ('Test', 0.30, COLORS['red'], 14),
    ],
    '(A) Simple Train / Test Split',
    bar_height=bar_height,
    y_center=y_center,
)

# --- (B) 70/15/15 train/val/test split ---
# The two narrow segments use a slightly smaller label font (13 vs 14).
ax2 = axes[1]
_draw_split_panel(
    ax2,
    [
        ('Train', 0.70, COLORS['green'], 14),
        ('Val', 0.15, COLORS['orange'], 13),
        ('Test', 0.15, COLORS['red'], 13),
    ],
    '(B) Train / Validation / Test Split',
    bar_height=bar_height,
    y_center=y_center,
)

# The suptitle is placed above the figure canvas (y > 1); save_fig's tight
# bounding box keeps it in the exported file.
fig.suptitle('Data Splitting Strategies for Time Series', fontsize=15, fontweight='bold', y=1.04)
fig.tight_layout()
save_fig(fig, 'ml_train_test_split.pdf')
plt.show()
