# Market Data Outlier Detection (FRED H.15)

This notebook demonstrates techniques for market data quality checks:
- Missing values
- Stale rates (values that stay unchanged across consecutive observations)
- Z-score anomaly detection with different context windows
- IsolationForest (ML-based anomaly detection)


In [None]:
!pip install fredapi scikit-learn matplotlib pandas


In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fredapi import Fred
from sklearn.ensemble import IsolationForest

# Read the FRED API key from the FRED_API_KEY environment variable so the
# secret is never saved inside the notebook. The placeholder is only a
# fallback: set the variable (or replace the placeholder) before running.
fred = Fred(api_key=os.environ.get('FRED_API_KEY', 'YOUR_FRED_API_KEY'))

# FRED series code -> short tenor label used as the DataFrame column name.
series = {
    'DGS1MO': '1M',
    'DGS3MO': '3M',
    'DGS2': '2Y',
    'DGS10': '10Y',
    'DGS30': '30Y'
}

# Fetch every series; building the frame from a dict of Series aligns them on
# the union of their indexes, leaving NaN where a tenor has no observation.
df = pd.DataFrame({name: fred.get_series(code) for code, name in series.items()})
# Discard rows in which no tenor has a value at all.
df = df.dropna(how='all')
df.tail()

## Missing Data

In [None]:
# Count the missing observations in each column of the rates frame.
missing_per_column = df.isna().sum()
missing_per_column


## Stale Rates Detection

In [None]:
# Flag "stale" rates: columns whose value did not move for most of a short
# trailing window of observations.
STALE_WINDOW = 5      # number of consecutive rows inspected
STALE_THRESHOLD = 3   # flag when more than this many of them are unchanged

# 1 where the row-over-row change is numerically zero, 0 otherwise. A NaN
# difference compares False, so it is counted as "changed".
unchanged = (df.diff().abs() < 1e-8).astype(int)
stale = unchanged.rolling(STALE_WINDOW).sum()

# Boolean masking keeps flagged cells and turns every other cell into NaN.
# Drop only the rows where no column is flagged: the default dropna() removes
# any row containing a NaN, which would hide rows where just some of the
# columns are stale.
stale[stale > STALE_THRESHOLD].dropna(how='all').head()

## Z-score Anomaly Detection

In [None]:
# Rolling z-score of the 10Y column: distance of each value from the mean of
# its trailing window, expressed in units of that window's standard deviation.
window = 60
ten_year = df['10Y']
trailing = ten_year.rolling(window)

rolling_mean = trailing.mean()
rolling_std = trailing.std()
zscore = (ten_year - rolling_mean).div(rolling_std)

# Keep only the observations whose absolute z-score exceeds 3.
anomalies = zscore[zscore.abs() > 3]
anomalies.head()

## Isolation Forest

In [None]:
# Build the features from the rate columns only. Excluding 'anomaly' keeps
# this cell idempotent: on a re-run, the label column written below would
# otherwise be differenced and fed back into the model as an extra feature.
rate_columns = [col for col in df.columns if col != 'anomaly']

# Row-over-row change of every rate; NaN differences (first row, missing
# observations) are replaced with 0, i.e. treated as "no change".
features = df[rate_columns].diff().fillna(0)

# contamination is the expected share of outliers; random_state is fixed so
# repeated runs produce the same labels.
model = IsolationForest(contamination=0.01, random_state=42)

# fit_predict labels each row 1 (inlier) or -1 (outlier).
df['anomaly'] = model.fit_predict(features)
df['anomaly'].value_counts()