---
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](
https://colab.research.google.com/github/ShamsaraE/time-series-medicine-biology-2026/blob/main/notebooks/06_White_Red_Noise_Missings.ipynb)
---

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Seed NumPy's global generator so every run draws the same samples.
np.random.seed(42)

# Number of samples in the series.
n = 200

# White noise: n draws from a normal distribution with mean 0 and standard deviation 1.
white_noise = np.random.normal(0, 1, n)

# Integer sample index used as the horizontal axis.
time = np.arange(n)

# Draw the series on a fresh figure through the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.plot(time, white_noise)
ax.set(title="White Noise Process", xlabel="Time", ylabel="Value")
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# Autocorrelation plot of the white-noise series, limited to the first 20 lags.
acf_lags = 20
acf_fig = plot_acf(white_noise, lags=acf_lags)
plt.show()

In [None]:
# Simulation settings for the red-noise example.
n = 200           # number of samples
r = 0.7           # lag-1 autoregressive coefficient
sigma = 1         # standard deviation of the driving white noise

# Driving white-noise sequence w_t, one draw per time step.
w = np.random.normal(0, sigma, n)

# Weight applied to each innovation; it depends only on r, so compute it once.
innovation_scale = np.sqrt(1 - r**2)

# Red-noise series. x[0] stays at 0, and w[0] is never consumed by the recursion.
x = np.zeros(n)
for t, w_t in enumerate(w[1:], start=1):
    x[t] = r * x[t - 1] + innovation_scale * w_t

# Integer sample index used as the horizontal axis.
time = np.arange(n)

# Show the simulated series.
fig, ax = plt.subplots()
ax.plot(time, x)
ax.set_title(f"Red Noise (AR(1)) Process, r = {r}")
ax.set_xlabel("Time")
ax.set_ylabel("Value")
plt.show()

In [None]:
# Compare AR(1) realizations for several lag-1 coefficients, one stacked panel each.
n = 300
time = np.arange(n)

# With r = 0 the recursion below reduces to x[t] = w[t].
r_values = [0.0, 0.4, 0.8, 0.95]

# squeeze=False keeps `axes` two-dimensional even if only one coefficient is listed.
fig, axes = plt.subplots(len(r_values), 1, figsize=(10, 8), squeeze=False)

for i, r in enumerate(r_values):
    # Fresh innovations for every coefficient; x[0] stays at 0.
    w = np.random.normal(size=n)
    x = np.zeros(n)
    for t in range(1, n):
        x[t] = r*x[t-1] + np.sqrt(1-r**2)*w[t]

    ax = axes[i, 0]
    ax.plot(time, x)
    ax.set_title(f"AR(1) with r = {r}")
    # Label every panel's vertical axis so each one can be read on its own.
    ax.set_ylabel("Value")

# All panels share the same time axis; label it once on the bottom panel to limit clutter.
axes[-1, 0].set_xlabel("Time")

fig.tight_layout()
plt.show()

In [None]:
from scipy.signal import periodogram


# Number of samples in each series.
n = 100

# White noise
white = np.random.normal(size=n)

# Red noise (AR1); red[0] stays at 0 and w[0] is not used by the recursion.
r = 0.8
red = np.zeros(n)
w = np.random.normal(size=n)

for t in range(1, n):
    red[t] = r*red[t-1] + np.sqrt(1-r**2)*w[t]

# Compute periodograms with SciPy's defaults (fs=1.0, scaling='density'),
# so frequencies are in cycles per sample and values are power spectral densities.
f_white, P_white = periodogram(white)
f_red, P_red = periodogram(red)

# Plot the two spectra side by side, with labelled axes so each panel stands alone.
fig, (ax_white, ax_red) = plt.subplots(1, 2, figsize=(10, 4))

ax_white.plot(f_white, P_white)
ax_white.set_title("White Noise Periodogram")
ax_white.set_xlabel("Frequency (cycles/sample)")
ax_white.set_ylabel("Power spectral density")

ax_red.plot(f_red, P_red)
ax_red.set_title("Red Noise (AR1) Periodogram")
ax_red.set_xlabel("Frequency (cycles/sample)")
ax_red.set_ylabel("Power spectral density")

fig.tight_layout()
plt.show()

## Red Noise

- More power at low frequencies
- Power decreases toward high frequencies


In [None]:
# 1) Create a synthetic time series with gaps (NaNs)

n = 240  # 240 days
idx = pd.date_range("2025-01-01", periods=n, freq="D")

# A single seeded Generator drives every random draw in this cell, so re-running the
# cell reproduces the same series and the same gaps regardless of earlier draws
# from NumPy's global random state.
rng = np.random.default_rng(123)

# Signal: baseline + trend + weekly seasonality + noise
trend = 0.02 * np.arange(n)
season = 2.0 * np.sin(2 * np.pi * np.arange(n) / 7)
noise = rng.normal(scale=0.8, size=n)

ts = pd.Series(10 + trend + season + noise, index=idx, name="value")

# Introduce missingness on a copy so the complete series `ts` stays available.
ts_missing = ts.copy()

# 20 distinct randomly chosen days ...
random_missing_idx = rng.choice(n, size=20, replace=False)
ts_missing.iloc[random_missing_idx] = np.nan
# ... plus one contiguous gap (label slicing includes both end dates).
ts_missing.loc["2025-04-10":"2025-04-25"] = np.nan

# Matplotlib breaks the line at NaNs, so the gaps appear as interruptions.
plt.figure(figsize=(12, 4))
plt.plot(ts_missing.index, ts_missing.values, linewidth=2)
plt.title("Synthetic time series with missing values (gaps)")
plt.xlabel("Time")
plt.ylabel("Value")
plt.grid(True, alpha=0.3)
plt.show()



In [None]:
# Missingness map (yellow = missing)

# Encode missing days as 1 and observed days as 0, shaped as a single-row image.
missing_row = ts_missing.isna().to_numpy().astype(int)[None, :]

plt.figure(figsize=(12, 1.3))
# Pin the colormap and value range so the "yellow = missing" claim in the title holds
# whatever the default colormap is, and even when the mask is all 0s or all 1s.
# Nearest-neighbour rendering keeps each day a crisp cell.
plt.imshow(
    missing_row,
    aspect="auto",
    cmap="viridis",
    vmin=0,
    vmax=1,
    interpolation="nearest",
)
plt.yticks([])  # the single row has no meaningful vertical axis
plt.title("Missingness map (yellow = missing)")
plt.xlabel("Time index (days)")
plt.show()


In [None]:

# Missing-data handling methods

ts_ffill = ts_missing.ffill()  # carry the last observed value forward
ts_bfill = ts_missing.bfill()  # carry the next observed value backward
ts_linear = ts_missing.interpolate(method="time")  # time-aware linear interpolation
ts_spline = ts_missing.interpolate(method="spline", order=3)  # may overshoot; for comparison

plt.figure(figsize=(12, 4))
# The filled series coincide with the observed data wherever data exists, so the
# reference line gets a higher zorder: it is drawn on top and stays visible, and the
# methods remain distinguishable inside the gaps.
plt.plot(ts_missing.index, ts_missing.values, label="Original (with NaNs)", linewidth=2, zorder=5)
plt.plot(ts_ffill.index, ts_ffill.values, label="Forward fill (LOCF)", alpha=0.9)
# Backward fill is computed above, so include it in the comparison as well.
plt.plot(ts_bfill.index, ts_bfill.values, label="Backward fill (NOCB)", alpha=0.9)
plt.plot(ts_linear.index, ts_linear.values, label="Time interpolation (linear)", alpha=0.9)
plt.plot(ts_spline.index, ts_spline.values, label="Spline (order=3)", alpha=0.8)
plt.title("Missing-data handling: overlay comparison")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
