# In [9]:
import numpy as np
import matplotlib.pyplot as plt

# In [None]:
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf

def generate_ar1_time_series(n_points=1000, n_dims=1, drift_point=500, 
                             phi_initial=0.5, phi_final=0.9, 
                             noise_std_low=0.1, noise_std_high=0.5,
                             plot=True):
    """
    Generate a multivariate AR(1) time-series with a gradual drift in the AR coefficient 
    and an increase in noise level after the drift point.
    
    The series starts at zero. Before drift_point the AR coefficient is phi_initial and 
    the noise standard deviation is noise_std_low; from drift_point onwards the coefficient 
    is linearly interpolated from phi_initial to phi_final and the noise standard deviation 
    is noise_std_high.
    
    Each datapoint receives a label:
      - 0 if t < drift_point (before drift)
      - 1 if t >= drift_point (after drift)
    
    If n_dims > 1, each dimension is generated as an independent AR(1) process.
    
    Parameters:
      - n_points: number of time steps to generate
      - n_dims: number of independent dimensions
      - drift_point: index at which the drift starts; must satisfy 0 <= drift_point <= n_points
      - phi_initial, phi_final: AR coefficient before the drift and at the end of the series
      - noise_std_low, noise_std_high: noise standard deviation before / after the drift
      - plot: if True (default), show a matplotlib plot of the first dimension; 
        pass False for headless or batch use
    
    Returns:
      - data: NumPy array of shape (n_points, n_dims)
      - labels: NumPy array of shape (n_points,) containing the regime label
    
    Raises:
      - ValueError: if drift_point lies outside [0, n_points]
    """
    # Fail early with a readable message instead of an opaque NumPy error
    # about negative dimensions / sample counts.
    if not 0 <= drift_point <= n_points:
        raise ValueError(
            f"drift_point must lie in [0, n_points]; "
            f"got drift_point={drift_point}, n_points={n_points}"
        )

    data = np.zeros((n_points, n_dims))
    # Create time-varying AR coefficients: constant until drift_point, then interpolate
    phi_values = np.concatenate([
        np.full(drift_point, phi_initial),
        np.linspace(phi_initial, phi_final, n_points - drift_point),
    ])

    for t in range(1, n_points):
        noise_std = noise_std_low if t < drift_point else noise_std_high
        noise = np.random.normal(scale=noise_std, size=n_dims)
        data[t, :] = phi_values[t] * data[t - 1, :] + noise

    # Create labels: 0 for pre-drift, 1 for post-drift
    labels = np.zeros(n_points, dtype=int)
    labels[drift_point:] = 1

    # Plot the first dimension for visualization (skipped when there is nothing to draw)
    if plot and n_dims > 0:
        plt.figure(figsize=(10, 5))
        plt.plot(data[:, 0], label='Dimension 1')
        plt.axvline(drift_point, color='red', linestyle='--', label='Drift Start')
        plt.title("Multivariate AR(1) Time Series with Drift and Noise")
        plt.xlabel("Time")
        plt.ylabel("Value")
        plt.legend()
        plt.show()

    return data, labels

def generate_var_time_series(n_points=1000, dim=5, drift_point=500, 
                             real_drift_point=750, extra_noise_dim=2, noise_std=0.1,
                             plot=True):
    """
    Generate a multivariate VAR process with both virtual and real concept drifts.
    
    - Virtual drift: gradual change in the coefficient matrix (linear interpolation 
      from A1 towards A2 starting at drift_point).
    - Real drift: abrupt change in the relationship for one feature (row index 2 of the 
      coefficient matrix is scaled by 1.5). It stays in effect for every 
      t >= real_drift_point, matching the regime labels.
    Additionally, extra features contain pure noise.
    
    Every coefficient matrix is rescaled, when necessary, so that its spectral radius 
    does not exceed 0.99; otherwise the persistent real-drift regime (and the base 
    matrices for larger dim) would make the series grow without bound.
    
    Labels are assigned as follows:
      - 0 for t < drift_point (initial regime)
      - 1 for drift_point <= t < real_drift_point (virtual drift regime)
      - 2 for t >= real_drift_point (real drift regime)
    
    Parameters:
      - n_points: number of time steps to generate
      - dim: number of signal features; the real drift modifies row index 2, 
        so dim must be at least 3
      - drift_point: index at which the virtual drift starts
      - real_drift_point: index from which the real-drift matrix is used
      - extra_noise_dim: number of appended standard-normal noise-only features
      - noise_std: standard deviation of the VAR innovation noise
      - plot: if True (default), show a matplotlib plot of the first feature; 
        pass False for headless or batch use
    
    Returns:
      - data_full: NumPy array of shape (n_points, dim + extra_noise_dim)
      - labels: NumPy array of shape (n_points,) containing the regime label
    """
    max_radius = 0.99

    def _stabilize(matrix):
        # Scale the matrix down so that its spectral radius is at most max_radius.
        if matrix.size == 0:
            return matrix
        radius = np.max(np.abs(np.linalg.eigvals(matrix)))
        if radius > max_radius:
            return matrix * (max_radius / radius)
        return matrix

    # Define two stable coefficient matrices (spectral radius enforced by _stabilize)
    off_diagonal = np.ones((dim, dim)) - np.eye(dim)
    A1 = _stabilize(0.5 * np.eye(dim) + 0.1 * off_diagonal)
    A2 = _stabilize(0.8 * np.eye(dim) + 0.05 * off_diagonal)

    def get_A(t):
        if t < drift_point:
            return A1
        alpha = (t - drift_point) / (n_points - drift_point)
        return (1 - alpha) * A1 + alpha * A2

    # Adjusted matrix for a real drift on feature 3 (index 2). Scaling the row pushes
    # the spectral radius above 1, so it is stabilized again before use.
    A_real_drift = A2.copy()
    A_real_drift[2, :] *= 1.5
    A_real_drift = _stabilize(A_real_drift)

    data = np.zeros((n_points, dim))
    data[0, :] = np.random.normal(size=dim)

    for t in range(1, n_points):
        # The real-drift regime is persistent: it covers every step labelled 2.
        A_t = A_real_drift if t >= real_drift_point else get_A(t)
        noise = np.random.normal(scale=noise_std, size=dim)
        data[t, :] = A_t @ data[t - 1, :] + noise

    # Generate additional noise-only features
    noise_features = np.random.normal(size=(n_points, extra_noise_dim))
    data_full = np.hstack([data, noise_features])

    # Create labels:
    # 0 for t < drift_point, 1 for drift_point <= t < real_drift_point, 2 for t >= real_drift_point
    labels = np.zeros(n_points, dtype=int)
    labels[drift_point:real_drift_point] = 1
    labels[real_drift_point:] = 2

    # Plot the first signal feature for visualization (skipped when there are no columns)
    if plot and data_full.shape[1] > 0:
        plt.figure(figsize=(10, 5))
        plt.plot(data_full[:, 0], label="Feature 1")
        plt.axvline(drift_point, color='red', linestyle='--', label='Virtual Drift Start')
        plt.axvline(real_drift_point, color='green', linestyle='--', label='Real Drift')
        plt.title("Multivariate VAR Process with Virtual and Real Drift")
        plt.xlabel("Time")
        plt.ylabel("Value")
        plt.legend()
        plt.show()

    return data_full, labels

def generate_hybrid_time_series(n_points=1000, n_dims=1, 
                                drift_virtual_start=300, drift_real_point=600, 
                                phi=0.6, season_amplitude_initial=1.0, season_amplitude_final=2.0,
                                seasonal_period=50,
                                noise_std=0.5, impulse_prob=0.01, impulse_std=5.0,
                                plot=True):
    """
    Generate a multivariate hybrid time series by combining an AR(1) process with 
    a seasonal sine wave component.
    
    - Virtual drift: gradual change in the seasonal coefficient (linear ramp from 
      season_amplitude_initial to season_amplitude_final between drift_virtual_start 
      and drift_real_point).
    - Real drift: abrupt inversion of the seasonal effect (coefficient becomes 
      -season_amplitude_final from drift_real_point onwards).
    - Noise: Includes Gaussian noise with occasional impulsive noise.
    
    For multivariate data (n_dims > 1), each dimension is generated independently 
    with a random phase shift for its seasonal component. Every series starts at zero.
    
    Labels are assigned as follows:
      - 0 for t < drift_virtual_start
      - 1 for drift_virtual_start <= t < drift_real_point
      - 2 for t >= drift_real_point
    
    Parameters:
      - n_points: number of time steps to generate
      - n_dims: number of independent dimensions
      - drift_virtual_start: index at which the seasonal amplitude starts ramping
      - drift_real_point: index at which the seasonal effect is inverted
      - phi: AR(1) coefficient
      - season_amplitude_initial, season_amplitude_final: seasonal coefficient before 
        the ramp and at its end
      - seasonal_period: period of the sine component, in time steps
      - noise_std: standard deviation of the Gaussian noise (default 0.5)
      - impulse_prob: per-step probability of an impulsive noise spike (default 0.01)
      - impulse_std: standard deviation of an impulsive noise spike (default 5.0)
      - plot: if True (default), show a matplotlib plot of the first dimension; 
        pass False for headless or batch use
    
    Returns:
      - data: NumPy array of shape (n_points, n_dims)
      - labels: NumPy array of shape (n_points,) containing the regime label
    """
    data = np.zeros((n_points, n_dims))
    time_index = np.arange(n_points)

    # For multivariate data, assign each dimension a random phase shift for the sine component.
    phases = np.random.uniform(0, 2 * np.pi, size=n_dims)

    def _seasonal_coef(t):
        # Seasonal coefficient schedule: constant, then linear ramp, then inverted.
        if t < drift_virtual_start:
            return season_amplitude_initial
        if t < drift_real_point:
            alpha = (t - drift_virtual_start) / (drift_real_point - drift_virtual_start)
            return (1 - alpha) * season_amplitude_initial + alpha * season_amplitude_final
        return -season_amplitude_final

    # The schedule is identical for every dimension, so compute it once up front.
    seasonal_coefs = np.array([_seasonal_coef(t) for t in range(n_points)], dtype=float)

    for d in range(n_dims):
        seasonal = np.sin(2 * np.pi * time_index / seasonal_period + phases[d])
        for t in range(1, n_points):
            # Draw order (uniform, optional impulse, Gaussian) is kept fixed so that
            # seeded runs with the default parameters reproduce earlier results.
            is_impulse = np.random.rand() < impulse_prob
            impulsive_noise = np.random.normal(scale=impulse_std) if is_impulse else 0
            gaussian_noise = np.random.normal(scale=noise_std)
            noise = gaussian_noise + impulsive_noise

            data[t, d] = phi * data[t - 1, d] + seasonal_coefs[t] * seasonal[t] + noise

    # Create labels:
    # 0 if t < drift_virtual_start, 1 if drift_virtual_start <= t < drift_real_point, 2 if t >= drift_real_point
    labels = np.zeros(n_points, dtype=int)
    labels[drift_virtual_start:drift_real_point] = 1
    labels[drift_real_point:] = 2

    # Plot the first dimension for visualization (skipped when there is nothing to draw)
    if plot and n_dims > 0:
        plt.figure(figsize=(10, 5))
        plt.plot(data[:, 0], label="Dimension 1")
        plt.axvline(drift_virtual_start, color='red', linestyle='--', label='Virtual Drift Start')
        plt.axvline(drift_real_point, color='green', linestyle='--', label='Real Drift')
        plt.title("Multivariate Hybrid Time Series with Intermittent Drifts")
        plt.xlabel("Time")
        plt.ylabel("Value")
        plt.legend()
        plt.show()

    return data, labels

def fetch_crypto_data(ticker="BTC-USD", period="1y", interval="1d", plot=True):
    """
    Download cryptocurrency price data using yfinance. This returns a DataFrame 
    with multiple attributes (Open, High, Low, Close, etc.) and adds a default label of 0 
    for every datapoint.
    
    Parameters:
      - ticker: ticker symbol passed to yfinance (a single symbol is expected)
      - period: history length passed to yfinance
      - interval: sampling interval passed to yfinance
      - plot: if True (default), show a matplotlib plot of the closing price; 
        pass False for headless or batch use
    
    Returns:
      - data: Pandas DataFrame with crypto data and a new column 'Label', 
        or None if nothing was downloaded
    """
    data = yf.download(ticker, period=period, interval=interval)

    if data is None or data.empty:
        print("No data downloaded. Please check the ticker and network connection.")
        return None

    # Recent yfinance releases return two-level (price field, ticker) columns even for
    # a single ticker. Collapse them to plain field names so that 'Close' and 'Label'
    # are ordinary columns, as described above. Multi-ticker frames are left untouched
    # because flattening them would create duplicate column names.
    columns = data.columns
    if columns.nlevels == 2 and columns.get_level_values(1).nunique() == 1:
        data.columns = columns.get_level_values(0)

    # Add a default label column (here we use 0 for all datapoints)
    data["Label"] = 0

    # Plot the Closing Price for visualization
    if plot:
        plt.figure(figsize=(10, 5))
        plt.plot(data.index, data['Close'], label=f'{ticker} Close Price')
        plt.title(f"{ticker} Price Data")
        plt.xlabel("Date")
        plt.ylabel("Price (USD)")
        plt.legend()
        plt.show()

    return data

if __name__ == "__main__":
    # Demo driver: build every synthetic dataset once, report the regime labels
    # that occur in each, then download a real-world price series.

    # --- Independent AR(1) processes, three dimensions ---
    print("Generating Multivariate AR(1) Time Series with Drift and Noise...")
    ar1_data, ar1_labels = generate_ar1_time_series(n_dims=3, n_points=1000)
    ar1_regimes = np.unique(ar1_labels)
    print("AR(1) Labels:", ar1_regimes)

    # --- VAR process; the output has dim + extra_noise_dim columns ---
    print("Generating Multivariate VAR Process with Virtual and Real Drift...")
    var_data, var_labels = generate_var_time_series(
        n_points=1000,
        dim=5,
        extra_noise_dim=2,
    )
    var_regimes = np.unique(var_labels)
    print("VAR Labels:", var_regimes)

    # --- AR(1) plus seasonal component, four dimensions ---
    print("Generating Multivariate Hybrid Time Series with Intermittent Drifts...")
    hybrid_data, hybrid_labels = generate_hybrid_time_series(n_dims=4, n_points=1000)
    hybrid_regimes = np.unique(hybrid_labels)
    print("Hybrid Labels:", hybrid_regimes)

    # --- BTC-USD daily prices with the default label column ---
    print("Fetching Cryptocurrency Data (BTC-USD)...")
    crypto_data = fetch_crypto_data(ticker="BTC-USD", period="1y", interval="1d")
    if crypto_data is not None:
        crypto_columns = crypto_data.columns.tolist()
        print("Crypto Data Columns:", crypto_columns)
