In [None]:
%pip install yfinance
%pip install matplotlib
%pip install pandas numpy
%pip install --upgrade sympy torch
%pip install torch torchvision torchaudio
%pip install ripser

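# 3. Restart the runtime after the installs above so the newly installed packages are the ones that get imported

# 4. Now import torch and print its version to confirm the install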
import torch
print(torch.__version__)

# TOPOLOGICAL DATA ANALYSIS (RAW)

In [None]:
"""
Topological Data Analysis (TDA) from Scratch
Uses ripser (lightweight C++ library) for persistent homology; the Takens
embedding and the diagram summary features are implemented here with NumPy
"""

import numpy as np
from scipy.spatial.distance import pdist, squareform

# Install lightweight ripser library
# !pip install ripser

# Optional-dependency guard: attempt the ripser import once and record the
# outcome in RIPSER_AVAILABLE, which compute_persistence_diagrams checks before
# calling ripser so it can raise a clear ImportError with install instructions.
try:
    from ripser import ripser
    RIPSER_AVAILABLE = True
    print("✅ Ripser library available")
except ImportError:
    # A missing ripser is not fatal at import time; only the flag is cleared.
    RIPSER_AVAILABLE = False
    print("⚠️ Install ripser: pip install ripser")

# ==================== TAKENS EMBEDDING ====================

def takens_embedding(time_series, embedding_dim=3, time_delay=1):
    """
    Create Takens embedding (phase space reconstruction)

    Row ``i`` of the result is the delay vector
    ``[x[i], x[i + time_delay], ..., x[i + (embedding_dim - 1) * time_delay]]``.

    Args:
        time_series: 1D sequence of numbers (numpy array, list, pandas Series);
            a single-column array of shape (n, 1) is accepted and flattened
        embedding_dim: dimension of embedding (integer >= 1)
        time_delay: time delay between coordinates, in samples (integer >= 1)

    Returns:
        2D float array of shape (n_points, embedding_dim) with
        n_points = len(time_series) - (embedding_dim - 1) * time_delay

    Raises:
        ValueError: if embedding_dim or time_delay is smaller than 1, if the
            input is not one-dimensional, or if the series is too short for
            the requested embedding
    """
    if embedding_dim < 1 or time_delay < 1:
        raise ValueError("embedding_dim and time_delay must be positive integers")

    # Convert once so indexing is positional regardless of the input container
    series = np.asarray(time_series, dtype=float)
    if series.ndim == 2 and series.shape[1] == 1:
        series = series[:, 0]
    if series.ndim != 1:
        raise ValueError("time_series must be one-dimensional")

    n = len(series)
    m = n - (embedding_dim - 1) * time_delay

    if m <= 0:
        raise ValueError("Time series too short for given embedding parameters")

    # Index matrix built by broadcasting: entry (i, j) is i + j * time_delay
    idx = np.arange(m)[:, None] + time_delay * np.arange(embedding_dim)[None, :]

    return series[idx]


# ==================== PERSISTENT HOMOLOGY ====================

def compute_persistence_diagrams(point_cloud, max_dimension=1, max_edge_length=None):
    """
    Compute persistence diagrams using Vietoris-Rips complex

    Args:
        point_cloud: 2D array of points
        max_dimension: maximum homology dimension to compute
        max_edge_length: maximum edge length in Rips complex; None (the
            default) means no limit

    Returns:
        The 'dgms' entry of the ripser result: a sequence of persistence
        diagrams indexed by homology dimension (0 .. max_dimension)

    Raises:
        ImportError: if ripser could not be imported
    """
    if not RIPSER_AVAILABLE:
        raise ImportError("ripser not installed. Run: pip install ripser")

    # ripser needs a numeric threshold, so translate the "no limit" default
    # (None) into infinity instead of forwarding None
    thresh = np.inf if max_edge_length is None else max_edge_length

    # Compute persistence using ripser
    result = ripser(point_cloud, maxdim=max_dimension, thresh=thresh)

    return result['dgms']


# ==================== FEATURE EXTRACTION FROM DIAGRAMS ====================

def persistence_entropy(diagram, normalize=True):
    """
    Shannon-style entropy of the lifetimes in a persistence diagram

    Only pairs with finite birth and death and a strictly positive lifetime
    contribute. With ``normalize=True`` the lifetimes are divided by their sum
    first, so they form a probability distribution.

    High entropy = many features with similar lifetimes
    Low entropy = few dominant features

    Returns 0.0 when no pair contributes.
    """
    if len(diagram) == 0:
        return 0.0

    # Keep only pairs whose birth and death are both finite
    finite_pairs = diagram[np.isfinite(diagram).all(axis=1)]

    # Lifetime of every remaining pair; bars of non-positive length are dropped
    spans = finite_pairs[:, 1] - finite_pairs[:, 0]
    spans = spans[spans > 0]
    if spans.size == 0:
        return 0.0

    weights = spans / np.sum(spans) if normalize else spans

    # The small offset inside the log guards against log(0)
    return float(-np.sum(weights * np.log(weights + 1e-10)))


def persistence_landscape(diagram, k=1, resolution=100):
    """
    Compute k-th persistence landscape

    Landscapes are functional summaries of persistence diagrams. Every finite
    (birth, death) pair contributes a tent function
    ``max(0, min(t - birth, death - t))``; the k-th landscape at ``t`` is the
    k-th largest tent value at ``t``.

    Args:
        diagram: array of shape (n_features, 2) holding (birth, death) pairs;
            pairs with a non-finite coordinate are ignored
        k: which landscape to return (1 = upper envelope)
        resolution: number of grid points, evenly spaced from the smallest
            birth to the largest death

    Returns:
        1D array of length ``resolution``; all zeros when the diagram has
        fewer than k finite pairs

    Raises:
        ValueError: if k is smaller than 1
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    if len(diagram) == 0:
        return np.zeros(resolution)

    # Remove infinite points
    diagram = diagram[np.isfinite(diagram).all(axis=1)]

    # Fewer than k tents means the k-th landscape is identically zero
    if len(diagram) < k:
        return np.zeros(resolution)

    # Column vectors so they broadcast against the grid row
    births = diagram[:, 0][:, None]
    deaths = diagram[:, 1][:, None]
    grid = np.linspace(births.min(), deaths.max(), resolution)

    # One row per pair: tent function evaluated on the whole grid at once
    tents = np.clip(np.minimum(grid - births, deaths - grid), 0.0, None)

    # Ascending sort per grid point, so the k-th largest value is row -k
    return np.sort(tents, axis=0)[-k]


def betti_numbers(diagram, t):
    """
    Compute Betti number at filtration value t

    Betti number = number of topological features alive at time t
    - Betti-0: number of connected components
    - Betti-1: number of holes/loops

    A feature is alive when ``birth <= t < death``. Features with an infinite
    death never die, so they are counted for every ``t >= birth``; discarding
    them would undercount (e.g. the one component that always survives in H0).

    Args:
        diagram: array-like of shape (n_features, 2) holding (birth, death)
        t: filtration value at which to count

    Returns:
        Number of alive features as an int (0 for an empty diagram)
    """
    if len(diagram) == 0:
        return 0

    pairs = np.asarray(diagram, dtype=float)
    births = pairs[:, 0]
    deaths = pairs[:, 1]

    # Comparisons against NaN are False, so malformed rows drop out on their
    # own, while an infinite death correctly compares as "still alive"
    alive = (births <= t) & (deaths > t)

    return int(np.count_nonzero(alive))


def max_persistence(diagram):
    """Longest lifetime (death - birth) among the finite pairs; 0.0 if there are none"""
    if len(diagram) == 0:
        return 0.0

    # Rows where both birth and death are finite
    keep = np.isfinite(diagram).all(axis=1)
    if not keep.any():
        return 0.0

    finite_pairs = diagram[keep]
    spans = finite_pairs[:, 1] - finite_pairs[:, 0]
    return float(spans.max())


def total_persistence(diagram, power=1):
    """
    Sum of the lifetimes of all finite pairs, each raised to ``power``

    Args:
        diagram: array of shape (n_features, 2) holding (birth, death)
        power: exponent to apply to lifetimes (1=sum, 2=sum of squares, etc.)

    Returns:
        The sum as a float; 0.0 when the diagram has no finite pair
    """
    if len(diagram) == 0:
        return 0.0

    finite_pairs = diagram[np.isfinite(diagram).all(axis=1)]
    if finite_pairs.shape[0] == 0:
        return 0.0

    spans = finite_pairs[:, 1] - finite_pairs[:, 0]
    powered = spans ** power
    return float(np.sum(powered))


def number_of_features(diagram, threshold=0.0):
    """Number of finite pairs whose lifetime is strictly greater than ``threshold``"""
    if len(diagram) == 0:
        return 0

    finite_pairs = diagram[np.isfinite(diagram).all(axis=1)]

    # With no finite pair left the comparison below is empty and counts 0
    spans = finite_pairs[:, 1] - finite_pairs[:, 0]
    return int(np.count_nonzero(spans > threshold))


# ==================== COMPLETE TDA FEATURE EXTRACTOR ====================

class TDAFeatureExtractor:
    """
    Complete TDA feature extractor using ripser

    Pipeline: time series -> Takens embedding -> Vietoris-Rips persistence
    diagrams -> six summary statistics per homology dimension.
    """

    # Number of summary statistics computed for each homology dimension
    _FEATURES_PER_DIM = 6

    def __init__(self, embedding_dim=3, time_delay=1, max_homology_dim=1):
        """
        Args:
            embedding_dim: dimension of the Takens embedding
            time_delay: delay (in samples) between embedding coordinates
            max_homology_dim: highest homology dimension to compute
        """
        self.embedding_dim = embedding_dim
        self.time_delay = time_delay
        self.max_homology_dim = max_homology_dim

    @property
    def n_features(self):
        """Length of the vector returned by extract_features"""
        return self._FEATURES_PER_DIM * (self.max_homology_dim + 1)

    @staticmethod
    def _diagram_features(diagram):
        """Return the six summary statistics of one persistence diagram as floats"""
        if len(diagram) == 0:
            return [0.0] * TDAFeatureExtractor._FEATURES_PER_DIM

        longest = max_persistence(diagram)

        # First landscape sampled on a coarse grid; its peak is the amplitude
        landscape = persistence_landscape(diagram, k=1, resolution=50)
        amplitude = float(np.max(landscape)) if len(landscape) > 0 else 0.0

        # Betti number at the midpoint of the finite part of the filtration
        finite = diagram[np.isfinite(diagram).all(axis=1)]
        if len(finite) > 0:
            midpoint = (np.min(finite[:, 0]) + np.max(finite[:, 1])) / 2
            betti = float(betti_numbers(diagram, midpoint))
        else:
            betti = 0.0

        return [
            persistence_entropy(diagram),
            longest,
            total_persistence(diagram, power=1),
            # "Significant" = lifetime above 10% of the longest lifetime
            float(number_of_features(diagram, 0.1 * longest)),
            amplitude,
            betti,
        ]

    def extract_features(self, time_series):
        """
        Extract comprehensive TDA features from time series

        Returns:
            float32 numpy array of 6 * (max_homology_dim + 1) features
            (12 with the default max_homology_dim=1), grouped by statistic
            and, within each statistic, ordered by homology dimension:
            - H0 entropy
            - H1 entropy
            - H0 max persistence
            - H1 max persistence
            - H0 total persistence
            - H1 total persistence
            - H0 number of features
            - H1 number of features
            - H0 landscape amplitude
            - H1 landscape amplitude
            - H0 betti number (at midpoint)
            - H1 betti number (at midpoint)

            If any step fails, a warning is printed and an all-zero vector
            of the same length is returned.
        """
        try:
            # Step 1: Takens embedding
            embedded = takens_embedding(time_series, self.embedding_dim, self.time_delay)

            # Step 2: Compute persistence diagrams
            diagrams = compute_persistence_diagrams(embedded,
                                                   max_dimension=self.max_homology_dim,
                                                   max_edge_length=np.inf)

            # Step 3: One row of statistics per homology dimension
            per_dim = [self._diagram_features(diagrams[dim])
                       for dim in range(self.max_homology_dim + 1)]

        except Exception as e:
            # Best-effort by design: callers always get a fixed-length vector
            print(f"Warning: TDA extraction failed: {e}")
            return np.zeros(self.n_features, dtype=np.float32)

        # Rows are dimensions, columns are statistics; transposing before
        # flattening yields the documented statistic-major layout
        return np.array(per_dim, dtype=np.float32).T.ravel()


# ==================== TESTING ====================

# Smoke test / demo: runs the extractor on three synthetic signals (noisy sine,
# white noise, noisy linear trend) and prints selected features for comparison.
# NOTE(review): no random seed is set in this block, so the printed numbers will
# differ between runs unless the generator is seeded elsewhere.
# NOTE(review): the labels below read index 1 as "H1 entropy" and index 3 as
# "H1 max persistence", i.e. they assume the feature layout listed in the
# extract_features docstring — verify that against the order in which
# extract_features actually appends its features.
if __name__ == "__main__":
    print("Testing TDA from Scratch\n")
    print("=" * 60)

    # Test 1: Simple sine wave (should have periodic structure)
    print("\n1. Testing on sine wave (periodic signal):")
    # Two full periods sampled at 100 points, plus Gaussian noise (std 0.1)
    t = np.linspace(0, 4 * np.pi, 100)
    sine_wave = np.sin(t) + 0.1 * np.random.randn(100)

    # The same extractor instance is reused for all three signals
    extractor = TDAFeatureExtractor(embedding_dim=3, time_delay=2, max_homology_dim=1)
    features_sine = extractor.extract_features(sine_wave)

    print(f"   Features extracted: {features_sine}")
    print(f"   H0 entropy: {features_sine[0]:.4f}")
    print(f"   H1 entropy: {features_sine[1]:.4f}")
    print(f"   H1 max persistence: {features_sine[3]:.4f} (should be > 0 for periodic)")

    # Test 2: Random noise (should have minimal structure)
    print("\n2. Testing on random noise:")
    noise = np.random.randn(100)

    features_noise = extractor.extract_features(noise)

    print(f"   Features extracted: {features_noise}")
    print(f"   H0 entropy: {features_noise[0]:.4f}")
    print(f"   H1 entropy: {features_noise[1]:.4f}")
    print(f"   H1 max persistence: {features_noise[3]:.4f} (should be ~0 for noise)")

    # Test 3: Trend (should show different topology)
    print("\n3. Testing on trend:")
    # Straight line from 0 to 10 with Gaussian noise (std 0.5)
    trend = np.linspace(0, 10, 100) + 0.5 * np.random.randn(100)

    features_trend = extractor.extract_features(trend)

    print(f"   Features extracted: {features_trend}")
    print(f"   H0 entropy: {features_trend[0]:.4f}")
    print(f"   H1 max persistence: {features_trend[3]:.4f}")

    # Note: this summary is printed unconditionally; extract_features reports
    # failures via its own warning and an all-zero vector rather than raising.
    print("\n" + "=" * 60)
    print("✅ TDA from scratch is working!")
    print("\nKey insights:")
    print("- Periodic signals show H1 features (loops in phase space)")
    print("- Random noise shows minimal topological structure")
    print("- Different dynamics produce different topological signatures")