# BigQuery Data Preparation and Feature Extraction for Liquid Neural Networks

This notebook demonstrates how to:

1. Extract and prepare data from BigQuery tables using BigFrames
2. Process features through Restricted Boltzmann Machines (RBMs)
3. Feed the RBM output into a CfC-based liquid neural network with LSTM neurons for gating
4. Implement a motor neuron that outputs a value to trigger deeper exploration

The pipeline is designed to handle terabyte-sized tables efficiently through chunked processing.

## Setup and Imports

In [1]:
# BigQuery Data Preparation and Feature Extraction for Liquid Neural Networks

# This notebook demonstrates how to:
# 1. Extract and prepare data from BigQuery tables using BigFrames
# 2. Process features through Restricted Boltzmann Machines (RBMs)
# 3. Feed the RBM output into a CfC-based liquid neural network with LSTM neurons for gating
# 4. Implement a motor neuron that outputs a value to trigger deeper exploration

# The pipeline is designed to handle terabyte-sized tables efficiently through chunked processing.

# Install required packages if needed
# !pip install google-cloud-bigquery bigframes

# Import required libraries
import os
import bigframes.pandas as bf
import matplotlib.pyplot as plt
import logging
import time
from typing import Dict, List, Optional, Tuple, Union, Any, Generator

# Import emberharmony instead of NumPy and TensorFlow
import ember_ml as eh
from ember_ml import ops
from ember_ml import nn
from ember_ml.backend import get_backend

# Print the current backend
current_backend = get_backend()
print(f"Using {current_backend} backend")

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('bigquery_pipeline')

# Import our components
from ember_ml.nn.features.terabyte_feature_extractor_bigframes import TerabyteFeatureExtractor, TerabyteTemporalStrideProcessor
from ember_ml.models.optimized_rbm import OptimizedRBM
from ember_ml.models.stride_aware_cfc import (
    create_liquid_network_with_motor_neuron,
    create_lstm_gated_liquid_network,
    create_multi_stride_liquid_network
)


DEBUG: get_stats_module - Backend base module name: ember_ml.backend.mlx
DEBUG: get_stats_module - Constructed module name: ember_ml.backend.mlx.stats
DEBUG: get_stats_module - Successfully imported module: ember_ml.backend.mlx.stats
Using mlx backend


## BigQuery Connection Setup

Set up the connection to BigQuery. You can use service account credentials or application default credentials.

In [2]:
# BigQuery Connection Setup
#
# Set up the connection to BigQuery. You can use service account credentials or application default credentials.

# Set your GCP project ID
PROJECT_ID = "massmkt-poc"  # Replace with your project ID

# Path to service account credentials (optional)
CREDENTIALS_PATH = '/Users/sydneybach/sydney-bach.json'  # Replace with path to credentials.json if needed

# BigQuery location
LOCATION = "US"

# Import BigFrames
import bigframes.pandas as bf

# Set BigFrames options
bf.options.bigquery.project = PROJECT_ID
bf.options.bigquery.location = LOCATION

# Initialize the feature extractor
from ember_ml.nn.features.terabyte_feature_extractor_bigframes import TerabyteFeatureExtractor

feature_extractor = TerabyteFeatureExtractor(
    project_id=PROJECT_ID,
    location=LOCATION,
    chunk_size=100000,
    max_memory_gb=16.0,
    verbose=True
)

# Set up BigQuery connection
feature_extractor.setup_bigquery_connection(CREDENTIALS_PATH)

print(f"Connected to BigQuery project: {PROJECT_ID}")
print(f"Using location: {LOCATION}")
print(f"Feature extractor initialized with BigFrames support")

2025-04-22 08:36:38,620 - terabyte_feature_extractor - INFO - Using mlx backend for computation
2025-04-22 08:36:38,621 - terabyte_feature_extractor - INFO - Initialized TerabyteFeatureExtractor with chunk_size=100000, max_memory_gb=16.0
2025-04-22 08:36:38,648 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Connected to BigQuery project: massmkt-poc
2025-04-22 08:36:38,648 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Using location: US


Connected to BigQuery project: massmkt-poc
Using location: US
Feature extractor initialized with BigFrames support


## Explore Available Tables

Let's explore the available tables in your BigQuery project.

In [3]:
# Explore Available Tables
#
# Let's explore the available tables in your BigQuery project.

# Import BigQuery client
from google.cloud import bigquery

# Create client
client = bigquery.Client(project=PROJECT_ID)

# List datasets
datasets = list(client.list_datasets())
print(f"Datasets in project {PROJECT_ID}:")
for dataset in datasets:
    print(f"- {dataset.dataset_id}")

# Choose a dataset to explore
if datasets:
    dataset_id = datasets[0].dataset_id
    print(f"\nTables in dataset {dataset_id}:")
    tables = list(client.list_tables(dataset_id))
    for table in tables:
        print(f"- {table.table_id}")
        
    # Get more details about the first table
    if tables:
        first_table = tables[0]
        table_ref = f"{dataset_id}.{first_table.table_id}"
        table = client.get_table(table_ref)
        
        print(f"\nDetails for table {table_ref}:")
        print(f"Description: {table.description}")
        print(f"Row count: {table.num_rows}")
        print(f"Created: {table.created}")
        print(f"Last modified: {table.modified}")
        
        print("\nSchema:")
        for field in table.schema:
            print(f"- {field.name} ({field.field_type})")
            
        # Preview data
        print("\nPreview data:")
        query = f"SELECT * FROM `{PROJECT_ID}.{table_ref}` LIMIT 5"
        query_job = client.query(query)
        results = query_job.result()
        
        for row in results:
            print(row)
else:
    print("No datasets found in this project.")

Datasets in project massmkt-poc:
- 180601513
- 239703222
- 242584221
- 267229650
- BQML_Datasets
- BigQuery_Google_Ads
- Event_Data_Dictionary
- FieldOps_Reporting_Dataset
- MM_LP_Data
- MM_LP_POC
- Marketing_Cloud
- Monitored_Zipcodes_Expanded
- Partners
- QF_AIOPS
- QF_Activation_POC
- QF_BUY_FLOW_TRANSACTIONS
- QF_Shell_Account_CleanUp_Data
- Service_Appointment_History
- TEST1
- abandoned_jobs
- analytics_251783832
- analytics_379694883
- analytics_405473592
- analytics_424581992
- analytics_435146347
- analytics_451204749
- biwf_mysql_db
- confluent_sink
- connected_communities_dev
- connected_communities_prod
- contact_engine
- datafirst_prod
- datalake_ingestion_sandbox
- design_repair
- dev_sandbox
- dispatch_events
- dispatch_events_test
- ds_mmdldev_bluemarble
- ds_mmdldev_bluemarble_raw
- ds_mmdldev_bluemarble_stg
- ds_mmdldev_kafka_master
- ds_mmdldev_nokiahal_raw
- ds_mmdldev_pc360_raw
- ds_mmpoc_dev_consmobile_silver
- ds_mmpoc_master
- ds_mmpoc_master_uscentral
- dwh_sta

## Extract Features from BigQuery

Now let's extract features from a BigQuery table. Replace `TABLE_ID` with the table you want to use.

In [None]:
# Set the table ID
TABLE_ID = "TEST1.ctl_modem_speedtest_event"  # Replace with your table ID

# Set the target column (optional)
TARGET_COLUMN = 'downloadLatency'  # Replace with your target column if needed

# Set a limit for testing (remove for full dataset)
LIMIT = 10000

# Import the BigFrames-only version of the feature extractor
import sys
import os

# Add the emberharmony directory to the Python path if needed
if not any(p.endswith('emberharmony') for p in sys.path):
    sys.path.append(os.path.abspath(os.path.join(os.getcwd())))

# Import emberharmony instead of NumPy
from ember_ml import ops
from ember_ml.ops import get_backend

# Import BigFrames
import bigframes.pandas as bf

# Print the current backend
current_backend = get_backend()
print(f"Using {current_backend} backend")

# Import the BigFrames-only version of the feature extractor
from ember_ml.nn.features.terabyte_feature_extractor_bigframes import TerabyteFeatureExtractor, TerabyteTemporalStrideProcessor

# Initialize the feature extractor
feature_extractor = TerabyteFeatureExtractor(
    project_id=PROJECT_ID,
    location=LOCATION,
    chunk_size=100000,
    max_memory_gb=16.0,
    verbose=True
)

# Set up BigQuery connection
feature_extractor.setup_bigquery_connection(CREDENTIALS_PATH)

# Extract features
result = feature_extractor.prepare_data(
    table_id=TABLE_ID,
    target_column=TARGET_COLUMN,
    limit=LIMIT,
    force_categorical_columns=[
        "eventType", "eventSource", "eventCategory", "eventPublisherId",
        "productClass", "downloadTestStatus", "uploadState", "uploadTestStatus",
        "wtn", "serialNumber"
    ]
)

if result is not None:
    train_df, val_df, test_df, train_features, val_features, test_features, scaler, imputer = result
    
    print(f"Train shape: {train_df.shape}")
    print(f"Validation shape: {val_df.shape}")
    print(f"Test shape: {test_df.shape}")
    print(f"Features: {train_features}")
    
    # Now we can use these DataFrames directly with BigFrames operations
    # For example, to get the first few rows of the training data:
    print("\nFirst few rows of training data:")
    print(train_df.head())
    
    # Or to get summary statistics:
    print("\nSummary statistics for training data:")
    print(train_df[train_features].describe())
    
    # Define a function to convert BigFrames to emberharmony tensors
    def bigframes_to_tensor(bf_df, columns):
        """
        Convert BigFrames DataFrame to emberharmony tensor.
        
        Args:
            bf_df: BigFrames DataFrame
            columns: Columns to include
            
        Returns:
            emberharmony tensor
        """
        # We need to convert to numpy array first, then to emberharmony tensor
        # This is a temporary step until BigFrames supports direct conversion
        array_data = bf_df[columns].to_numpy()
        from ember_ml.nn import tensor
        return tensor.convert_to_tensor(array_data)
    
    # Convert BigFrames DataFrames directly to emberharmony tensors
    print("\nConverting BigFrames DataFrames to emberharmony tensors...")
    train_tensor = bigframes_to_tensor(train_df, train_features)
    val_tensor = bigframes_to_tensor(val_df, val_features)
    test_tensor = bigframes_to_tensor(test_df, test_features)
    
    print("\nConverted to emberharmony tensors for GPU acceleration")
    print(f"Train tensor shape: {ops.shape(train_tensor)}")
    print(f"Validation tensor shape: {ops.shape(val_tensor)}")
    print(f"Test tensor shape: {ops.shape(test_tensor)}")
    
    # Perform some basic operations with emberharmony ops
    print("\nBasic statistics using emberharmony ops:")
    print(f"Mean of train features: {ops.stats.mean(train_tensor, axis=0)[:5]}...")  # Show first 5 means
    print(f"Standard deviation of train features: {ops.sqrt(ops.stats.mean(ops.square(train_tensor - ops.stats.means.mean(train_tensor, axis=0)), axis=0))[:5]}...")  # Show first 5 stds
    print(f"Min of train features: {stats.min(train_tensor, axis=0)[:5]}...")  # Show first 5 mins
    print(f"Max of train features: {stats.max(train_tensor, axis=0)[:5]}...")  # Show first 5 maxs
else:
    print("Feature extraction failed")
    # Create empty variables to avoid NameError in subsequent cells
    import bigframes.pandas as bf
    train_df = bf.DataFrame()
    val_df = bf.DataFrame()
    test_df = bf.DataFrame()
    train_features = []
    val_features = []
    test_features = []
    train_tensor = ops.zeros((0, 0))
    val_tensor = ops.zeros((0, 0))
    test_tensor = ops.zeros((0, 0))

2025-04-22 08:36:45,350 - terabyte_feature_extractor - INFO - Using mlx backend for computation
2025-04-22 08:36:45,351 - terabyte_feature_extractor - INFO - Initialized TerabyteFeatureExtractor with chunk_size=100000, max_memory_gb=16.0
2025-04-22 08:36:45,379 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Connected to BigQuery project: massmkt-poc
2025-04-22 08:36:45,380 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Using location: US
2025-04-22 08:36:45,380 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Preparing data from table: TEST1.ctl_modem_speedtest_event
2025-04-22 08:36:45,381 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Target column: downloadLatency
2025-04-22 08:36:45,381 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Limit: 10000


Using mlx backend


2025-04-22 08:36:50,513 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Loaded 241 rows from BigQuery
2025-04-22 08:36:50,514 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Columns: ['eventId', 'eventType', 'eventSource', 'eventCategory', 'eventPublisherId', 'eventTimestamp', 'wtn', 'serialNumber', 'productClass', 'downloadThroughput', 'downloadLatency', 'downloadTestStatus', 'uploadState', 'uploadThroughput', 'uploadTestStatus']
2025-04-22 08:36:50,514 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Processing data...
2025-04-22 08:36:50,516 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Categorical columns: ['eventId', 'eventType', 'eventSource', 'eventCategory', 'eventPublisherId', 'eventTimestamp', 'wtn', 'serialNumber', 'productClass', 'downloadThroughput', 'downloadLatency', 'downloadTestStatus', 'uploadState', 'uploadThroughput', 'uploadTestStatus']
2025-04-22 08:36:50,516 - ember_ml.nn.feature

2025-04-22 08:36:53,433 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Conversion to pandas DataFrame took 2.92 seconds.
2025-04-22 08:36:53,434 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Applying sklearn OneHotEncoder to columns: ['eventId', 'eventType', 'eventSource', 'eventCategory', 'eventPublisherId', 'eventTimestamp', 'wtn', 'serialNumber', 'productClass', 'downloadThroughput', 'downloadLatency', 'downloadTestStatus', 'uploadState', 'uploadThroughput', 'uploadTestStatus']
2025-04-22 08:36:53,435 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Applying robust string conversion to column 'eventId'.
2025-04-22 08:36:53,436 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Applying robust string conversion to column 'eventType'.
2025-04-22 08:36:53,437 - ember_ml.nn.features.terabyte_feature_extractor_bigframes - INFO - Applying robust string conversion to column 'eventSource'.
2025-04-22 08:36:53

Train shape: (168, 943)
Validation shape: (36, 943)
Test shape: (37, 943)
Features: ['eventId_0071b08a-a6de-4d31-8076-7c5f3185a26d', 'eventId_00d19697-47b6-483a-9318-2b036b8255dd', 'eventId_017f4c5b-1638-4f27-99af-0f1a9719514a', 'eventId_02d395d9-182c-47c4-bd9c-4cc64a5b28fc', 'eventId_0315f760-b158-4f76-9617-5055a3d2cc9b', 'eventId_040682e7-186d-4b83-add4-aa89787c7286', 'eventId_0551eeb4-6b24-4519-84e2-3a297d6a1df8', 'eventId_05f352e3-4003-48e4-a572-ce06a85abf12', 'eventId_077117fc-54e3-4323-972b-286446e21c82', 'eventId_078017fc-6234-46ec-9536-df0a9a111e2a', 'eventId_0883c266-d4c2-426f-a649-0e71eb21d444', 'eventId_0946766f-a26a-4130-a964-f52eaccbb0bf', 'eventId_09f53b24-ce2f-4216-b89a-ca43b813c00c', 'eventId_0b5f6ed7-a811-4fa4-bbb4-0a6754505334', 'eventId_0dab9718-c9fa-4281-9361-a96fa34956fa', 'eventId_0dd36ad1-8cdb-436a-a77b-cdd2dafede61', 'eventId_1368337e-09ff-47e6-9ac8-fcbaabc79e8b', 'eventId_13833680-2235-4d31-a417-f01d2eb0053c', 'eventId_14132fdf-483d-4147-8c05-c10d78bd3dbe', 'ev

AttributeError: module 'ember_ml.ops' has no attribute 'shape'

## Apply Temporal Stride Processing

Now let's apply temporal stride processing to the extracted features.

In [None]:
# Import emberharmony instead of NumPy
from ember_ml import ops
from ember_ml.ops import get_backend
from ember_ml.nn import tensor
import matplotlib.pyplot as plt

# Import BigFrames
import bigframes.pandas as bf

# Print the current backend
current_backend = get_backend()
print(f"Using {current_backend} backend")

# Create temporal processor
temporal_processor = TerabyteTemporalStrideProcessor(
    window_size=10,
    stride_perspectives=[1, 3, 5],
    pca_components=32,
    batch_size=10000,
    use_incremental_pca=True,
    verbose=True
)

# Define a function to convert BigFrames to emberharmony tensors
def bigframes_to_tensor(bf_df, columns):
    """
    Convert BigFrames DataFrame to emberharmony tensor.
    
    Args:
        bf_df: BigFrames DataFrame
        columns: Columns to include
        
    Returns:
        emberharmony tensor
    """
    # We need to convert to numpy array first, then to emberharmony tensor
    # This is a temporary step until BigFrames supports direct conversion
    array_data = bf_df[columns].to_numpy()
    return tensor.convert_to_tensor(array_data)

# Define a generator to yield data in batches
def data_generator(df, features, batch_size=10000):
    # Convert BigFrames DataFrame to emberharmony tensors in batches
    for i in range(0, len(df), batch_size):
        # Get a batch of data
        batch = df.iloc[i:i+batch_size]
        
        # Convert directly to emberharmony tensor using our helper function
        yield bigframes_to_tensor(batch, features)

# Process data - make sure train_df and train_features are defined
if len(train_df) > 0 and len(train_features) > 0:
    # Process the data through the temporal stride processor
    stride_perspectives = temporal_processor.process_large_dataset(
        data_generator(train_df, train_features, batch_size=10000)
    )
    
    # Print stride perspective shapes
    for stride, data in stride_perspectives.items():
        # Use ops.shape instead of .shape
        print(f"Stride {stride}: shape {ops.shape(data)}")
    
    # Visualize explained variance for each stride
    explained_variances = [temporal_processor.get_explained_variance(stride) for stride in stride_perspectives.keys()]
    plt.figure(figsize=(10, 6))
    plt.bar(stride_perspectives.keys(), explained_variances)
    plt.xlabel('Stride Length')
    plt.ylabel('Explained Variance Ratio')
    plt.title('Explained Variance by Stride Length')
    plt.show()
    
    # Visualize feature importance for the first stride
    first_stride = list(stride_perspectives.keys())[0]
    feature_importance = temporal_processor.get_feature_importance(first_stride)
    
    if feature_importance is not None:
        # Reshape to match the original feature dimensions
        window_size = temporal_processor.window_size
        feature_dim = len(train_features)
        
        # Use ops.reshape instead of .reshape
        reshaped_importance = ops.reshape(feature_importance, (window_size, feature_dim))
        
        # Convert to numpy for matplotlib
        reshaped_importance_np = ops.to_numpy(reshaped_importance)
        
        plt.figure(figsize=(12, 8))
        plt.imshow(reshaped_importance_np, cmap='viridis', aspect='auto')
        plt.colorbar(label='Feature Importance')
        plt.xlabel('Feature Index')
        plt.ylabel('Time Step')
        plt.title(f'Feature Importance Across Time Steps (Stride {first_stride})')
        
        # Add feature names on x-axis if not too many
        if len(train_features) <= 20:
            plt.xticks(range(len(train_features)), train_features, rotation=90)
        
        plt.tight_layout()
        plt.show()
        
    # Apply temporal processing to create multi-stride features
    print("\nApplying temporal processing to create multi-stride features...")
    
    # Convert train_df directly to emberharmony tensor
    train_tensor = bigframes_to_tensor(train_df, train_features)
    
    # Process through each stride perspective
    multi_stride_features = {}
    for stride, pca in temporal_processor.pca_models.items():
        # Create windowed data
        windows = temporal_processor.create_windows(train_tensor, stride)
        
        # Flatten windows
        batch_size = ops.shape(windows)[0]
        flattened = ops.reshape(windows, (batch_size, -1))
        
        # Transform with PCA - use emberharmony ops for PCA if possible
        if hasattr(eh, 'pca'):
            # If emberharmony has PCA implementation
            transformed = eh.pca.transform(flattened, n_components=pca.n_components_)
        else:
            # Fallback to scikit-learn PCA which requires numpy
            transformed = pca.transform(ops.to_numpy(flattened))
            transformed = tensor.convert_to_tensor(transformed)
        
        # Store the transformed features
        multi_stride_features[stride] = transformed
        
        print(f"Stride {stride} features shape: {ops.shape(multi_stride_features[stride])}")
    
    # Demonstrate how to combine multi-stride features
    print("\nCombining multi-stride features...")
    
    # Get a list of all stride features
    stride_features_list = [multi_stride_features[stride] for stride in sorted(multi_stride_features.keys())]
    
    # Concatenate along feature dimension (axis 1)
    combined_features = ops.concatenate(stride_features_list, axis=1)
    
    print(f"Combined multi-stride features shape: {ops.shape(combined_features)}")
    
    # Calculate correlation between strides using emberharmony ops
    print("\nCalculating correlation between stride features...")
    
    # Calculate correlation matrix using emberharmony ops
    centered_features = combined_features - ops.stats.mean(combined_features, axis=0)
    corr_matrix = ops.matmul(
        ops.transpose(centered_features),
        centered_features
    ) / ops.shape(combined_features)[0]
    
    # Convert to numpy only for visualization with matplotlib
    corr_matrix_np = ops.to_numpy(corr_matrix)
    
    # Visualize correlation matrix
    plt.figure(figsize=(10, 8))
    plt.imshow(corr_matrix_np, cmap='coolwarm', vmin=-1, vmax=1)
    plt.colorbar()
    plt.title('Multi-stride Feature Correlations')
    plt.show()
    
    print("\nMulti-stride temporal processing complete!")
else:
    print("Cannot process data: train_df or train_features is empty")

## Train Restricted Boltzmann Machine and Create Liquid Neural Network

Now let's train an RBM on the extracted features, feed them into a liquid neural network, and analyze the results.

In [None]:
# Import required libraries
import matplotlib.pyplot as plt
import os

# Import emberharmony instead of TensorFlow and NumPy
from ember_ml import ops
from ember_ml.ops import get_backend

# Import BigFrames
import bigframes.pandas as bf

# Import the RBM and liquid network components
from ember_ml.models.optimized_rbm import OptimizedRBM
from ember_ml.models.stride_aware_cfc import (
    create_liquid_network_with_motor_neuron,
    create_lstm_gated_liquid_network,
    create_multi_stride_liquid_network
)

# Print the current backend
current_backend = get_backend()
print(f"Using {current_backend} backend")

# Define a function to convert BigFrames to emberharmony tensors
def bigframes_to_tensor(bf_df, columns):
    """
    Convert BigFrames DataFrame to emberharmony tensor.
    
    Args:
        bf_df: BigFrames DataFrame
        columns: Columns to include
        
    Returns:
        emberharmony tensor
    """
    # We need to convert to numpy array first, then to emberharmony tensor
    # This is a temporary step until BigFrames supports direct conversion
    array_data = bf_df[columns].to_numpy()
    return tensor.convert_to_tensor(array_data)

# Create RBM
if len(train_features) > 0:
    # Initialize RBM
    rbm = OptimizedRBM(
        n_visible=len(train_features),
        n_hidden=64,
        learning_rate=0.01,
        momentum=0.5,
        weight_decay=0.0001,
        batch_size=100,
        use_binary_states=False,
        use_gpu=True,
        verbose=True
    )
    
    # Define a generator to yield data in batches directly from BigFrames
    def rbm_data_generator(bf_df, features, batch_size=100):
        # Get total size
        total_size = len(bf_df)
        
        # Create random indices for shuffling
        random_values = tensor.random_uniform((total_size,))
        indices = ops.to_numpy(ops.argsort(random_values))
        
        # Process in batches
        for i in range(0, total_size, batch_size):
            end_idx = min(i + batch_size, total_size)
            batch_indices = indices[i:end_idx]
            
            # Get batch from BigFrames DataFrame
            batch = bf_df.iloc[batch_indices]
            
            # Convert directly to emberharmony tensor
            yield bigframes_to_tensor(batch, features)
    
    # Train RBM
    training_errors = rbm.train_in_chunks(
        rbm_data_generator(train_df, train_features, batch_size=100),
        epochs=10,
        k=1
    )
    
    # Plot training errors
    plt.figure(figsize=(10, 6))
    plt.plot(ops.to_numpy(training_errors))
    plt.xlabel('Epoch')
    plt.ylabel('Reconstruction Error')
    plt.title('RBM Training Error')
    plt.show()
    
    # Extract RBM features
    def feature_generator(bf_df, features, batch_size=1000):
        # Get total size
        total_size = len(bf_df)
        
        # Process in batches
        for i in range(0, total_size, batch_size):
            end_idx = min(i + batch_size, total_size)
            
            # Get batch from BigFrames DataFrame
            batch = bf_df.iloc[i:end_idx]
            
            # Convert directly to emberharmony tensor
            yield bigframes_to_tensor(batch, features)
    
    # Extract features from RBM
    train_rbm_features = rbm.transform_in_chunks(
        feature_generator(train_df, train_features, batch_size=1000)
    )
    
    val_rbm_features = rbm.transform_in_chunks(
        feature_generator(val_df, val_features, batch_size=1000)
    )
    
    test_rbm_features = rbm.transform_in_chunks(
        feature_generator(test_df, test_features, batch_size=1000)
    )
    
    # Convert to emberharmony tensors if they aren't already
    train_rbm_features = tensor.convert_to_tensor(train_rbm_features)
    val_rbm_features = tensor.convert_to_tensor(val_rbm_features)
    test_rbm_features = tensor.convert_to_tensor(test_rbm_features)
    
    print(f"Train RBM features shape: {ops.shape(train_rbm_features)}")
    print(f"Validation RBM features shape: {ops.shape(val_rbm_features)}")
    print(f"Test RBM features shape: {ops.shape(test_rbm_features)}")
    
    # Visualize RBM feature distributions
    plt.figure(figsize=(12, 8))
    
    # Plot histograms for first 16 RBM features
    for i in range(min(16, ops.shape(train_rbm_features)[1])):
        plt.subplot(4, 4, i+1)
        # Convert to numpy for matplotlib
        feature_np = ops.to_numpy(train_rbm_features[:, i])
        plt.hist(feature_np, bins=30, alpha=0.7)
        plt.title(f'Feature {i+1}')
        plt.tight_layout()
    
    plt.suptitle('RBM Feature Distributions', y=1.02)
    plt.show()
    
    # Visualize feature correlations
    plt.figure(figsize=(10, 8))
    # Use emberharmony ops for correlation
    centered_features = train_rbm_features - ops.stats.mean(train_rbm_features, axis=0)
    corr_matrix = ops.matmul(
        ops.transpose(centered_features),
        centered_features
    ) / ops.shape(train_rbm_features)[0]
    
    # Convert to numpy only for visualization with matplotlib
    corr_matrix_np = ops.to_numpy(corr_matrix)
    
    plt.imshow(corr_matrix_np, cmap='coolwarm', vmin=-1, vmax=1)
    plt.colorbar()
    plt.title('RBM Feature Correlations')
    plt.show()
    
    # Create dummy targets for demonstration
    # In a real application, you would use actual targets from your data
    train_targets = tensor.random_uniform((ops.shape(train_rbm_features)[0], 1))
    val_targets = tensor.random_uniform((ops.shape(val_rbm_features)[0], 1))
    test_targets = tensor.random_uniform((ops.shape(test_rbm_features)[0], 1))
    
    # Reshape RBM features for sequence input
    train_rbm_seq = ops.reshape(train_rbm_features, 
                               (ops.shape(train_rbm_features)[0], 1, ops.shape(train_rbm_features)[1]))
    val_rbm_seq = ops.reshape(val_rbm_features,
                             (ops.shape(val_rbm_features)[0], 1, ops.shape(val_rbm_features)[1]))
    test_rbm_seq = ops.reshape(test_rbm_features,
                              (ops.shape(test_rbm_features)[0], 1, ops.shape(test_rbm_features)[1]))
    
    # Create liquid neural network using emberharmony's built-in components
    liquid_network = create_liquid_network_with_motor_neuron(
        input_dim=ops.shape(train_rbm_features)[1],
        units=128,
        output_dim=1,
        sparsity_level=0.5,
        stride_length=1,
        time_scale_factor=1.0,
        threshold=0.5,
        adaptive_threshold=True,
        mixed_memory=True
    )
    
    # Set up callbacks using emberharmony's callback system
    callbacks = [
        # Early stopping
        eh.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True
        ),
        
        # Learning rate scheduling
        eh.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-6
        ),
        
        # Logging
        eh.callbacks.ModelCheckpoint(
            filepath='./models/liquid_network_checkpoint',
            monitor='val_loss',
            save_best_only=True
        )
    ]
    
    # Train liquid network using emberharmony's training API
    history = liquid_network.fit(
        train_rbm_seq,
        train_targets,
        validation_data=(val_rbm_seq, val_targets),
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )
    
    # Plot training history
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history['loss'], label='Train')
    plt.plot(history['val_loss'], label='Validation')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(history['mae'], label='Train')
    plt.plot(history['val_mae'], label='Validation')
    plt.xlabel('Epoch')
    plt.ylabel('MAE')
    plt.title('Mean Absolute Error')
    plt.legend()
    
    plt.tight_layout()
    plt.show()
    
    # Process test data
    outputs = liquid_network.predict(test_rbm_seq)
    
    # Extract motor neuron outputs and trigger signals
    if isinstance(outputs, list):
        motor_outputs = outputs[0]
        trigger_signals = outputs[1][0]  # First element is trigger
        threshold_values = outputs[1][1]  # Second element is threshold
    else:
        motor_outputs = outputs
        trigger_signals = ops.cast(motor_outputs > 0.5, ops.float32)
        threshold_values = ops.full_like(motor_outputs, 0.5)
    
    # Print statistics
    print(f"Motor neuron output range: {stats.min(motor_outputs):.4f} to {stats.max(motor_outputs):.4f}")
    print(f"Trigger rate: {ops.stats.mean(trigger_signals):.4f}")
    
    # Plot motor neuron outputs and triggers
    plt.figure(figsize=(12, 8))
    
    plt.subplot(2, 1, 1)
    plt.plot(ops.to_numpy(motor_outputs[:100]), label='Motor Neuron Output')
    plt.plot(ops.to_numpy(threshold_values[:100]), 'r--', label='Threshold')
    plt.xlabel('Sample')
    plt.ylabel('Output Value')
    plt.title('Motor Neuron Output and Threshold')
    plt.legend()
    
    plt.subplot(2, 1, 2)
    plt.plot(ops.to_numpy(trigger_signals[:100]), 'g', label='Trigger Signal')
    plt.axhline(y=ops.to_numpy(ops.stats.mean(trigger_signals)), color='r', linestyle='--', 
               label=f'Trigger Rate: {ops.to_numpy(ops.stats.mean(trigger_signals)):.2f}')
    plt.xlabel('Sample')
    plt.ylabel('Trigger (0/1)')
    plt.title('Exploration Trigger Signals')
    plt.legend()
    
    plt.tight_layout()
    plt.show()
    
    # Analyze triggered samples
    triggered_indices = ops.where(trigger_signals == 1)
    non_triggered_indices = ops.where(trigger_signals == 0)
    
    print(f"Number of triggered samples: {ops.shape(triggered_indices)[0]}")
    print(f"Number of non-triggered samples: {ops.shape(non_triggered_indices)[0]}")
    
    # Compare RBM features for triggered vs. non-triggered samples
    if ops.shape(triggered_indices)[0] > 0 and ops.shape(non_triggered_indices)[0] > 0:
        # Calculate mean features
        triggered_mean = ops.stats.mean(ops.gather(test_rbm_features, triggered_indices), axis=0)
        non_triggered_mean = ops.stats.mean(ops.gather(test_rbm_features, non_triggered_indices), axis=0)
        
        # Calculate feature difference
        feature_diff = ops.subtract(triggered_mean, non_triggered_mean)
        
        # Plot feature difference
        plt.figure(figsize=(12, 6))
        plt.bar(range(ops.shape(feature_diff)[0]), ops.to_numpy(feature_diff))
        plt.xlabel('RBM Feature')
        plt.ylabel('Difference (Triggered - Non-triggered)')
        plt.title('Feature Difference Between Triggered and Non-triggered Samples')
        plt.axhline(y=0, color='r', linestyle='--')
        plt.show()
        
        # Plot feature distributions for top 3 differentiating features
        top_features = ops.to_numpy(ops.argsort(ops.abs(feature_diff)))[-3:]
        
        plt.figure(figsize=(15, 5))
        for i, feature_idx in enumerate(top_features):
            plt.subplot(1, 3, i+1)
            
            # Get feature values for triggered and non-triggered samples
            triggered_features = ops.to_numpy(ops.gather(
                ops.gather(test_rbm_features, triggered_indices),
                [feature_idx], axis=1
            ))
            
            non_triggered_features = ops.to_numpy(ops.gather(
                ops.gather(test_rbm_features, non_triggered_indices),
                [feature_idx], axis=1
            ))
            
            plt.hist(triggered_features, bins=20, alpha=0.5, label='Triggered')
            plt.hist(non_triggered_features, bins=20, alpha=0.5, label='Non-triggered')
            plt.xlabel(f'Feature {feature_idx}')
            plt.ylabel('Count')
            plt.title(f'Feature {feature_idx} Distribution')
            plt.legend()
        
        plt.tight_layout()
        plt.show()
    
    # Save models
    os.makedirs('./models', exist_ok=True)
    
    # Save RBM
    rbm.save('./models/rbm.npy')
    print("RBM saved to ./models/rbm.npy")
    
    # Save liquid network
    liquid_network.save('./models/liquid_network')
    print("Liquid network saved to ./models/liquid_network")
    
else:
    print("Cannot train RBM: train_features is empty")

## Save Models

Let's save the trained models for future use.

In [None]:
# Save Models
#
# Let's save the trained models for future use.

import os
import pickle
import json

# Create directory for models
os.makedirs('./models', exist_ok=True)

# Save RBM if it exists
if 'rbm' in globals():
    rbm.save('./models/rbm.npy')
    print("RBM saved to ./models/rbm.npy")
    
    # Save RBM configuration
    rbm_config = {
        'n_visible': rbm.n_visible,
        'n_hidden': rbm.n_hidden,
        'learning_rate': rbm.learning_rate,
        'momentum': rbm.momentum,
        'weight_decay': rbm.weight_decay,
        'batch_size': rbm.batch_size,
        'use_binary_states': rbm.use_binary_states
    }
    
    with open('./models/rbm_config.json', 'w') as f:
        json.dump(rbm_config, f, indent=2)
    
    print("RBM configuration saved to ./models/rbm_config.json")
else:
    print("Cannot save RBM: not trained")

# Save liquid network if it exists
if 'liquid_network' in globals():
    liquid_network.save('./models/liquid_network')
    print("Liquid network saved to ./models/liquid_network")
    
    # Save liquid network configuration
    try:
        liquid_config = {
            'input_dim': liquid_network.input_shape[-1],
            'units': liquid_network.layers[1].units if hasattr(liquid_network.layers[1], 'units') else None,
            'output_dim': liquid_network.output_shape[-1],
            'sparsity_level': 0.5,  # Default value, might not be accurate
            'threshold': 0.5  # Default value, might not be accurate
        }
        
        with open('./models/liquid_network_config.json', 'w') as f:
            json.dump(liquid_config, f, indent=2)
        
        print("Liquid network configuration saved to ./models/liquid_network_config.json")
    except:
        print("Could not save liquid network configuration")
else:
    print("Cannot save liquid network: not trained")

# Save feature extractor configuration
if 'feature_extractor' in globals():
    feature_config = {
        'project_id': feature_extractor.project_id,
        'location': feature_extractor.location,
        'chunk_size': feature_extractor.chunk_size,
        'max_memory_gb': feature_extractor.max_memory_gb
    }
    
    with open('./models/feature_extractor_config.json', 'w') as f:
        json.dump(feature_config, f, indent=2)
    
    print("Feature extractor configuration saved to ./models/feature_extractor_config.json")
    
    # Save feature names
    if 'train_features' in globals() and train_features:
        with open('./models/feature_names.json', 'w') as f:
            json.dump(train_features, f, indent=2)
        
        print("Feature names saved to ./models/feature_names.json")

# Save scaler and imputer if they exist
if 'scaler' in globals() and scaler is not None:
    with open('./models/scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)
    
    print("Scaler saved to ./models/scaler.pkl")

if 'imputer' in globals() and imputer is not None:
    with open('./models/imputer.pkl', 'wb') as f:
        pickle.dump(imputer, f)
    
    print("Imputer saved to ./models/imputer.pkl")

print("\nAll models and configurations saved to ./models/ directory")

## Using the Integrated Pipeline

Now let's demonstrate how to use the integrated pipeline for a more streamlined workflow.

In [None]:
# Import required libraries
import os
import matplotlib.pyplot as plt

# Import emberharmony instead of TensorFlow and NumPy
from ember_ml import ops
from ember_ml import nn
from ember_ml.ops import get_backend

# Import BigFrames
import bigframes.pandas as bf

# Print the current backend
current_backend = get_backend()
print(f"Using {current_backend} backend")

# Import the integrated pipeline
from examples.notebooks.bigquery.pipeline_demo import IntegratedPipeline

# Define a function to convert BigFrames to emberharmony tensors
def bigframes_to_tensor(bf_df, columns):
    """
    Convert BigFrames DataFrame to emberharmony tensor.
    
    Args:
        bf_df: BigFrames DataFrame
        columns: Columns to include
        
    Returns:
        emberharmony tensor
    """
    # We need to convert to numpy array first, then to emberharmony tensor
    # This is a temporary step until BigFrames supports direct conversion
    array_data = bf_df[columns].to_numpy()
    return tensor.convert_to_tensor(array_data)

# Create integrated pipeline with BigFrames support
class BigFramesIntegratedPipeline(IntegratedPipeline):
    """
    Extended version of IntegratedPipeline that supports BigFrames DataFrames.
    """
    
    def initialize_feature_extractor(self, credentials_path=None):
        """
        Initialize the feature extractor with BigFrames support.
        
        Args:
            credentials_path: Optional path to service account credentials
        """
        # Import the BigFrames-only version of the feature extractor
        from ember_ml.nn.features.terabyte_feature_extractor_bigframes import TerabyteFeatureExtractor
        
        # Initialize the feature extractor
        self.feature_extractor = TerabyteFeatureExtractor(
            project_id=self.project_id,
            location="US",
            chunk_size=100000,
            max_memory_gb=16.0,
            verbose=self.verbose
        )
        
        # Set up BigQuery connection
        self.feature_extractor.setup_bigquery_connection(credentials_path)
        
        if self.verbose:
            print("Feature extractor initialized with BigFrames support")
    
    def extract_features(self, table_id, target_column=None, limit=None, force_categorical_columns=None):
        """
        Extract features from a BigQuery table using BigFrames.
        
        Args:
            table_id: BigQuery table ID (dataset.table)
            target_column: Target variable name
            limit: Optional row limit for testing
            force_categorical_columns: Columns to force as categorical
            
        Returns:
            Tuple: (train_features, val_features, test_features)
        """
        if self.feature_extractor is None:
            raise ValueError("Feature extractor not initialized. Call initialize_feature_extractor() first.")
        
        if self.verbose:
            print(f"Extracting features from {table_id}")
        
        # Extract features
        result = self.feature_extractor.prepare_data(
            table_id=table_id,
            target_column=target_column,
            limit=limit,
            force_categorical_columns=force_categorical_columns
        )
        
        if result is None:
            if self.verbose:
                print("Feature extraction failed")
            return None, None, None
        
        # Unpack result
        train_df, val_df, test_df, train_features, val_features, test_features, self.scaler, self.imputer = result
        
        # Store DataFrames
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        
        if self.verbose:
            print(f"Extracted {len(train_features)} features")
            print(f"Train shape: {train_df.shape}")
            print(f"Validation shape: {val_df.shape}")
            print(f"Test shape: {test_df.shape}")
        
        # Convert BigFrames DataFrames directly to emberharmony tensors
        train_features_array = bigframes_to_tensor(train_df, train_features)
        val_features_array = bigframes_to_tensor(val_df, val_features)
        test_features_array = bigframes_to_tensor(test_df, test_features)
        
        return train_features_array, val_features_array, test_features_array
    
    def apply_temporal_processing(self, features):
        """
        Apply temporal stride processing to features.
        
        Args:
            features: Feature tensor
            
        Returns:
            Dict: Stride perspectives
        """
        if self.temporal_processor is None:
            from ember_ml.nn.features.terabyte_feature_extractor_bigframes import TerabyteTemporalStrideProcessor
            
            self.temporal_processor = TerabyteTemporalStrideProcessor(
                window_size=10,
                stride_perspectives=self.stride_perspectives,
                pca_components=32,
                batch_size=10000,
                use_incremental_pca=True,
                verbose=self.verbose
            )
        
        # Process features through temporal processor
        # Wrap in a generator to match the expected interface
        def single_batch_generator(tensor):
            yield tensor
        
        stride_perspectives = self.temporal_processor.process_large_dataset(
            single_batch_generator(features)
        )
        
        if self.verbose:
            for stride, data in stride_perspectives.items():
                print(f"Stride {stride}: shape {ops.shape(data)}")
        
        return stride_perspectives
    
    def train_rbm(self, features, epochs=10):
        """
        Train RBM on features.
        
        Args:
            features: Feature tensor
            epochs: Number of epochs
            
        Returns:
            OptimizedRBM: Trained RBM
        """
        if self.verbose:
            print("Training RBM...")
        
        # Initialize RBM
        self.rbm = OptimizedRBM(
            n_visible=ops.shape(features)[1],
            n_hidden=self.rbm_hidden_units,
            learning_rate=0.01,
            momentum=0.5,
            weight_decay=0.0001,
            batch_size=100,
            use_binary_states=False,
            use_gpu=self.use_gpu,
            verbose=self.verbose
        )
        
        # Define a generator to yield data in batches
        def rbm_data_generator(tensor, batch_size=100):
            # Get total size
            total_size = ops.shape(tensor)[0]
            
            # Generate random indices for shuffling
            indices = ops.argsort(tensor.random_uniform((total_size,)))
            
            # Shuffle tensor
            shuffled = ops.gather(tensor, indices)
            
            # Yield batches
            for i in range(0, total_size, batch_size):
                end_idx = min(i + batch_size, total_size)
                yield ops.slice(shuffled, [i, 0], [end_idx - i, ops.shape(tensor)[1]])
        
        # Train RBM
        training_errors = self.rbm.train_in_chunks(
            rbm_data_generator(features),
            epochs=epochs,
            k=1
        )
        
        if self.verbose:
            print(f"RBM training complete. Final error: {training_errors[-1]:.4f}")
        
        return self.rbm
    
    def extract_rbm_features(self, features):
        """
        Extract features from trained RBM.
        
        Args:
            features: Feature tensor
            
        Returns:
            Tensor: RBM features
        """
        if self.rbm is None:
            raise ValueError("RBM not trained. Call train_rbm() first.")
        
        if self.verbose:
            print("Extracting RBM features...")
        
        # Define a generator to yield data in batches
        def feature_generator(tensor, batch_size=1000):
            # Get total size
            total_size = ops.shape(tensor)[0]
            
            # Yield batches
            for i in range(0, total_size, batch_size):
                end_idx = min(i + batch_size, total_size)
                yield ops.slice(tensor, [i, 0], [end_idx - i, ops.shape(tensor)[1]])
        
        # Extract features
        rbm_features = self.rbm.transform_in_chunks(
            feature_generator(features)
        )
        
        # Convert to emberharmony tensor if not already
        rbm_features = tensor.convert_to_tensor(rbm_features)
        
        if self.verbose:
            print(f"RBM features shape: {ops.shape(rbm_features)}")
        
        return rbm_features
    
    def train_liquid_network(self, features, targets, validation_data=None, epochs=50, batch_size=32, network_type='motor_neuron'):
        """
        Train liquid neural network on RBM features.
        
        Args:
            features: RBM feature tensor
            targets: Target tensor
            validation_data: Tuple of (val_features, val_targets)
            epochs: Number of epochs
            batch_size: Batch size
            network_type: Type of liquid network ('motor_neuron', 'lstm_gated', or 'multi_stride')
            
        Returns:
            Model: Trained liquid network
        """
        if self.verbose:
            print(f"Training {network_type} liquid network...")
        
        # Reshape features for sequence input
        features_seq = ops.reshape(features, (ops.shape(features)[0], 1, ops.shape(features)[1]))
        
        # Prepare validation data
        if validation_data is not None:
            val_features, val_targets = validation_data
            val_features_seq = ops.reshape(val_features, (ops.shape(val_features)[0], 1, ops.shape(val_features)[1]))
            validation_data = (val_features_seq, val_targets)
        
        # Create liquid network
        if network_type == 'motor_neuron':
            self.liquid_network = create_liquid_network_with_motor_neuron(
                input_dim=ops.shape(features)[1],
                units=self.cfc_units,
                output_dim=ops.shape(targets)[1],
                sparsity_level=self.sparsity_level,
                stride_length=1,
                time_scale_factor=1.0,
                threshold=self.threshold,
                adaptive_threshold=True,
                mixed_memory=True
            )
        elif network_type == 'lstm_gated':
            self.liquid_network = create_lstm_gated_liquid_network(
                input_dim=ops.shape(features)[1],
                cfc_units=self.cfc_units,
                lstm_units=self.lstm_units,
                output_dim=ops.shape(targets)[1],
                sparsity_level=self.sparsity_level
            )
        elif network_type == 'multi_stride':
            self.liquid_network = create_multi_stride_liquid_network(
                input_dim=ops.shape(features)[1],
                units=self.cfc_units,
                output_dim=ops.shape(targets)[1],
                stride_perspectives=self.stride_perspectives,
                sparsity_level=self.sparsity_level
            )
        else:
            raise ValueError(f"Unknown network type: {network_type}")
        
        # Set up callbacks
        callbacks = [
            # Early stopping
            eh.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True
            ),
            
            # Learning rate scheduling
            eh.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=1e-6
            ),
            
            # Model checkpoint
            eh.callbacks.ModelCheckpoint(
                filepath='./models/liquid_network_checkpoint',
                monitor='val_loss',
                save_best_only=True
            )
        ]
        
        # Train liquid network
        history = self.liquid_network.fit(
            features_seq,
            targets,
            validation_data=validation_data,
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1 if self.verbose else 0
        )
        
        if self.verbose:
            print("Liquid network training complete.")
        
        return self.liquid_network
    
    def process_data(self, features):
        """
        Process data through the trained liquid network.
        
        Args:
            features: RBM feature tensor
            
        Returns:
            Tuple: (motor_outputs, trigger_signals)
        """
        if self.liquid_network is None:
            raise ValueError("Liquid network not trained. Call train_liquid_network() first.")
        
        if self.verbose:
            print("Processing data through liquid network...")
        
        # Reshape features for sequence input
        features_seq = ops.reshape(features, (ops.shape(features)[0], 1, ops.shape(features)[1]))
        
        # Process data
        outputs = self.liquid_network.predict(features_seq)
        
        # Extract motor neuron outputs and trigger signals
        if isinstance(outputs, list):
            motor_outputs = outputs[0]
            trigger_signals = outputs[1][0]  # First element is trigger
        else:
            motor_outputs = outputs
            trigger_signals = ops.cast(motor_outputs > self.threshold, ops.float32)
        
        if self.verbose:
            print(f"Motor neuron output range: {stats.min(motor_outputs):.4f} to {stats.max(motor_outputs):.4f}")
            print(f"Trigger rate: {ops.stats.mean(trigger_signals):.4f}")
        
        return motor_outputs, trigger_signals

# Create integrated pipeline
pipeline = BigFramesIntegratedPipeline(
    project_id=PROJECT_ID,
    rbm_hidden_units=64,
    cfc_units=128,
    lstm_units=32,
    stride_perspectives=[1, 3, 5],
    sparsity_level=0.5,
    threshold=0.5,
    use_gpu=True,
    verbose=True
)

# Initialize feature extractor
pipeline.initialize_feature_extractor(CREDENTIALS_PATH)

# Extract features
train_features_pipeline, val_features_pipeline, test_features_pipeline = pipeline.extract_features(
    table_id=TABLE_ID,
    target_column=TARGET_COLUMN,
    limit=LIMIT,
    force_categorical_columns=[
        "eventType", "eventSource", "eventCategory", "eventPublisherId",
        "productClass", "downloadTestStatus", "uploadState", "uploadTestStatus",
        "wtn", "serialNumber"
    ]
)

# Check if features were extracted successfully
if train_features_pipeline is not None:
    # Apply temporal processing
    train_temporal = pipeline.apply_temporal_processing(train_features_pipeline)
    
    # Train RBM
    pipeline.train_rbm(train_features_pipeline, epochs=10)
    
    # Extract RBM features
    train_rbm_features_pipeline = pipeline.extract_rbm_features(train_features_pipeline)
    val_rbm_features_pipeline = pipeline.extract_rbm_features(val_features_pipeline)
    test_rbm_features_pipeline = pipeline.extract_rbm_features(test_features_pipeline)
    
    # Create dummy targets for demonstration
    train_targets_pipeline = tensor.random_uniform((ops.shape(train_rbm_features_pipeline)[0], 1))
    val_targets_pipeline = tensor.random_uniform((ops.shape(val_rbm_features_pipeline)[0], 1))
    
    # Train liquid network
    pipeline.train_liquid_network(
        features=train_rbm_features_pipeline,
        targets=train_targets_pipeline,
        validation_data=(val_rbm_features_pipeline, val_targets_pipeline),
        epochs=50,
        batch_size=32,
        network_type='lstm_gated'
    )
    
    # Process test data
    motor_outputs_pipeline, trigger_signals_pipeline = pipeline.process_data(test_rbm_features_pipeline)
    
    # Print results
    print(f"Processed {ops.shape(test_rbm_features_pipeline)[0]} test samples")
    print(f"Motor neuron output range: {stats.mints.min(motor_outputs_pipeline):.4f} to {stats.max(motor_outputs_pipeline):.4f}")
    print(f"Trigger rate: {ops.stats.mean(trigger_signals_pipeline):.4f}")
    
    # Save models
    pipeline.save_model('./models')
    
    # Print pipeline summary
    print(pipeline.summary())
    
    # Plot motor neuron outputs and triggers
    plt.figure(figsize=(12, 8))
    
    plt.subplot(2, 1, 1)
    plt.plot(ops.to_numpy(motor_outputs_pipeline[:100]), label='Motor Neuron Output')
    threshold_values = ops.full((100,), 0.5)
    plt.plot(ops.to_numpy(threshold_values), 'r--', label='Threshold')
    plt.xlabel('Sample')
    plt.ylabel('Output Value')
    plt.title('Motor Neuron Output and Threshold')
    plt.legend()
    
    plt.subplot(2, 1, 2)
    plt.plot(ops.to_numpy(trigger_signals_pipeline[:100]), 'g', label='Trigger Signal')
    trigger_mean = ops.stats.means.mean(trigger_signals_pipeline)
    plt.axhline(y=ops.to_numpy(trigger_mean), color='r', linestyle='--', 
               label=f'Trigger Rate: {ops.to_numpy(trigger_mean):.2f}')
    plt.xlabel('Sample')
    plt.ylabel('Trigger (0/1)')
    plt.title('Exploration Trigger Signals')
    plt.legend()
    
    plt.tight_layout()
    plt.show()
else:
    print("Pipeline feature extraction failed")

## Conclusion

In this notebook, we've demonstrated how to:

In [None]:
# Conclusion
#
# In this notebook, we've demonstrated how to:

# 1. Extract and prepare data from BigQuery tables using our terabyte-scale feature extractor with BigFrames
print("✅ Extracted and prepared data from BigQuery tables using BigFrames")

# 2. Apply temporal stride processing to capture patterns at different time scales
print("✅ Applied temporal stride processing to capture patterns at different time scales")

# 3. Train a Restricted Boltzmann Machine to learn latent representations
print("✅ Trained a Restricted Boltzmann Machine to learn latent representations")

# 4. Feed the RBM output into a CfC-based liquid neural network with LSTM neurons for gating
print("✅ Fed the RBM output into a CfC-based liquid neural network with LSTM neurons for gating")

# 5. Implement a motor neuron that outputs a value to trigger deeper exploration
print("✅ Implemented a motor neuron that outputs a value to trigger deeper exploration")

# 6. Analyze the results to understand which samples trigger deeper exploration
print("✅ Analyzed the results to understand which samples trigger deeper exploration")

# 7. Used emberharmony's GPU-friendly operations throughout the pipeline
print("✅ Used emberharmony's GPU-friendly operations throughout the pipeline")

# This pipeline can be used for processing terabyte-sized tables efficiently through chunked processing,
# making it suitable for large-scale data analysis and exploration.

# Import the get_backend function
from ember_ml.ops import get_backend

# Summary of the pipeline
print("\nSummary of the pipeline:")
print("------------------------")
print(f"Project ID: {PROJECT_ID}")
print(f"Table ID: {TABLE_ID}")
print(f"Target column: {TARGET_COLUMN}")
print(f"Number of features: {len(train_features) if 'train_features' in globals() else 'N/A'}")
print(f"Number of training samples: {len(train_df) if 'train_df' in globals() else 'N/A'}")
print(f"Number of validation samples: {len(val_df) if 'val_df' in globals() else 'N/A'}")
print(f"Number of test samples: {len(test_df) if 'test_df' in globals() else 'N/A'}")
print(f"RBM hidden units: {rbm.n_hidden if 'rbm' in globals() else 'N/A'}")
print(f"Liquid network type: {'Motor neuron' if 'liquid_network' in globals() else 'N/A'}")
print(f"Backend used: {get_backend()}")

# Next steps
print("\nNext steps:")
print("----------")
print("1. Fine-tune the RBM and liquid network hyperparameters")
print("2. Experiment with different stride perspectives")
print("3. Try different liquid network architectures")
print("4. Apply the pipeline to other BigQuery tables")
print("5. Implement a feedback loop for continuous learning")

# Load the saved models for inference
print("\nTo load the saved models for inference:")
print("-------------------------------------")
print("```python")
print("from emberharmony.models.optimized_rbm import OptimizedRBM")
print("from emberharmony.core.stride_aware_cfc import create_liquid_network_with_motor_neuron")
print("from emberharmony.backend import get_backend")
print("")
print("# Print the current backend")
print("current_backend = get_backend()")
print("print(f\"Using {current_backend} backend\")")
print("")
print("# Load RBM")
print("rbm = OptimizedRBM(n_visible=len(features), n_hidden=64)")
print("rbm.load('./models/rbm.npy')")
print("")
print("# Load liquid network")
print("liquid_network = create_liquid_network_with_motor_neuron(input_dim=64, units=128, output_dim=1)")
print("liquid_network.load_weights('./models/liquid_network')")
print("```")

# Thank you message
print("\nThank you for exploring this notebook!")
print("For more information, please refer to the documentation.")