# Pipeline Code Tester

In [None]:
# --- CELL 0: CONFIGURE DISPLAY ---

import pandas as pd

# Setting each of these options to None lifts the corresponding pandas display cap.
UNCAPPED_DISPLAY_OPTIONS = (
    'display.max_columns',   # show every column
    'display.max_rows',      # show every row
    'display.max_colwidth',  # never truncate the text inside a cell
    'display.width',         # let pandas work out the output width itself
)

for display_option in UNCAPPED_DISPLAY_OPTIONS:
    pd.set_option(display_option, None)

print("✅ Pandas display limits removed. Be careful printing huge dataframes!")

In [None]:
# --- CELL 1: SETUP ---
# Enable autoreload (mode 2) before importing the pipeline so that edits to the
# module on disk are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import numpy as np
# Import the pipeline module under the alias used by every later cell.
# transformation_mart_pipeline.py must be importable from here, e.g. by sitting
# in the same folder as this notebook.
import transformation_mart_pipeline as robot_code

# Smoke test: reading a module-level constant proves the import succeeded.
print(f"✅ Loaded pipeline. VISUAL_CONF_THR is set to: {robot_code.VISUAL_CONF_THR}")

In [None]:
# --- CELL 2: DATA LOADING ---
# Load the four staging CSVs from the notebook's working directory.
#
# Every file is read before any *_df name is bound, so a missing file can never
# leave the kernel holding a mix of freshly loaded and stale frames.

staging_csv_paths = {
    'audio': 'stg_audio_data.csv',
    'imu': 'stg_imu_data.csv',
    'visual': 'stg_visual_data.csv',
    'motor': 'stg_motor_data.csv',
}

try:
    staging_frames = {
        source: pd.read_csv(csv_path)
        for source, csv_path in staging_csv_paths.items()
    }
except FileNotFoundError:
    print("⚠️ Files not found. You need the CSV files in this folder to run the pipeline.")
    # Re-raise so "Run All" stops here with the offending path in the traceback,
    # instead of failing later with a NameError or silently reusing old frames.
    # To work without the real files, build the four frames from a synthetic
    # generator here and drop the raise.
    raise

audio_df = staging_frames['audio']
imu_df = staging_frames['imu']
visual_df = staging_frames['visual']
motor_df = staging_frames['motor']
print("✅ Real Data Loaded")

In [None]:
# --- CELL 3: TRANSFORM (SAFE MODE) ---
# Runs the four per-sensor transforms one after another on the frames loaded in
# Cell 2. Each result is kept in its own t_* variable for the mart build.

# 1. AUDIO
# NOTE(review): the pipeline is said to free its own memory after this step;
# that is not visible from this notebook — confirm in transform_audio.
print("Processing Audio...")
t_audio = robot_code.transform_audio(audio_df)

# 2. IMU (described as lightweight)
print("Processing IMU...")
t_imu = robot_code.transform_imu(imu_df)

# 3. VISUAL (described as the heavy step; note it is NOT last — motor follows)
print("Processing Visual...")
# Forced onto the CPU via device='cpu'.
# Tip carried over from the author: switch to the 'yolov8n.pt' (nano) weights
# instead of 'medium' if this still crashes. No model argument is passed here,
# so presumably that is changed inside the pipeline module — TODO confirm.
t_visual = robot_code.transform_visual(visual_df, device='cpu') 

# 4. MOTOR
print("Processing Motor...")
t_motor = robot_code.transform_motor(motor_df)

print("✅ All transformations complete.")

In [None]:
# --- CELL 4: INSPECT INTERMEDIATES ---
# Renders one transformed frame for eyeballing; swap which line is commented
# out to look at a different one. Cell 0 removed the pandas row/column caps, so
# display() renders the whole frame rather than a truncated preview.

# display(t_visual) 
# display(t_imu)
display(t_audio)

In [None]:
# --- CELL 5: MART GENERATION ---
# Combines the four transformed frames from Cell 3 into the experiences mart.
# N_FRAMES=12 is forwarded to the pipeline; presumably a window length in
# frames — TODO confirm against build_mrt_experiences.

mart_df = robot_code.build_mrt_experiences(
    t_audio, 
    t_imu, 
    t_visual, 
    t_motor, 
    N_FRAMES=12
)

# Report the row count, then render the full mart (display caps were lifted in Cell 0).
print(f"✅ Mart Built. Total Experience Rows: {len(mart_df)}")
display(mart_df)

# Pipeline Plotting

In [None]:
# --- CELL: SPECTROGRAM + DETECTION VISUALIZER ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ast
import transformation_mart_pipeline as robot_code

# 1. Load Raw Data
# Re-reads the audio staging CSV so this cell does not depend on the frames
# loaded by the tester cells above.
print("Loading data...")
df_audio = pd.read_csv('stg_audio_data.csv')

# 2. Parse Audio Samples
# Accumulators filled by the loop below: one flat list holding every sample,
# and the frame_id of each row that contributed samples.
print("Parsing audio samples...")
all_samples = []
frame_ids = []

def parse_samples(val):
    """Turn one ``audio_samples`` cell into a list of samples.

    Args:
        val: The raw cell value. Either a string holding a Python literal
            (as produced when a list is round-tripped through CSV), an
            already-materialised list/tuple, or anything else (e.g. NaN for
            a missing cell).

    Returns:
        A list of samples. An empty list is returned when the value is
        missing, cannot be parsed, or parses to something that is not a
        list/tuple, so callers can always ``extend`` with the result.
    """
    if isinstance(val, str):
        try:
            val = ast.literal_eval(val)
        # The failure modes documented for literal_eval on malformed input.
        # Deliberately not a bare except: KeyboardInterrupt / SystemExit must
        # still be able to stop a long parsing loop.
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return []
    if isinstance(val, list):
        return val
    if isinstance(val, tuple):
        return list(val)
    # Scalars, dicts, strings-inside-strings, NaN, ... are not sample sequences.
    return []

# Walk the two needed columns in lockstep; rows whose samples are missing or
# unparseable contribute nothing to either accumulator.
for raw_samples, frame_id in zip(df_audio['audio_samples'], df_audio['frame_id']):
    parsed = parse_samples(raw_samples)
    if not parsed:
        continue
    all_samples.extend(parsed)
    frame_ids.append(frame_id)

# One continuous signal made of every frame's samples, in row order.
audio_stream = np.array(all_samples)

# 3. Run the Pipeline
# Recomputes the audio detections from the frame loaded in this cell. This
# rebinds t_audio, replacing the value produced by the tester's transform cell.
print("Running detection pipeline...")
# NOTE(review): the author expects this to run on the CPU without crashing; no
# device argument is passed here, so that depends on the pipeline module — confirm.
t_audio = robot_code.transform_audio(df_audio)

# 4. PLOTTING
# Two stacked panels sharing one x-axis measured in frame IDs:
# spectrogram on top, model detections underneath.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10), sharex=True)

# --- TOP PLOT: SPECTROGRAM ---
# Sampling rate handed to specgram; also fixes the top of the frequency axis.
# Assumed to match how the staged audio was captured — TODO confirm.
sample_rate_hz = 16000

if len(audio_stream) > 0:
    Pxx, freqs, bins, im = ax1.specgram(
        audio_stream,
        NFFT=1024,
        Fs=sample_rate_hz,
        noverlap=512,
        cmap='magma'
    )
    # Re-map the image from seconds onto frame IDs. Frame k is drawn as the
    # interval [k, k + 1) in the detection panel, so the audio of frames
    # min..max spans [min, max + 1]; using max alone would squeeze the image by
    # one frame and collapse it to zero width when there is a single frame.
    # The y-range runs up to the Nyquist frequency.
    if frame_ids:
        im.set_extent([min(frame_ids), max(frame_ids) + 1, 0, sample_rate_hz / 2])

    ax1.set_ylabel("Frequency (Hz)")
    ax1.set_title("Spectrogram (Visual Truth)", fontsize=12)
    ax1.grid(False)
else:
    # Make the empty case explicit instead of leaving an unlabeled blank panel.
    ax1.set_title("Spectrogram (no audio samples parsed)", fontsize=12)

# --- BOTTOM PLOT: DETECTIONS ---
detection_frames = t_audio['frame_id']
# Both traces are drawn as steps that hold their value until the next frame.
step_style = dict(linewidth=2, drawstyle='steps-post')

# Cat voice: sits exactly on the flag values.
ax2.plot(
    detection_frames,
    t_audio['is_cat_voice'],
    label='Cat (AST Model)',
    color='#FF8C00',
    **step_style,
)

# Human voice: nudged up by 0.05 so it stays visible where both traces coincide.
ax2.plot(
    detection_frames,
    t_audio['is_human_voice'] + 0.05,
    label='Human (AST Model)',
    color='#1E90FF',
    linestyle='--',
    **step_style,
)

# Axis cosmetics: name the two flag levels on the y-axis.
ax2.set_title("AST Model Detections", fontsize=12)
ax2.set_xlabel("Frame ID")
ax2.set_ylabel("Detection")
ax2.set_yticks([0, 1])
ax2.set_yticklabels(['Silence', 'Detected'])
ax2.grid(True, alpha=0.3)
ax2.legend(loc='upper right')

# Highlight detected regions
# Shade the one-frame-wide band starting at every frame flagged as cat voice.
is_cat_frame = t_audio['is_cat_voice'] == 1
cat_indices = t_audio[is_cat_frame]['frame_id']
for start_frame in cat_indices:
    ax2.axvspan(start_frame, start_frame + 1, color='orange', alpha=0.15)

plt.tight_layout()
plt.show()