# Data Loading Demo

In [12]:
# Standard-library imports first, then the path tweak, then the project import.
import os
import sys

# Make the directory that contains the `data_scripts` package importable.
# The relative entry is resolved against the working directory, so this assumes
# the notebook is run from inside `data_scripts/` (see the cwd printed below).
# The membership guard keeps re-running this cell from stacking duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

from data_scripts.data_loading import load_data, partials_n_frames  # streamlined loader API

# Show the working directory, since the relative path above depends on it.
print(os.getcwd())

/Users/keegz_dsouza/Documents/Code/FINAL/Voice-Cloning/data_scripts


### Loading the Data

In [None]:
# Build the processed-data location from the working directory instead of
# hard-coding one machine's home folder. The loader is handed an absolute path,
# so the relative location is normalised with abspath. This assumes the notebook
# runs from `data_scripts/`, the same assumption the `sys.path.append("..")`
# import setup already makes.
processed_root = os.path.abspath(os.path.join("..", "data", "processed_data"))

# In the saved run, a batch held 40 speakers and 400 utterances in total.
dataset, loader = load_data(
    processed_root=processed_root,  # !Always requires absolute path
    speakers_per_batch=40,
    utterances_per_speaker=10,
    num_workers=0,
)

In [11]:
# Pull a single batch off the loader so its contents can be inspected.
batch = next(iter(loader))

# High-level overview of the batch: its type, speaker count and stacked data shape.
# The trailing empty string reproduces the blank line after the summary.
print(
    "=== Batch Summary ===",
    f"Batch type: {type(batch)}",
    f"Number of speakers in batch: {len(batch.speakers)}",
    f"Combined data shape (utterances x frames x features): {batch.data.shape}",
    "",
    sep="\n",
)

# Per-speaker report: draw one random partial utterance from every speaker.
print("=== Speaker Details ===")
for speaker in batch.speakers:
    # random_partial returns a list; each entry unpacks as
    # (Utterance, partial_frames, (start, end)).
    partials = speaker.random_partial(1, partials_n_frames)
    utterance_obj, frames, (start, end) = partials[0]

    # Length of the full utterance the partial was cut from, for comparison.
    original_frames = utterance_obj.get_frames().shape[0]

    print(
        f"Speaker '{speaker.name}':",
        f"  - Sampled partial utterance from frame {start} to {end} (total frames in partial: {frames.shape[0]})",
        f"  - Original utterance had {original_frames} frames",
        f"  - Partial shape: {frames.shape}",
        "-" * 40,
        sep="\n",
    )


=== Batch Summary ===
Batch type: <class 'data_scripts.data_loader.SpeakerBatch'>
Number of speakers in batch: 40
Combined data shape (utterances x frames x features): (400, 160, 40)

=== Speaker Details ===
Speaker '4160':
  - Sampled partial utterance from frame 741 to 901 (total frames in partial: 160)
  - Original utterance had 1372 frames
  - Partial shape: (160, 40)
----------------------------------------
Speaker '7517':
  - Sampled partial utterance from frame 454 to 614 (total frames in partial: 160)
  - Original utterance had 1366 frames
  - Partial shape: (160, 40)
----------------------------------------
Speaker '163':
  - Sampled partial utterance from frame 186 to 346 (total frames in partial: 160)
  - Original utterance had 1063 frames
  - Partial shape: (160, 40)
----------------------------------------
Speaker '5652':
  - Sampled partial utterance from frame 199 to 359 (total frames in partial: 160)
  - Original utterance had 661 frames
  - Partial shape: (160, 40)
---