In [44]:
import torch 
from torch import nn , optim
from tqdm import tqdm
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import json
from pathlib import Path

# Load preprocessed data
preprocessed_dir = Path("preprocessed")
snaps_dir = Path("preprocessed_snaps")

# Load mel-spectrograms (.npy files)
mel_files = list(preprocessed_dir.glob("*.npy"))
print(f"Found {len(mel_files)} mel-spectrogram files")

# Load corresponding snap parameters (.json files)
snap_files = list(snaps_dir.glob("*.json"))
print(f"Found {len(snap_files)} snap parameter files")

# Create X (mel-spectrograms) and y (snap parameters)
X_data = []
y_data = []

def extract_numeric_values(data):
    """Extract only numeric values from snap parameters"""
    numeric_values = []
    
    if isinstance(data, dict):
        for key, value in data.items():
            if isinstance(value, (int, float)):
                numeric_values.append(float(value))
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, (int, float)):
                        numeric_values.append(float(item))
    elif isinstance(data, list):
        for item in data:
            if isinstance(item, (int, float)):
                numeric_values.append(float(item))
    
    return numeric_values

for mel_file in mel_files:
    # Load mel-spectrogram
    mel = np.load(mel_file)
    
    # Find corresponding snap file
    snap_file = snaps_dir / (mel_file.stem + ".json")
    if snap_file.exists():
        with open(snap_file, 'r') as f:
            snap_params = json.load(f)
        
        # Extract only numeric values
        numeric_values = extract_numeric_values(snap_params)
        
        if len(numeric_values) > 0:  # Only add if we have numeric values
            X_data.append(torch.tensor(mel, dtype=torch.float32))
            y_data.append(torch.tensor(numeric_values, dtype=torch.float32))

print(f"Loaded {len(X_data)} samples")
if len(X_data) > 0:
    print(f"Mel shape: {X_data[0].shape}")
    print(f"Snap shape: {y_data[0].shape}")
    print(f"Snap values example: {y_data[0][:5]}")  # Show first 5 values

# Convert to tensors
if len(X_data) > 0:
    X = torch.stack(X_data)  # Shape: (N, mel_bins, time_frames)
    y = torch.stack(y_data)   # Shape: (N, snap_params)
    
    # Transpose X to (N, time_frames, mel_bins) for RNN
    X = X.transpose(1, 2)
    N, L, Hin = X.shape
    print(f"Final X shape: {X.shape}, y shape: {y.shape}")
else:
    print("No valid data found!")
    X = torch.randn(4, 3, 2)  # Fallback dummy data
    y = torch.randn(4, 5)      # Fallback dummy targets
    N, L, Hin = X.shape

batchsize = 2

Found 35 mel-spectrogram files
Found 25 snap parameter files
Loaded 0 samples
No valid data found!


In [None]:
# Create train/test split
from torch._tensor import Tensor


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
print(f"Train: {X_train.shape}, Test: {X_test.shape}")

# Create data loaders
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader[tuple[Tensor, ...]](train_dataset, batch_size=batchsize, shuffle=True)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batchsize, shuffle=False)

# Test data loading
for count, (xb, yb) in enumerate(train_loader):
    print(f"Batch {count}: X shape: {xb.shape}, y shape: {yb.shape}")
    if count >= 2:  # Show first 3 batches
        break




Train: torch.Size([3, 3, 2]), Test: torch.Size([1, 3, 2])
Batch 0: X shape: torch.Size([2, 3, 2]), y shape: torch.Size([2, 5])
Batch 1: X shape: torch.Size([1, 3, 2]), y shape: torch.Size([1, 5])


In [46]:
# Model Architecture
Hout = 64  # Hidden state size (increased for better capacity)
output_dim = y.shape[1]  # Number of snap parameters to predict

print(f"Input features (mel bins): {Hin}")
print(f"Output features (snap params): {output_dim}")
print(f"Hidden size: {Hout}")

# Initialize weights
Wx = torch.randn(Hout, Hin) * 0.1
bx = torch.zeros(Hout)
Wh = torch.randn(Hout, Hout) * 0.1
bh = torch.zeros(Hout)


Input features (mel bins): 2
Output features (snap params): 5
Hidden size: 64


In [47]:

# Create RNN backbone
backbone = nn.RNN(input_size=Hin, hidden_size=Hout, batch_first=True)
backbone.weight_ih_l0.data = Wx
backbone.bias_ih_l0.data = bx
backbone.weight_hh_l0.data = Wh
backbone.bias_hh_l0.data = bh

print("RNN backbone created successfully")

RNN backbone created successfully


In [48]:
# Create prediction head
head = nn.Linear(in_features=Hout, out_features=output_dim)
print(f"Head layer: {Hout} -> {output_dim}")

# Combine model components
model = nn.ModuleList([backbone, head])
print("Model created successfully")

Head layer: 64 -> 5
Model created successfully


In [49]:

# Training setup
loss_fn = nn.MSELoss()  # Regression loss for snap parameter prediction
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("Starting training...")
for epoch in tqdm(range(25)):  # Train for 100 epochs
    model.train()
    total_loss = 0
    
    for xb, yb in train_loader:
        # Initialize hidden state for each batch
        ht_1 = torch.zeros(1, xb.size(0), Hout)
        
        # Forward pass
        output, ht = backbone(xb, ht_1)
        
        # Use the last hidden state for prediction
        y_pred = head(ht.squeeze(0))  # Remove batch dimension
        
        # Compute loss
        loss = loss_fn(y_pred, yb)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")

print("Training completed!")

# Create models folder and save the trained model
models_dir = Path("models")
models_dir.mkdir(exist_ok=True)

# Save the trained model
model_save_path = models_dir / "trained_model.pt"
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': avg_loss,
    'epoch': 5,
    'model_config': {
        'Hin': Hin,
        'Hout': Hout,
        'output_dim': output_dim,
        'L': L
    }
}, model_save_path)

print(f"Model saved to {model_save_path}")

# Also save just the model for easy loading
complete_model_path = models_dir / "model_complete.pt"
torch.save(model, complete_model_path)
print(f"Complete model saved to {complete_model_path}")


Starting training...


100%|██████████| 25/25 [00:00<00:00, 559.86it/s]

Epoch 1, Average Loss: 0.8480
Epoch 2, Average Loss: 0.8920
Epoch 3, Average Loss: 0.8128
Epoch 4, Average Loss: 0.7216
Epoch 5, Average Loss: 0.7231
Epoch 6, Average Loss: 0.7231
Epoch 7, Average Loss: 0.6210
Epoch 8, Average Loss: 0.5940
Epoch 9, Average Loss: 0.5513
Epoch 10, Average Loss: 0.5276
Epoch 11, Average Loss: 0.4663
Epoch 12, Average Loss: 0.4687
Epoch 13, Average Loss: 0.4816
Epoch 14, Average Loss: 0.4533
Epoch 15, Average Loss: 0.3972
Epoch 16, Average Loss: 0.4024
Epoch 17, Average Loss: 0.3638
Epoch 18, Average Loss: 0.3628
Epoch 19, Average Loss: 0.2524
Epoch 20, Average Loss: 0.3301
Epoch 21, Average Loss: 0.3141
Epoch 22, Average Loss: 0.3228
Epoch 23, Average Loss: 0.2863
Epoch 24, Average Loss: 0.3142
Epoch 25, Average Loss: 0.1949
Training completed!
Model saved to models/trained_model.pt
Complete model saved to models/model_complete.pt





In [50]:
# How to load the saved model from models folder

# Method 1: Load complete model (easiest) - with weights_only=False for trusted source
loaded_model = torch.load("models/model_complete.pt", weights_only=False)
print("Complete model loaded successfully")

# Method 2: Load state dict (more flexible)
checkpoint = torch.load("models/trained_model.pt")
print(f"Loaded checkpoint from epoch {checkpoint['epoch']}")
print(f"Final loss: {checkpoint['loss']:.4f}")
print(f"Model config: {checkpoint['model_config']}")

# Create new model and load state dict
new_model = nn.ModuleList([
    nn.RNN(input_size=checkpoint['model_config']['Hin'], 
           hidden_size=checkpoint['model_config']['Hout'], 
           batch_first=True),
    nn.Linear(in_features=checkpoint['model_config']['Hout'], 
              out_features=checkpoint['model_config']['output_dim'])
])

new_model.load_state_dict(checkpoint['model_state_dict'])
print("Model state dict loaded successfully")

# Example: Make a prediction with loaded model
if len(X_data) > 0:
    with torch.no_grad():
        sample_input = X[:1]  # Take first sample
        ht_1 = torch.zeros(1, 1, checkpoint['model_config']['Hout'])
        output, ht = new_model[0](sample_input, ht_1)
        prediction = new_model[1](ht.squeeze(0))
        print(f"Sample prediction shape: {prediction.shape}")
        print(f"Sample prediction values: {prediction[0][:5]}")  # First 5 values


Complete model loaded successfully
Loaded checkpoint from epoch 5
Final loss: 0.1949
Model config: {'Hin': 2, 'Hout': 64, 'output_dim': 5, 'L': 3}
Model state dict loaded successfully
