In [6]:
from torch.utils.data import DataLoader
import torch
import random
from typing import Dict

# Import missing modules for optimization
import torch.optim as optim
from torch.optim import lr_scheduler

# Import our custom dataset and augmentation pipeline.
from process_sml import (
    AudioDatasetFolder, Compose,
    RandomPitchShift_wav,RandomVolume_wav,RandomAbsoluteNoise_wav,RandomSpeed_wav,RandomFade_wav,RandomFrequencyMasking_spec,RandomTimeMasking_spec,RandomTimeStretch_spec)
# Import the UNet model and the training function from the training module.
from train_sml import UNet, train_model_source_separation,LiteResUNet,infer_and_save
import torch.nn as nn

# Names of the audio components requested from the dataset. "mixture" is the
# entry handed to the dataset as its input_name below.
COMPONENT_MAP = ["mixture", "drums", "bass", "other_accompaniment", "vocals"]
# Every component except the mixture, in the same order as COMPONENT_MAP.
label_names = [stem for stem in COMPONENT_MAP if stem != "mixture"]

# Dataset described by output_stems/test_one.csv, with audio paths resolved
# against the current directory (adjust audio_dir as needed).
# NOTE(review): sample_rate/duration are presumably Hz and seconds -- confirm
# against AudioDatasetFolder in process_sml.
dataset_val = AudioDatasetFolder(
    csv_file='output_stems/test_one.csv',
    audio_dir='.',
    components=COMPONENT_MAP,
    sample_rate=16000,
    duration=10.0,
    is_track_id=True,
    input_name="mixture",
)

# Batches of up to 40 items; no shuffle argument is given, so the loader keeps
# DataLoader's default (sequential) ordering.
data_loader = DataLoader(dataset=dataset_val, batch_size=40)


In [7]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# LiteResUNet on a "resnet18" backbone, configured with label_names as its
# source names and 4 input channels.
# NOTE(review): pretrained=True presumably loads backbone weights that the
# checkpoint loaded in a later cell replaces -- confirm whether it is needed.
model = LiteResUNet(
    backbone="resnet18",
    source_names=label_names,
    pretrained=True,
    in_channels=4,
)



In [8]:
# Read the saved checkpoint and pull out the model weights.
# map_location="cpu" restores every tensor onto the CPU: without it, a
# checkpoint written from a CUDA run makes torch.load raise on a machine with
# no GPU, even though this notebook otherwise falls back to the CPU. The model
# is moved to `device` in a later cell, so nothing is lost on GPU machines.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
presaved_weights = torch.load("checkpoints/model_epoch_28.pth", map_location="cpu")
state_of_dict = presaved_weights['model_state_dict']


In [9]:
# Copy the checkpoint weights into the model. strict=True (the default) raises
# if the checkpoint and model keys differ; the returned key report is left as
# the cell's last expression so it is displayed.
model.load_state_dict(state_of_dict, strict=True)

<All keys matched successfully>

In [10]:

# Move the model, with the checkpoint weights already loaded, to the selected
# device and switch it to evaluation mode before running inference.
# eval() makes layers such as BatchNorm and Dropout use their inference
# behaviour; it is idempotent, so it is harmless if infer_and_save also sets it.
model = model.to(device)
model.eval()

# Run the model over the loader; outputs are written under ./inference_outputs.
# label_names is reused from the dataset cell so the two cannot drift apart,
# and sample_rate matches the value the dataset was created with.
infer_and_save(
    model=model,
    dataloader=data_loader,
    device=device,
    output_dir="./inference_outputs",
    input_name="mixture",
    label_names=label_names,
    sample_rate=16000,
)



✅ All inference outputs saved to ./inference_outputs
