In [None]:
#close ray first
import os
os.add_dll_directory(r"C:\\ffmpeg\\bin")
import torch
import torchaudio 
import torchaudio.transforms as T
import torchvision.utils
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from sklearn.model_selection import train_test_split
# Ray Tune imports
from ray import train, tune
from ray.tune.schedulers import ASHAScheduler
import torch.optim as optim
import ray
ray.shutdown()
ray.init(include_dashboard=False)

2025-11-02 18:06:20,613	INFO worker.py:2012 -- Started a local Ray instance.


0,1
Python version:,3.11.9
Ray version:,2.51.1


In [2]:
#instantiate a constant string as path to the dataset, keep th double backslashes for windows
AUDIO_DIR = r"C:\batSounds\\classifiedBats"
#instantiate a constant as batch size
BATCH_SIZE = 2
TEST_SPLIT_SIZE = 0.2       # 20% for validation, 80% for training
RANDOM_SEED = 42            # For reproducible splits
TARGET_SAMPLE_RATE = 325000  # Pick a standard rate to resample to
N_MELS = 64                 # Number of Mel filterbanks
#initialize empty int variable
number_of_classes = 0


In [3]:
# Use pathlib to find all .wav files
all_files = list(Path(AUDIO_DIR).rglob("*.wav"))

# Get all unique class names (the subfolder names)
class_names = sorted(list(set(path.parent.name for path in all_files)))

# Create a class-to-index mapping (e.g., {"bat_A": 0, "bat_B": 1})
class_to_idx = {name: i for i, name in enumerate(class_names)}

# Create the master list of (filepath, label_index)
all_samples = []
for file_path in all_files:
    class_name = file_path.parent.name
    label_index = class_to_idx[class_name]
    all_samples.append((str(file_path), label_index))
    
# Create a separate list of just the labels for stratification
all_labels = [label for (path, label) in all_samples]

train_samples, val_samples = train_test_split(
    all_samples,
    test_size=TEST_SPLIT_SIZE,
    random_state=RANDOM_SEED,
    stratify=all_labels  # This is the most important part!
)

number_of_classes = len(class_names)

print(f"Total samples: {len(all_samples)}")
print(f"Training samples: {len(train_samples)}")
print(f"Validation samples: {len(val_samples)}")


Total samples: 360
Training samples: 288
Validation samples: 72


In [4]:
#create a datatransformer to transform the audio to spectrograms
spectrogram_transform = torch.nn.Sequential(
    T.MelSpectrogram(
        sample_rate=TARGET_SAMPLE_RATE,  # 325000
        n_fft=1024,
        hop_length=512,
        n_mels=N_MELS,                       # 128 bins this is the number of mel filterbanks and channels for the CNN
        f_min=20000                      # this filters out low frequencies below 20kHz as bats use ultrasonic frequencies
    ),
    T.AmplitudeToDB()
)

def collate_fn(batch):
    
    spectrograms = []
    labels = []
    
    for (wav_path, label) in batch:
        # --- FIX 1: Load the audio file ---
        try:
            waveform, sample_rate = torchaudio.load(wav_path)
        except Exception as e:
            print(f"Error loading {wav_path}: {e}")
            continue # Skip this file
            
        # --- FIX 1.5: Resample the audio ---
        # This is necessary for the transform to be consistent
        if sample_rate != TARGET_SAMPLE_RATE:
            resampler = T.Resample(orig_freq=sample_rate, new_freq=TARGET_SAMPLE_RATE)
            waveform = resampler(waveform)

        # --- Transform the audio tensor ---
        spec = spectrogram_transform(waveform)
        
        # Squeeze to [Mels, Time] for padding
        spectrograms.append(spec.squeeze(0)) 
        labels.append(label)

    # --- FIX 2: Pad the spectrograms instead of stacking ---
    # Transpose to [Time, Mels] for pad_sequence
    spectrograms = [s.transpose(0, 1) for s in spectrograms]
    
    spectrograms_padded = torch.nn.utils.rnn.pad_sequence(
        spectrograms, 
        batch_first=True, 
        padding_value=0.0
    ) # Shape is now [Batch, MaxTime, Mels]
    
    # Transpose back to [Batch, Mels, MaxTime]
    # and add a channel dim for the CNN: [Batch, 1, Mels, MaxTime]
    spectrograms_padded = spectrograms_padded.transpose(1, 2).unsqueeze(1)
    
    # --- Convert labels to a tensor ---
    labels = torch.tensor(labels)
    
    return spectrograms_padded, labels

In [5]:
#Create a dataloadeer for training and validation
train_loader = DataLoader(train_samples, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_samples, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
#Example of iterating through the dataloader
for spectrograms, labels in train_loader:
    print(spectrograms.shape)
    print(labels)
    break

torch.Size([2, 1, 64, 898])
tensor([2, 2])


In [6]:
import torch.nn as nn
import torch.nn.functional as F

# This is the corrected class that accepts a 'config' dictionary
class BatClassifierCNN(nn.Module):
    def __init__(self, n_classes, config):
        """
        Initialize the model, taking hyperparameters from a 'config' dict.
        """
        super(BatClassifierCNN, self).__init__()
        
        # --- Get hyperparameters from config ---
        fc1_units = config["fc1_units"]
        dropout_rate = config["dropout_rate"]
        
        # --- Block 1 ---
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.skip1 = nn.Conv2d(1, 16, kernel_size=1) 
        self.pool1 = nn.MaxPool2d(2, 2)

        # --- Block 2 ---
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.skip2 = nn.Conv2d(16, 32, kernel_size=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        # --- Block 3 ---
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.skip3 = nn.Conv2d(32, 64, kernel_size=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        
        # Use the variable dropout_rate
        self.dropout = nn.Dropout(dropout_rate)
        
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))
        
        # Use the variable fc1_units
        self.fc1 = nn.Linear(64 * 4 * 4, fc1_units)
        # This line is also updated to use fc1_units
        self.fc2 = nn.Linear(fc1_units, n_classes)

    def forward(self, x):
        # The forward pass is identical
        
        # --- Block 1 ---
        conv_out = F.relu(self.conv1(x))
        skip_out = self.skip1(x)
        x = self.pool1(F.relu(conv_out + skip_out))
        
        # --- Block 2 ---
        conv_out = F.relu(self.conv2(x))
        skip_out = self.skip2(x)
        x = self.pool2(F.relu(conv_out + skip_out))

        # --- Block 3 ---
        conv_out = F.relu(self.conv3(x))
        skip_out = self.skip3(x)
        x = self.pool3(F.relu(conv_out + skip_out))
        
        # --- Classifier Head ---
        x = self.adaptive_pool(x)
        x = x.view(x.size(0), -1) 
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.fc2(x)
        
        return x

In [7]:
METRIC = "accuracy"
MODE = "max"

In [8]:
import tempfile
from ray.tune import Checkpoint
import os
os.environ["TUNE_DISABLE_RICH_LOGGING"] = "1"

EPOCHS = 2
SAMPLES = 2
STORAGE_PATH = r"C:\\batSounds\\RayR"

# Get the number of classes from your dataset
n_classes = number_of_classes

# (Your BatClassifierCNN class, collate_fn, etc. are defined above)

def trainable_function(config, train_dataset, val_dataset, n_classes, collate_fn):
    
    # (Your ffmpeg/DLL fix is correct)
    import os
    try:
        os.add_dll_directory(r"C:\\ffmpeg\\bin")
    except FileNotFoundError:
        pass
    
    # 1. Create DataLoaders (This is correct)
    train_loader = DataLoader(
        train_dataset,
        batch_size=config["batch_size"],
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=0 
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=config["batch_size"] * 2,
        shuffle=False,
        collate_fn=collate_fn,
        num_workers=0
    )

    # 2. Setup Model, Optimizer, and Loss (This is correct)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BatClassifierCNN(n_classes=n_classes, config=config).to(device)
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])
    criterion = nn.CrossEntropyLoss()

    # 3. Training and Validation Loop
    for epoch in range(EPOCHS):
        # --- Training Step (This is correct) ---
        model.train()
        for specs, labels in train_loader:
            specs, labels = specs.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(specs)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

        # --- Validation Step (This is correct) ---
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for specs, labels in val_loader:
                specs, labels = specs.to(device), labels.to(device)
                outputs = model(specs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                correct += predicted.eq(labels).sum().item()
        
        avg_val_loss = (val_loss / len(val_loader)) if len(val_loader) > 0 else 0
        val_accuracy = (correct / total) if total > 0 else 0
        
        # --- 4. THIS IS THE CORRECTED CHECKPOINTING CODE ---
        
        # A. Create a temporary directory
        with tempfile.TemporaryDirectory() as temp_checkpoint_dir:
            
            # B. Save the model into that new directory
            path = os.path.join(temp_checkpoint_dir, "model.pth")
            torch.save(model.state_dict(), path)

            # C. Report metrics AND the checkpoint from the temp directory
            # Ray Tune will copy this to the permanent trial folder for you.
            ray.tune.report(
                {"loss": avg_val_loss, "accuracy": val_accuracy},
                checkpoint=Checkpoint.from_directory(temp_checkpoint_dir)
            )
        
# Add batch_size to the search space
search_space = {
    "lr": tune.loguniform(1e-4, 1e-2),
    "fc1_units": tune.choice([256, 512]), 
    "dropout_rate": tune.uniform(0.2, 0.3),
    "batch_size": tune.choice([2,4])
}

# --- This call is now changed ---
# Pass the Datasets and collate_fn, NOT the DataLoaders
trainable_with_data = tune.with_parameters(
    trainable_function,
    train_dataset=train_samples,
    val_dataset=val_samples,
    n_classes=n_classes,
    collate_fn=collate_fn
)

# -----------------------------------------------------------------
# STEP 3: CONFIGURE AND RUN THE TUNER
# -----------------------------------------------------------------

# Use an ASHA Scheduler to kill bad trials early
scheduler = ASHAScheduler(
    metric=METRIC,
    mode=MODE,
    max_t=EPOCHS,         # Max 10 epochs
    grace_period=1,   # Let every trial run at least 1 epoch
    reduction_factor=2
)

# Configure the Tuner

def short_trial_name(trial):
    return f"trial_{trial.trial_id}"


tuner = tune.Tuner(
    trainable_with_data,
    param_space=search_space,
    tune_config=tune.TuneConfig(
        num_samples=SAMPLES,  # Run 10 different hyperparameter combinations
        scheduler=scheduler,
        trial_dirname_creator=short_trial_name # <-- Add this line

    ),
    run_config=tune.RunConfig(
        name="bat_tuning_experiment",
        storage_path="C:\\batSounds\\RayR", # Where to save results
        verbose=1
    )
)

# Run the tuning process!
results = tuner.fit()

# Get the best result
best_result = results.get_best_result(metric="accuracy", mode="max")

print("\n--- TUNING COMPLETE ---")
print(f"Best trial config: {best_result.config}")
print(f"Best trial final validation accuracy: {best_result.metrics['accuracy']}")
print(f"Best trial final validation loss: {best_result.metrics['loss']}")

0,1
Current time:,2025-11-02 18:08:00
Running for:,00:01:36.04
Memory:,15.3/31.6 GiB

Trial name,status,loc,batch_size,dropout_rate,fc1_units,lr,iter,total time (s),loss,accuracy
trainable_function_431b8_00000,TERMINATED,127.0.0.1:17000,4,0.280018,512,0.00360126,2,90.6791,1.08029,0.555556
trainable_function_431b8_00001,TERMINATED,127.0.0.1:7260,4,0.229884,256,0.00920451,2,89.3075,1.13371,0.555556


[36m(pid=gcs_server)[0m [2025-11-02 18:06:48,290 E 19784 20052] (gcs_server.exe) gcs_server.cc:302: Failed to establish connection to the event+metrics exporter agent. Events and metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[33m(raylet)[0m [2025-11-02 18:06:52,272 E 22652 6296] (raylet.exe) main.cc:975: Failed to establish connection to the metrics exporter agent. Metrics will not be exported. Exporter agent status: RpcError: Running out of retries to initialize the metrics agent. rpc_code: 14
[36m(trainable_function pid=7260)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/batSounds/RayR/bat_tuning_experiment/trial_431b8_00001/checkpoint_000000)
[36m(trainable_function pid=7260)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/batSounds/RayR/bat_tuning_experiment/trial_431b8_00001/checkpoint_000001)[32m [repeated 2x across cluster] (Ray


--- TUNING COMPLETE ---
Best trial config: {'lr': 0.0036012611192963806, 'fc1_units': 512, 'dropout_rate': 0.2800175605278557, 'batch_size': 4}
Best trial final validation accuracy: 0.5555555555555556
Best trial final validation loss: 1.0802880658043756


[36m(trainable_function pid=17000)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/batSounds/RayR/bat_tuning_experiment/trial_431b8_00000/checkpoint_000001)


In [9]:
import torch
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# --- Make sure these are defined ---
# from your_model_file import BatClassifierCNN 
# from your_data_file import test_loader, class_names, number_of_classes
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# results = ... (your results object from tuner.fit())

# --- Step 1: Get Best Result ---
# This line is correct
best_result = results.get_best_result(metric="accuracy", mode="max")

# --- Step 2: Get Config and CORRECT Model Path ---

# 1. Get the config (this is correct)
best_config = best_result.config

# 2. Get the path to the CHECKPOINT directory
# This .path attribute now points to the subfolder (e.g., ".../checkpoint_000001/")
best_checkpoint_dir = best_result.checkpoint.path

# 3. Create the final path to the model file
# (We saved it as "model.pth" inside that directory)
model_state_path = os.path.join(best_checkpoint_dir, "model.pth")

# --- Step 3: Load Model (This is the same as before) ---
model = BatClassifierCNN(n_classes=number_of_classes, config=best_config)
model.load_state_dict(torch.load(model_state_path, map_location=torch.device('cpu')))
model.to(device)

print(f"Successfully loaded best model from: {model_state_path}")
print(f"With config: {best_config}")

# --- Step 4 & 5 (Get Predictions and Plot) ---
# (These are identical to the previous answer)

y_true = []
y_pred = []
model.eval()

with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted_indices = torch.max(outputs.data, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted_indices.cpu().numpy())

print("Inference complete.")

cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

fig, ax = plt.subplots(figsize=(10, 10))
disp.plot(ax=ax, cmap=plt.cm.Blues, xticks_rotation='vertical')
plt.title('Confusion Matrix for Best Model')
plt.show()

NameError: name 'device' is not defined

In [None]:
# Create lists to store all true labels and predicted labels
y_true = []
y_pred = []

# Set model to evaluation mode
model.eval()

# Run inference
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move data to the device
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Get model outputs (logits)
        outputs = model(inputs)

        # Get the prediction (class with the highest score)
        # torch.max returns (values, indices)
        _, predicted_indices = torch.max(outputs.data, 1)

        # Append batch results to our lists
        # .cpu() is important to move data back from GPU to main memory
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted_indices.cpu().numpy())

print("Inference complete.")
# 1. Generate the confusion matrix
cm = confusion_matrix(y_true, y_pred)

# 2. Plot the confusion matrix
# !!! Make sure class_names is defined
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=class_names)

# Plot and show
fig, ax = plt.subplots(figsize=(10, 10))
disp.plot(ax=ax, cmap=plt.cm.Blues, xticks_rotation='vertical')
plt.title('Confusion Matrix for Best Model')
plt.show()