In [1]:
import os
import librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm

# ✅ Set paths
soundscape_dir = "E:/birdclef-2024/unlabeled_soundscapes"
output_dir = "E:/birdclef-2024/unlabeled_segments_filtered"
os.makedirs(output_dir, exist_ok=True)

# ✅ Parameters
sample_rate = 32000
segment_duration = 5  # seconds
samples_per_segment = sample_rate * segment_duration
max_segments_per_file = 10

# ✅ Process each soundscape file
for filename in tqdm(os.listdir(soundscape_dir)):
    if not filename.endswith(".ogg") and not filename.endswith(".wav"):
        continue

    file_path = os.path.join(soundscape_dir, filename)
    y, sr = librosa.load(file_path, sr=sample_rate)
    soundscape_id = os.path.splitext(filename)[0]

    # Detect non-silent intervals (in samples)
    intervals = librosa.effects.split(y, top_db=30)  # adjust top_db to tune sensitivity

    segment_count = 0
    for start_sample, end_sample in intervals:
        if segment_count >= max_segments_per_file:
            break

        segment = y[start_sample:end_sample]

        # Skip if the active segment is shorter than 5 seconds
        if len(segment) < samples_per_segment:
            continue

        # Chop into consecutive 5s chunks from this active region
        for i in range(0, len(segment) - samples_per_segment + 1, samples_per_segment):
            if segment_count >= max_segments_per_file:
                break

            chunk = segment[i:i + samples_per_segment]
            segment_filename = f"{soundscape_id}_{segment_count*5}_{(segment_count+1)*5}.wav"
            segment_path = os.path.join(output_dir, segment_filename)
            sf.write(segment_path, chunk, samplerate=sample_rate)
            segment_count += 1

print("✅ Filtered soundscape segmentation completed.")


100%|██████████| 8444/8444 [58:12<00:00,  2.42it/s] 

✅ Filtered soundscape segmentation completed.





In [5]:
import os
import librosa
import numpy as np
import cv2
from tqdm import tqdm
import gc
import matplotlib.pyplot as plt
from matplotlib import cm

# ✅ Paths
input_dir = "E:/birdclef-2024/unlabeled_segments_filtered"
output_dir = "E:/birdclef-2024/unlabeled_mels"
os.makedirs(output_dir, exist_ok=True)

# ✅ Parameters
sample_rate = 32000
n_mels = 128
n_fft = 1042
hop_length = 500
batch_size = 1000

# ✅ Get list of .wav files
all_files = [f for f in os.listdir(input_dir) if f.endswith(".wav")]

for batch_start in range(0, len(all_files), batch_size):
    batch_files = all_files[batch_start:batch_start + batch_size]

    for filename in tqdm(batch_files, desc=f"Processing batch {batch_start // batch_size + 1}"):
        file_path = os.path.join(input_dir, filename)
        y, sr = librosa.load(file_path, sr=sample_rate)

        # Generate mel spectrogram in dB
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
        S_dB = librosa.power_to_db(S, ref=np.max)

        # Normalize and convert to colormap (like matplotlib's 'viridis')
        S_dB_norm = 255 * (S_dB - S_dB.min()) / (S_dB.max() - S_dB.min())
        S_img = S_dB_norm.astype(np.uint8)

        colormap = cm.get_cmap('viridis')
        S_colored = colormap(S_img / 255.0)[:, :, :3]  # Drop alpha channel
        S_rgb = (S_colored * 255).astype(np.uint8)

        # Resize to 224x224
        S_img_resized = cv2.resize(S_rgb, (224, 224), interpolation=cv2.INTER_AREA)

        # Save as PNG image
        output_path = os.path.join(output_dir, filename.replace(".wav", ".png"))
        cv2.imwrite(output_path, cv2.cvtColor(S_img_resized, cv2.COLOR_RGB2BGR))

        gc.collect()

    print(f"✅ Completed batch {batch_start // batch_size + 1}")

print("✅ Mel spectrogram generation completed in color with colormap.")


  colormap = cm.get_cmap('viridis')
Processing batch 1: 100%|██████████| 1000/1000 [03:51<00:00,  4.33it/s]


✅ Completed batch 1


Processing batch 2: 100%|██████████| 1000/1000 [03:49<00:00,  4.36it/s]


✅ Completed batch 2


Processing batch 3: 100%|██████████| 1000/1000 [03:40<00:00,  4.53it/s]


✅ Completed batch 3


Processing batch 4: 100%|██████████| 1000/1000 [03:37<00:00,  4.60it/s]


✅ Completed batch 4


Processing batch 5: 100%|██████████| 1000/1000 [03:42<00:00,  4.49it/s]


✅ Completed batch 5


Processing batch 6: 100%|██████████| 1000/1000 [03:41<00:00,  4.52it/s]


✅ Completed batch 6


Processing batch 7: 100%|██████████| 1000/1000 [04:10<00:00,  4.00it/s]


✅ Completed batch 7


Processing batch 8: 100%|██████████| 1000/1000 [04:28<00:00,  3.73it/s]


✅ Completed batch 8


Processing batch 9: 100%|██████████| 1000/1000 [04:23<00:00,  3.80it/s]


✅ Completed batch 9


Processing batch 10: 100%|██████████| 1000/1000 [04:20<00:00,  3.84it/s]


✅ Completed batch 10


Processing batch 11: 100%|██████████| 1000/1000 [04:19<00:00,  3.85it/s]


✅ Completed batch 11


Processing batch 12: 100%|██████████| 1000/1000 [04:16<00:00,  3.90it/s]


✅ Completed batch 12


Processing batch 13: 100%|██████████| 1000/1000 [04:15<00:00,  3.92it/s]


✅ Completed batch 13


Processing batch 14: 100%|██████████| 1000/1000 [04:02<00:00,  4.13it/s]


✅ Completed batch 14


Processing batch 15: 100%|██████████| 1000/1000 [04:03<00:00,  4.10it/s]


✅ Completed batch 15


Processing batch 16: 100%|██████████| 1000/1000 [04:01<00:00,  4.14it/s]


✅ Completed batch 16


Processing batch 17: 100%|██████████| 1000/1000 [04:01<00:00,  4.14it/s]


✅ Completed batch 17


Processing batch 18: 100%|██████████| 1000/1000 [03:53<00:00,  4.27it/s]


✅ Completed batch 18


Processing batch 19: 100%|██████████| 1000/1000 [03:56<00:00,  4.22it/s]


✅ Completed batch 19


Processing batch 20: 100%|██████████| 1000/1000 [03:55<00:00,  4.25it/s]


✅ Completed batch 20


Processing batch 21: 100%|██████████| 1000/1000 [03:56<00:00,  4.22it/s]


✅ Completed batch 21


Processing batch 22: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]


✅ Completed batch 22


Processing batch 23: 100%|██████████| 1000/1000 [03:58<00:00,  4.19it/s]


✅ Completed batch 23


Processing batch 24: 100%|██████████| 1000/1000 [03:54<00:00,  4.26it/s]


✅ Completed batch 24


Processing batch 25: 100%|██████████| 1000/1000 [03:53<00:00,  4.29it/s]


✅ Completed batch 25


Processing batch 26: 100%|██████████| 1000/1000 [03:53<00:00,  4.28it/s]


✅ Completed batch 26


Processing batch 27: 100%|██████████| 1000/1000 [04:04<00:00,  4.09it/s]


✅ Completed batch 27


Processing batch 28: 100%|██████████| 1000/1000 [04:30<00:00,  3.70it/s]


✅ Completed batch 28


Processing batch 29: 100%|██████████| 1000/1000 [04:10<00:00,  4.00it/s]


✅ Completed batch 29


Processing batch 30: 100%|██████████| 1000/1000 [04:02<00:00,  4.12it/s]


✅ Completed batch 30


Processing batch 31: 100%|██████████| 1000/1000 [03:54<00:00,  4.26it/s]


✅ Completed batch 31


Processing batch 32: 100%|██████████| 1000/1000 [03:54<00:00,  4.27it/s]


✅ Completed batch 32


Processing batch 33: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]


✅ Completed batch 33


Processing batch 34: 100%|██████████| 1000/1000 [05:20<00:00,  3.12it/s]


✅ Completed batch 34


Processing batch 35: 100%|██████████| 1000/1000 [03:48<00:00,  4.38it/s]


✅ Completed batch 35


Processing batch 36: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]


✅ Completed batch 36


Processing batch 37: 100%|██████████| 1000/1000 [03:46<00:00,  4.41it/s]


✅ Completed batch 37


Processing batch 38: 100%|██████████| 1000/1000 [03:44<00:00,  4.46it/s]


✅ Completed batch 38


Processing batch 39: 100%|██████████| 1000/1000 [03:55<00:00,  4.24it/s]


✅ Completed batch 39


Processing batch 40: 100%|██████████| 1000/1000 [03:55<00:00,  4.25it/s]


✅ Completed batch 40


Processing batch 41: 100%|██████████| 1000/1000 [03:47<00:00,  4.40it/s]


✅ Completed batch 41


Processing batch 42: 100%|██████████| 1000/1000 [03:50<00:00,  4.35it/s]


✅ Completed batch 42


Processing batch 43: 100%|██████████| 1000/1000 [03:51<00:00,  4.33it/s]


✅ Completed batch 43


Processing batch 44: 100%|██████████| 1000/1000 [03:50<00:00,  4.34it/s]


✅ Completed batch 44


Processing batch 45: 100%|██████████| 1000/1000 [03:47<00:00,  4.41it/s]


✅ Completed batch 45


Processing batch 46: 100%|██████████| 1000/1000 [03:49<00:00,  4.37it/s]


✅ Completed batch 46


Processing batch 47: 100%|██████████| 1000/1000 [03:51<00:00,  4.32it/s]


✅ Completed batch 47


Processing batch 48: 100%|██████████| 1000/1000 [03:46<00:00,  4.42it/s]


✅ Completed batch 48


Processing batch 49: 100%|██████████| 1000/1000 [03:48<00:00,  4.38it/s]


✅ Completed batch 49


Processing batch 50: 100%|██████████| 1000/1000 [03:46<00:00,  4.41it/s]


✅ Completed batch 50


Processing batch 51: 100%|██████████| 1000/1000 [03:48<00:00,  4.38it/s]


✅ Completed batch 51


Processing batch 52: 100%|██████████| 1000/1000 [03:48<00:00,  4.38it/s]


✅ Completed batch 52


Processing batch 53: 100%|██████████| 1000/1000 [03:48<00:00,  4.37it/s]


✅ Completed batch 53


Processing batch 54: 100%|██████████| 1000/1000 [04:03<00:00,  4.11it/s]


✅ Completed batch 54


Processing batch 55: 100%|██████████| 1000/1000 [03:57<00:00,  4.22it/s]


✅ Completed batch 55


Processing batch 56: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]


✅ Completed batch 56


Processing batch 57: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]


✅ Completed batch 57


Processing batch 58: 100%|██████████| 1000/1000 [03:55<00:00,  4.25it/s]


✅ Completed batch 58


Processing batch 59: 100%|██████████| 1000/1000 [03:55<00:00,  4.25it/s]


✅ Completed batch 59


Processing batch 60: 100%|██████████| 1000/1000 [03:50<00:00,  4.34it/s]


✅ Completed batch 60


Processing batch 61: 100%|██████████| 1000/1000 [03:46<00:00,  4.42it/s]


✅ Completed batch 61


Processing batch 62: 100%|██████████| 1000/1000 [03:54<00:00,  4.27it/s]


✅ Completed batch 62


Processing batch 63: 100%|██████████| 1000/1000 [03:56<00:00,  4.24it/s]


✅ Completed batch 63


Processing batch 64: 100%|██████████| 1000/1000 [03:52<00:00,  4.30it/s]


✅ Completed batch 64


Processing batch 65: 100%|██████████| 1000/1000 [03:50<00:00,  4.34it/s]


✅ Completed batch 65


Processing batch 66: 100%|██████████| 1000/1000 [03:49<00:00,  4.35it/s]


✅ Completed batch 66


Processing batch 67: 100%|██████████| 1000/1000 [03:50<00:00,  4.35it/s]


✅ Completed batch 67


Processing batch 68: 100%|██████████| 1000/1000 [03:51<00:00,  4.31it/s]


✅ Completed batch 68


Processing batch 69: 100%|██████████| 1000/1000 [03:47<00:00,  4.39it/s]


✅ Completed batch 69


Processing batch 70: 100%|██████████| 1000/1000 [03:42<00:00,  4.49it/s]


✅ Completed batch 70


Processing batch 71: 100%|██████████| 1000/1000 [03:41<00:00,  4.52it/s]


✅ Completed batch 71


Processing batch 72: 100%|██████████| 1000/1000 [03:54<00:00,  4.27it/s]


✅ Completed batch 72


Processing batch 73: 100%|██████████| 1000/1000 [03:50<00:00,  4.34it/s]


✅ Completed batch 73


Processing batch 74: 100%|██████████| 1000/1000 [04:08<00:00,  4.03it/s]


✅ Completed batch 74


Processing batch 75: 100%|██████████| 1000/1000 [03:50<00:00,  4.34it/s]


✅ Completed batch 75


Processing batch 76: 100%|██████████| 1000/1000 [03:47<00:00,  4.39it/s]


✅ Completed batch 76


Processing batch 77: 100%|██████████| 1000/1000 [03:43<00:00,  4.47it/s]


✅ Completed batch 77


Processing batch 78: 100%|██████████| 1000/1000 [03:45<00:00,  4.43it/s]


✅ Completed batch 78


Processing batch 79: 100%|██████████| 1000/1000 [03:46<00:00,  4.41it/s]


✅ Completed batch 79


Processing batch 80: 100%|██████████| 1000/1000 [03:48<00:00,  4.37it/s]


✅ Completed batch 80


Processing batch 81: 100%|██████████| 1000/1000 [03:46<00:00,  4.41it/s]


✅ Completed batch 81


Processing batch 82: 100%|██████████| 279/279 [01:04<00:00,  4.35it/s]

✅ Completed batch 82
✅ Mel spectrogram generation completed in color with colormap.





In [2]:
#INFERENCE MODEL ON UNLABELED SOUNDSCAPE USING RESNET-50

import os
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import pandas as pd
from tqdm import tqdm
import numpy as np

# ✅ Paths
model_path = "best_resnet50_model.pth"
mels_dir = "E:/birdclef-2024/unlabeled_mels"
output_csv = "E:/birdclef-2024/unlabeled_predictions.csv"

# ✅ Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ✅ Load model
model = models.resnet50(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 181)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# ✅ Transforms
inference_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# ✅ Check existing predictions to resume
existing_files = set()
if os.path.exists(output_csv):
    existing_df = pd.read_csv(output_csv)
    existing_files = set(existing_df['filename'].tolist())
    results = existing_df.values.tolist()
else:
    results = []

# ✅ Inference
image_files = sorted([f for f in os.listdir(mels_dir) if f.endswith(".png")])
batch_size = 1000  # Save progress every 1000 files
buffer = []

with torch.no_grad():
    for idx, img_file in enumerate(tqdm(image_files)):
        if img_file in existing_files:
            continue

        img_path = os.path.join(mels_dir, img_file)
        image = Image.open(img_path).convert("RGB")
        tensor = inference_transforms(image).unsqueeze(0).to(device)

        outputs = model(tensor)
        probs = torch.softmax(outputs, dim=1).squeeze().cpu().numpy()

        buffer.append([img_file] + list(probs))

        # Save every `batch_size` predictions
        if len(buffer) >= batch_size:
            results.extend(buffer)
            df = pd.DataFrame(results, columns=["filename"] + [f"class_{i}" for i in range(181)])
            df.to_csv(output_csv, index=False)
            buffer = []  # Clear buffer

# ✅ Final save
if buffer:
    results.extend(buffer)
    df = pd.DataFrame(results, columns=["filename"] + [f"class_{i}" for i in range(181)])
    df.to_csv(output_csv, index=False)

print("✅ Inference completed and predictions saved.")


Using device: cpu


100%|██████████| 81279/81279 [00:00<00:00, 2881828.93it/s]

✅ Inference completed and predictions saved.





In [4]:
#INFERENCE MODEL ON UNLABELED SOUNDSCAPE USING EFFICIENT NET-B3

import os
import torch
import torch.nn as nn
from PIL import Image
import pandas as pd
from tqdm import tqdm
import numpy as np
import timm
from torchvision import transforms

# ✅ Paths
model_path = "best_efficientnet_b3_model.pth"
mels_dir = "E:/birdclef-2024/unlabeled_mels"
output_csv = "E:/birdclef-2024/unlabeled_predictions_b3.csv"

# ✅ Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ✅ Load EfficientNet-B3 model
model = timm.create_model("efficientnet_b3", pretrained=False, num_classes=181)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# ✅ Transforms (300x300 for EfficientNet-B3)
inference_transforms = transforms.Compose([
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)
])

# ✅ Check existing predictions to resume
existing_files = set()
if os.path.exists(output_csv):
    existing_df = pd.read_csv(output_csv)
    existing_files = set(existing_df['filename'].tolist())
    results = existing_df.values.tolist()
else:
    results = []

# ✅ Inference loop
image_files = sorted([f for f in os.listdir(mels_dir) if f.endswith(".png")])
batch_size = 1000
buffer = []

with torch.no_grad():
    for idx, img_file in enumerate(tqdm(image_files)):
        if img_file in existing_files:
            continue

        img_path = os.path.join(mels_dir, img_file)
        image = Image.open(img_path).convert("RGB")
        tensor = inference_transforms(image).unsqueeze(0).to(device)

        outputs = model(tensor)
        probs = torch.softmax(outputs, dim=1).squeeze().cpu().numpy()

        buffer.append([img_file] + list(probs))

        if len(buffer) >= batch_size:
            results.extend(buffer)
            df = pd.DataFrame(results, columns=["filename"] + [f"class_{i}" for i in range(181)])
            df.to_csv(output_csv, index=False)
            buffer = []

# ✅ Final save
if buffer:
    results.extend(buffer)
    df = pd.DataFrame(results, columns=["filename"] + [f"class_{i}" for i in range(181)])
    df.to_csv(output_csv, index=False)

print("✅ Inference completed and predictions saved.")


Using device: cpu


100%|██████████| 81279/81279 [1:39:04<00:00, 13.67it/s]  


✅ Inference completed and predictions saved.
