# boutRight ¯\\_(ツ)_/¯ YOLO
## **Training**
This code takes the labelImg (https://github.com/ArmendLee/labelImg-1.8.0.git) annotated images and splits the data into 80 % for trainign and 20 % for validation. <br>

# **Set environment** <br>
conda create -n yolov5-env python=3.8 <br>
conda activate yolov5-env <br>
conda install pip <br>
conda install pytorch torchvision torchaudio cudatoolkit=11.3 -c pytorch

# **Download yolov5**
git clone https://github.com/ultralytics/yolov5.git <br>
cd yolov5 <br>
#install <br>
pip install -r requirements.txt <br>

# **Modify Configuration: Update the data.yaml file to point to your dataset.**
data.yaml <br>
train: C:\Users\ucsfg\Documents\Code\boutRight_v3\dataset\images\train <br>
val: C:\Users\ucsfg\Documents\Code\boutRight_v3\dataset\images\val <br>
 <br>
nc: 1 <br>
names: ['bout'] <br>


# **Define a function to generate spectrogram images from .wav files**

In [7]:
from scipy import signal
from scipy.io import wavfile
import numpy as np
import torch
from PIL import Image, ImageDraw
import io
import os
import math
from glob import glob
from IPython.display import clear_output

def filtered_spectrogram(filepath):
    #length of fft
    lend = 34
    #overlap of fft
    overlap =33
    #time length for exponential window of fft
    ts = 3
    #low cut frequency in Hz
    lc = 500
    #high cut frequency in Hz
    hc = 20000
    # color of image settings
    # contribution of each channel to color
    RGBch = [0.8,1.5,1.5]

    #import the audio data
    fs, data = wavfile.read(filepath)
    #round length of data and overlap
    lend = round((lend/1E3)*fs)
    overlap =round((overlap/1E3)*fs)
    #next power of two definition
    def nextpow2(x):
        return 1 if x == 0 else 2**math.ceil(math.log2(x))
    #calculate next power of two
    nfft = nextpow2(lend)

    # Butterworth filter
    def butter_bp(data, lc, hc, fs, order=3):
       nyq = 0.5*fs
       low = lc/nyq
       high = hc/nyq
       b,a = signal.butter(order,[low, high], btype='band')
       data_filtered = signal.lfilter(b, a, data)
       return data_filtered

    data = butter_bp(data, lc, hc, fs, order=5)
    #normalize signal
    data = data/max(abs(data))
    # make windows for spectrogram
    t = np.linspace(-lend/2+1,lend/2,num=lend)
    sigma = (ts/1E3)*fs
    w = np.exp(-(t/sigma)**2)
    #dw = -2*w*(t/(sigma**2))
    dw = np.exp(-(t/(2*sigma))**2)
    #Calculate spectrograms
    [f,t,sx] = signal.spectrogram(data,fs=fs, window=w, noverlap=overlap, nfft=nfft )
    [_,_,sxx] = signal.spectrogram(data,fs=fs, window=dw, noverlap=overlap, nfft=nfft)
    #average of both spectrograms
    image_array = np.log2(abs(sx)+abs(sx))/2
    #obtain tresholds for background
    minmax = [np.percentile(image_array ,80),np.percentile(image_array,99)]
    #subtract backgroud
    image_array = np.minimum(image_array,minmax[1])
    image_array = np.maximum(image_array,minmax[0])
    #normalize
    image_array = (image_array-np.min(image_array))/(np.max(image_array)-np.min(image_array))
    #flip spectrogram
    image_array = np.flip(image_array,0)
    #convert to color
    sz = (image_array.shape[0]-1,image_array.shape[1]-1,3)
    image_color = np.zeros(sz)
    tmp = image_array
    image_color[:,:,0] = RGBch[0]*tmp[0:-1,0:-1]
    tmp = np.diff(image_array,1,axis=0)
    image_color[:,:,1] = RGBch[1]*tmp[:,0:-1]
    tmp = np.diff(image_array,1,axis=1)
    image_color[:,:,2] = RGBch[2]*tmp[0:-1,:]
    
    return image_color, fs, len(data)


# **Generate PNG files for labelImg annotation**

In [3]:
import os
from glob import glob
import numpy as np
from PIL import Image
import time
from IPython.display import clear_output
import concurrent.futures
from tqdm import tqdm

# Directory paths
wav_files = glob(os.path.join(os.getcwd(), 'songs_for_training', '*.wav'))
images_dir = os.path.join(os.getcwd(), 'images_for_training')

# Create images_for_training directory if it doesn't exist
if not os.path.exists(images_dir):
    os.makedirs(images_dir)

total_time = 0
num_files_processed = 0

# Function to process each WAV file
def process_wav_file(wav_file):
    global total_time, num_files_processed
    start_time = time.time()
    
    # Generate the expected output .png file name
    img_name = os.path.basename(wav_file).replace('.wav', '.png')
    img_path = os.path.join(images_dir, img_name)

    # Skip processing if the image already exists
    if os.path.exists(img_path):
        clear_output(wait=True)
        print(f"Image already exists for {wav_file}, skipping...")
        return

    try:
        # Generate the spectrogram from the wav file using the original method
        spectrogram, fs, data_length = filtered_spectrogram(wav_file)
        
        # Convert the spectrogram to an image and save it
        spectrogram_image = (spectrogram * 255).astype(np.uint8)
        pil_image = Image.fromarray(spectrogram_image)
        pil_image.save(img_path)
        
        end_time = time.time()
        elapsed_time = end_time - start_time
        total_time += elapsed_time
        num_files_processed += 1
        avg_time_per_file = total_time / num_files_processed
        
        # Clear the output and print the timing information
        clear_output(wait=True)
        print(f"Saved image {img_path} in {elapsed_time:.2f} seconds.")
        print(f"Average time per file: {avg_time_per_file:.2f} seconds.")
        
        # Close the image file to free up resources
        pil_image.close()
    except Exception as e:
        print(f"Error processing {wav_file}: {e}")

# Process WAV files in parallel
with concurrent.futures.ThreadPoolExecutor() as executor:
    list(tqdm(executor.map(process_wav_file, wav_files), total=len(wav_files), desc="Processing WAV files"))

print("Processing complete.")


Image already exists for C:\Users\ucsfg\Documents\Code\boutRight_v3\songs_for_training\R97_45475.41842364_7_2_11_37_22.wav, skipping...


To annotate images use labelImg.
However, the boutRight environment runs on python 12 and labelIMG doesnt run on python 12.
Thus make a new environment
conda env create -n imageannotation python=3.8 labelImg
conda activate imageannotation
labelImg
#make sure to select 'auto save' and 'advance mode' also make sure to use default class as 'bout' or 'call' when annotating.
use your keyboard 'A' or 'D' keys to move backwards or forwards


# **Split Dataset**


In [1]:
#Make sure to update data.yaml file so that it points to your dataset path
os.chdir(r'C:\Users\ucsfg\Documents\Code\boutRight_v3')
path_to_data = os.path.join(os.getcwd(),'data.yaml')

!python split_dataset.py

NameError: name 'os' is not defined

# **Run training**

In [6]:
path_to_weights = os.path.join(os.getcwd(),'yolov5','runs','train','exp6','best.pt')
path_to_train = os.path.join(os.getcwd(),'yolov5','train.py')
!python {path_to_train} --img 640 --batch 16 --epochs 500 --data {path_to_data} --weights {path_to_weights}  --cache

Traceback (most recent call last):
  File "C:\Users\ucsfg\Documents\Code\boutRight_v3\yolov5\train.py", line 100, in <module>
    GIT_INFO = check_git_info()
               ^^^^^^^^^^^^^^^^
  File "c:\ProgramData\anaconda3\envs\boutRight\Lib\contextlib.py", line 81, in inner
    return func(*args, **kwds)
           ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ucsfg\Documents\Code\boutRight_v3\yolov5\utils\general.py", line 408, in check_git_info
    commit = repo.head.commit.hexsha  # i.e. '3134699c73af83aac2a481435550b968d5792c0d'
             ^^^^^^^^^^^^^^^^
  File "c:\ProgramData\anaconda3\envs\boutRight\Lib\site-packages\git\refs\symbolic.py", line 297, in _get_commit
    obj = self._get_object()
          ^^^^^^^^^^^^^^^^^^
  File "c:\ProgramData\anaconda3\envs\boutRight\Lib\site-packages\git\refs\symbolic.py", line 288, in _get_object
    return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# **Evaluate network**



In [None]:
!python val.py --data data.yaml --weights runs/train/exp/weights/best.pt --img 640

In [1]:
import os
import xml.etree.ElementTree as ET
from PIL import Image

def add_image_dimensions_to_xml(xml_folder, image_folder):
    # Iterate over all XML files in the folder
    for filename in os.listdir(xml_folder):
        if filename.endswith(".xml"):
            xml_path = os.path.join(xml_folder, filename)
            
            # Parse the XML file
            tree = ET.parse(xml_path)
            root = tree.getroot()
            
            # Get the image file name from the XML file
            image_filename = root.find('filename').text
            
            # Get the image path
            image_path = os.path.join(image_folder, image_filename)
            
            # Open the image and get its dimensions
            with Image.open(image_path) as img:
                width, height = img.size
                depth = len(img.getbands())
            
            # Create the <size> element and add it to the XML
            size_element = ET.Element('size')
            width_element = ET.SubElement(size_element, 'width')
            width_element.text = str(width)
            height_element = ET.SubElement(size_element, 'height')
            height_element.text = str(height)
            depth_element = ET.SubElement(size_element, 'depth')
            depth_element.text = str(depth)
            
            # Find the position of the <segmented> element
            segmented_index = None
            for i, elem in enumerate(root):
                if elem.tag == 'segmented':
                    segmented_index = i
                    break
            
            # Insert the <size> element before the <segmented> element
            if segmented_index is not None:
                root.insert(segmented_index, size_element)
            
            # Save the modified XML file
            tree.write(xml_path)

# Call the function to add image dimensions to XML files in the "anno" folder
add_image_dimensions_to_xml(r"C:\Users\ucsfg\Documents\Code\boutRight_v3\annotations", r"C:\Users\ucsfg\Documents\Code\boutRight_v3\images_for_training")

print("Image dimensions were successfully added to XML files in the 'anno' folder.")


Image dimensions were successfully added to XML files in the 'anno' folder.


In [3]:
import os
from glob import glob
import shutil
import torch
from PIL import Image
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm
import io
from IPython.display import clear_output
import concurrent.futures
import uuid

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'custom', path=r'C:\Users\ucsfg\Documents\Code\boutRight_v3\yolov5\runs\train\exp13\weights\best.pt')

# Directory paths
path_to_wavs = r'C:\Users\ucsfg\Documents\Code\boutRight_v3\test\225'
songs_dir = os.path.join(path_to_wavs, 'Songs')
noise_calls_dir = os.path.join(path_to_wavs, 'Noise_Calls')

# Create directories if they don't exist
os.makedirs(songs_dir, exist_ok=True)
os.makedirs(noise_calls_dir, exist_ok=True)

# Function to generate spectrogram and check for bouts using YOLOv5
def check_for_bouts(wav_file):
    try:
        spectrogram, fs, data_length = filtered_spectrogram(wav_file)
        spectrogram_image = (spectrogram * 255).astype(np.uint8)
        pil_image = Image.fromarray(spectrogram_image)
        
        # Generate a unique filename for the temporary spectrogram image in the same directory as the WAV file
        temp_img_path = os.path.join(os.path.dirname(wav_file), f'temp_spectrogram_{uuid.uuid4().hex}.png')
        pil_image.save(temp_img_path)
        
        # Verify the saved image
        with Image.open(temp_img_path) as img:
            img.verify()
        
        # Use YOLOv5 to detect bouts in the spectrogram image
        results = model(temp_img_path)
        
        # Remove the temporary image file
        os.remove(temp_img_path)
        
        # Check if any bouts are detected
        return len(results.xyxy[0]) > 0
    except Exception as e:
        print(f"Error in check_for_bouts for {wav_file}: {e}")
        return False

# Function to process each WAV file
def process_wav_file(wav_file):
    # Clear the output
    clear_output(wait=True)
    try:
        if check_for_bouts(wav_file):
            shutil.move(wav_file, os.path.join(songs_dir, os.path.basename(wav_file)))
            print(f"Moved {wav_file} to Songs")
        else:
            shutil.move(wav_file, os.path.join(noise_calls_dir, os.path.basename(wav_file)))
            print(f"Moved {wav_file} to Noise_Calls")
        
    except Exception as e:
        print(f"Error processing {wav_file}: {e}")

# Process WAV files in parallel
wav_files = glob(os.path.join(path_to_wavs, '*.wav'))
with concurrent.futures.ThreadPoolExecutor() as executor:
    list(tqdm(executor.map(process_wav_file, wav_files), total=len(wav_files), desc="Processing WAV files"))


Using cache found in C:\Users\ucsfg/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-8-22 Python-3.12.5 torch-2.4.0 CUDA:0 (NVIDIA RTX A2000, 6138MiB)

Fusing layers... 
Model summary: 166 layers, 7053910 parameters, 0 gradients
Adding AutoShape... 
Processing WAV files: 0it [00:00, ?it/s]


In [18]:
import os
from glob import glob
import shutil
import torch
from PIL import Image
import numpy as np
from scipy.io import wavfile
from tqdm import tqdm
import io
from IPython.display import clear_output
import concurrent.futures
import uuid

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'custom', path=r'C:\Users\ucsfg\Documents\Code\boutRight_v3\yolov5\runs\train\exp10\weights\best.pt')

# Directory paths
path_to_wavs = r'C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206'
songs_dir = os.path.join(path_to_wavs, 'Songs')
noise_calls_dir = os.path.join(path_to_wavs, 'Noise_Calls')

# Create directories if they don't exist
os.makedirs(songs_dir, exist_ok=True)
os.makedirs(noise_calls_dir, exist_ok=True)

# Function to generate spectrogram and check for bouts using YOLOv5
def check_for_bouts(wav_file):
    try:
        spectrogram, fs, data_length = filtered_spectrogram(wav_file)
        spectrogram_image = (spectrogram * 255).astype(np.uint8)
        pil_image = Image.fromarray(spectrogram_image)
        
        # Save the spectrogram image temporarily
        temp_img_path = os.path.join(os.path.dirname(wav_file), f'temp_spectrogram_{uuid.uuid4().hex}.png')
        pil_image.save(temp_img_path)
        
        # Verify the saved image
        with Image.open(temp_img_path) as img:
            img.verify()
        
        # Use YOLOv5 to detect bouts in the spectrogram image
        results = model(temp_img_path)
        
        # Remove the temporary image file
        os.remove(temp_img_path)
        
        # Count the number of "calls" and check for "bouts"
        call_count = sum(1 for *_, conf, cls in results.xyxy[0] if cls == 1 and conf >= 0.8)  # Assuming class 1 is "call"
        bout_detected = any(cls == 0 and conf >= 0.8 for *_, conf, cls in results.xyxy[0])  # Assuming class 0 is "bout"
        
        return call_count, bout_detected
    except Exception as e:
        print(f"Error in check_for_bouts for {wav_file}: {e}")
        return 0, False

# Function to process each WAV file
def process_wav_file(wav_file):
    # Clear the output
    #clear_output(wait=True)
    try:
        call_count, bout_detected = check_for_bouts(wav_file)
        return wav_file, call_count, bout_detected
    except Exception as e:
        print(f"Error processing {wav_file}: {e}")
        return wav_file, 0, False

# Process WAV files in parallel
wav_files = glob(os.path.join(path_to_wavs, '*.wav'))
results = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    results = list(tqdm(executor.map(process_wav_file, wav_files), total=len(wav_files), desc="Processing WAV files"))

# Move files with bouts to the Songs directory
for wav_file, _, bout_detected in results:
    if bout_detected:
        shutil.move(wav_file, os.path.join(songs_dir, os.path.basename(wav_file)))
        print(f"Moved {wav_file} to Songs")

# Filter out files with bouts and sort the remaining files by call count
results = [(wav_file, call_count) for wav_file, call_count, bout_detected in results if not bout_detected]
results.sort(key=lambda x: x[1], reverse=True)
top_100_files = results[:100]

# Move the top 100 files with the highest number of calls to the Noise_Calls directory
for wav_file, _ in top_100_files:
    shutil.move(wav_file, os.path.join(noise_calls_dir, os.path.basename(wav_file)))
    print(f"Moved {wav_file} to Noise_Calls")


Using cache found in C:\Users\ucsfg/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2024-8-22 Python-3.12.5 torch-2.4.0 CUDA:0 (NVIDIA RTX A2000, 6138MiB)

Fusing layers... 
Model summary: 166 layers, 7056607 parameters, 0 gradients
Adding AutoShape... 
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
Processing WAV files:   6%|▋         | 29/460 [01:37<14:41,  2.05s/it]  

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24277537_7_25_6_44_37.wav: The size of tensor a (6) must match the size of tensor b (2) at non-singleton dimension 2


Processing WAV files:  12%|█▏        | 55/460 [01:58<05:22,  1.25it/s]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24683753_7_25_6_51_23.wav: The size of tensor a (28) must match the size of tensor b (12) at non-singleton dimension 2


Processing WAV files:  32%|███▏      | 147/460 [04:12<13:20,  2.56s/it]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.26892488_7_25_7_28_12.wav: The size of tensor a (10) must match the size of tensor b (5) at non-singleton dimension 2


Processing WAV files:  33%|███▎      | 153/460 [04:23<11:37,  2.27s/it]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.26084618_7_25_7_14_44.wav: Sizes of tensors must match except in dimension 4. Expected size 1 but got size 2 for tensor number 1 in the list.


Processing WAV files:  38%|███▊      | 177/460 [04:32<02:04,  2.27it/s]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.26605275_7_25_7_23_25.wav: The size of tensor a (8) must match the size of tensor b (24) at non-singleton dimension 2
Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.27237610_7_25_7_33_57.wav: The size of tensor a (12) must match the size of tensor b (8) at non-singleton dimension 2
Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.26786151_7_25_7_26_26.wav: The size of tensor a (4) must match the size of tensor b (8) at non-singleton dimension 2


Processing WAV files:  46%|████▋     | 213/460 [05:22<04:45,  1.16s/it]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.28360224_7_25_7_52_40.wav: The size of tensor a (40) must match the size of tensor b (16) at non-singleton dimension 2


Processing WAV files:  68%|██████▊   | 314/460 [06:52<02:09,  1.12it/s]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.29545904_7_25_8_12_25.wav: The size of tensor a (28) must match the size of tensor b (16) at non-singleton dimension 2


Processing WAV files:  71%|███████   | 327/460 [07:07<02:17,  1.03s/it]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.29639030_7_25_8_13_59.wav: The size of tensor a (10) must match the size of tensor b (8) at non-singleton dimension 2
Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.29209372_7_25_8_6_49.wav: The size of tensor a (4) must match the size of tensor b (12) at non-singleton dimension 2
Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.29651289_7_25_8_14_11.wav: The size of tensor a (6) must match the size of tensor b (18) at non-singleton dimension 2


Processing WAV files:  91%|█████████ | 417/460 [08:46<00:39,  1.09it/s]

Error in check_for_bouts for C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.30843772_7_25_8_34_3.wav: The size of tensor a (8) must match the size of tensor b (24) at non-singleton dimension 2


Processing WAV files: 100%|██████████| 460/460 [08:56<00:00,  1.17s/it]

Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.23606202_7_25_6_33_26.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.23988909_7_25_6_39_48.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24050781_7_25_6_40_50.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24075389_7_25_6_41_15.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24085463_7_25_6_41_25.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24138708_7_25_6_42_18.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24151068_7_25_6_42_31.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24158624_7_25_6_42_38.wav to Songs
Moved C:\Users\ucsfg\Documents\Code\boutRight_v3\test\mic\206\R159_45498.24189829_7_25_6_43_9.wav to Songs
Moved C:\Users\ucsfg\Document


