<a href="https://colab.research.google.com/github/BenUCL/Reef-acoustics-and-AI/blob/main/Code/Splitting_and_dowsampling_audio_files.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Slice up audio files**

This notebook slices longer audio files into 1-minute chunks, downsamples them if desired (e.g. discard 4 minutes, keep 1 minute), and names each chunk with the correct timestamp. This is done for files that follow the naming convention set by Lamont (2021) and for data that uses the Soundtrap hydrophone naming convention. The code could be adapted for other naming conventions, e.g. Hydromoths. It can be run on one file (top) or on all files in a folder (bottom).

Please cite Williams et al (2023) if you use this code.

In [None]:
!pip3 install pydub
from pydub import AudioSegment
import math
import pandas as pd
from datetime import timedelta
import os 
import glob

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


**Using Lamont (2021) and Williams (2022) naming convention**

In [None]:
#Tims naming convention
#class adapted from S/O:
 #https://stackoverflow.com/questions/37999150/how-to-split-a-wav-file-into-multiple-wav-files

class SplitWavAudioMubin():
    """Split a .wav file named with the Lamont (2021) convention into shorter files.

    The source file name is expected to be dot-separated, e.g.
    'BaF1.1055H.1678278701.180827.3.35.wav': the second field starts with the
    recording start time (HHMM) and the fourth field is the recording date
    (YYMMDD). Each chunk is exported as
    '<field0>.<field1>.<field2>.<date>.NT<HHMM>.wav', where NT<HHMM> is the
    start time of that chunk.

    NOTE: a later cell in this notebook defines another class with this same
    name (for the Soundtrap convention); whichever cell was run last is the
    definition in effect.

    Input:
    folder = path to the folder with the file to be split
    filename = the file within the folder to split
    export_folder = the folder where the new files will be exported
                    (must be different to folder)
    """

    def __init__(self, folder, filename, export_folder):
        self.folder = folder
        self.filename = filename
        self.export_folder = export_folder
        self.filepath = os.path.join(folder, filename)
        self.audio = AudioSegment.from_wav(self.filepath)

    def get_duration(self):
        """Return the length of the loaded audio in seconds."""
        return self.audio.duration_seconds

    def single_split(self, from_min, to_min, split_filename):
        """Export the audio between from_min and to_min (in minutes) to
        export_folder/split_filename as a wav file."""
        # The audio object is sliced in milliseconds.
        start_ms = from_min * 60 * 1000
        end_ms = to_min * 60 * 1000
        chunk = self.audio[start_ms:end_ms]
        chunk.export(os.path.join(self.export_folder, split_filename), format="wav")

    def multiple_split(self, min_per_split):
        """Split the whole file into consecutive chunks of min_per_split minutes.

        Prints a message and returns None without splitting if export_folder
        is the same location as folder. Raises ValueError if min_per_split is
        smaller than 1.
        """
        # Compare normalised paths so a trailing slash cannot defeat the guard.
        if os.path.abspath(self.folder) == os.path.abspath(self.export_folder):
            print('Export folder must be different to folder')
            return None
        if min_per_split < 1:
            raise ValueError('min_per_split must be a whole number of minutes >= 1')

        # The export fails if the destination folder does not exist yet.
        os.makedirs(self.export_folder, exist_ok=True)

        # The file name does not change between chunks, so parse it once.
        fields = self.filename.split(".")
        date_part = fields[3]        # YYMMDD
        clock_part = fields[1][0:4]  # HHMM of the original recording start
        recording_start = pd.to_datetime(date_part + clock_part, format='%y%m%d%H%M')

        # NOTE(review): the date field is copied unchanged from the source
        # name, so chunks of a recording that runs past midnight keep the
        # original date - confirm this is the intended convention.
        name_prefix = '.'.join(fields[:3]) + '.' + date_part

        total_mins = math.ceil(self.get_duration() / 60)
        for i in range(0, total_mins, min_per_split):
            chunk_clock = (recording_start + timedelta(minutes=i)).strftime('%H%M')
            new_name = name_prefix + '.NT' + chunk_clock + '.wav'
            print('Creating: ' + new_name)
            self.single_split(i, i + min_per_split, new_name)

        # Report completion after the loop so it is also shown when the
        # duration is not an exact multiple of min_per_split.
        if total_mins > 0:
            print('Splits completed successfully')


#Added an additional function to execute the above class on multiple files in a folder
#Should probably integrate this into the class but have not done so yet
def split_multiple_files(folder, export_folder, min_per_split):
  """Split every .wav file directly inside folder into chunks in export_folder.

  folder = path to the folder holding the files to split
  export_folder = folder the chunks are written to
  min_per_split = length of each chunk in minutes

  The working directory of the notebook is left unchanged.
  """
  # Build the pattern from the folder path instead of os.chdir(), so relative
  # paths used elsewhere keep working; escape the folder in case it contains
  # glob special characters. Sorting gives a stable processing order.
  pattern = os.path.join(glob.escape(folder), "*.wav")
  wav_files = sorted(os.path.basename(path) for path in glob.glob(pattern))

  print('Found files: ')
  for file in wav_files:
    print(file)

  for file in wav_files:
    print('Now splitting: ' + file)
    split_wav = SplitWavAudioMubin(folder, file, export_folder)
    split_wav.multiple_split(min_per_split=min_per_split)



# Run the code on just one file

In [None]:


##set these parameters:
#folder = r'/content/drive/MyDrive/More audio/tims_naming _convention'
#file = 'BaF1.1055H.1678278701.180827.3.35.wav'
#export_folder = r'/content/drive/MyDrive/More audio/tims_naming _convention/export_folder' #must be different to folder with original files in
#min_per_split = 2

##These will then execute:
#split_wav = SplitWavAudioMubin(folder, file, export_folder)
#split_wav.multiple_split(min_per_split=min_per_split)


Creating: BaF1.1055H.1678278701.NT1055.wav
Creating: BaF1.1055H.1678278701.NT1057.wav
Creating: BaF1.1055H.1678278701.NT1059.wav
Creating: BaF1.1055H.1678278701.NT1101.wav
Creating: BaF1.1055H.1678278701.NT1103.wav
Splits completed successfully


# Run the code on all files in a folder

In [None]:
# Folder holding the recordings to split, and a different folder to receive the chunks.
folder = r'/content/drive/MyDrive/More audio/tims_naming _convention' #change
export_folder = r'/content/drive/MyDrive/More audio/tims_naming _convention/export_folder' #change
min_per_split = 1  # length of each chunk in minutes
split_multiple_files(folder=folder, export_folder=export_folder, min_per_split=min_per_split)

Found files: 
BaF1.1055H.1678278701.180827.3.35.wav
BaF1.1230H.1678278701.180827.3.35.wav
Now splitting: BaF1.1055H.1678278701.180827.3.35.wav
Creating: BaF1.1055H.1678278701.180827.NT1055.wav
Creating: BaF1.1055H.1678278701.180827.NT1056.wav
Creating: BaF1.1055H.1678278701.180827.NT1057.wav
Creating: BaF1.1055H.1678278701.180827.NT1058.wav
Creating: BaF1.1055H.1678278701.180827.NT1059.wav
Creating: BaF1.1055H.1678278701.180827.NT1100.wav
Creating: BaF1.1055H.1678278701.180827.NT1101.wav
Creating: BaF1.1055H.1678278701.180827.NT1102.wav
Creating: BaF1.1055H.1678278701.180827.NT1103.wav
Creating: BaF1.1055H.1678278701.180827.NT1104.wav
Splits completed successfully
Now splitting: BaF1.1230H.1678278701.180827.3.35.wav
Creating: BaF1.1230H.1678278701.180827.NT1230.wav
Creating: BaF1.1230H.1678278701.180827.NT1231.wav
Creating: BaF1.1230H.1678278701.180827.NT1232.wav
Creating: BaF1.1230H.1678278701.180827.NT1233.wav
Creating: BaF1.1230H.1678278701.180827.NT1234.wav
Creating: BaF1.1230H.167

# Use the Soundtrap naming convention

In [None]:
#Soundtrap naming convention


class SplitWavAudioMubin():
    """Split a Soundtrap .wav file into shorter files, e.g. 1hr into 60*1min files.

    The source file name is expected to end with a 12-digit start timestamp
    (YYMMDDhhmmss) directly before the extension, e.g.
    '805322778.181023124723.wav'. Each chunk keeps the leading part of the
    name and gets the timestamp of its own start time, followed by '.wav'.

    NOTE: an earlier cell in this notebook defines another class with this
    same name (for the Lamont convention); running this cell replaces it.

    Input:
    folder = path to the folder with the file to be split
    filename = the file within the folder to split
    export_folder = the folder where the new files will be exported
                    (must be different to folder)
    """

    def __init__(self, folder, filename, export_folder):
        self.folder = folder
        self.filename = filename
        self.export_folder = export_folder
        self.filepath = os.path.join(folder, filename)
        self.audio = AudioSegment.from_wav(self.filepath)

    def get_duration(self):
        """Return the length of the loaded audio in seconds."""
        return self.audio.duration_seconds

    def single_split(self, from_min, to_min, split_filename):
        """Export the audio between from_min and to_min (in minutes) to
        export_folder/split_filename as a wav file."""
        # The audio object is sliced in milliseconds.
        start_ms = from_min * 60 * 1000
        end_ms = to_min * 60 * 1000
        chunk = self.audio[start_ms:end_ms]
        chunk.export(os.path.join(self.export_folder, split_filename), format="wav")

    def multiple_split(self, min_per_split):
        """Split the whole file into consecutive chunks of min_per_split minutes.

        Prints a message and returns None without splitting if export_folder
        is the same location as folder. Raises ValueError if min_per_split is
        smaller than 1.
        """
        # Compare normalised paths so a trailing slash cannot defeat the guard.
        if os.path.abspath(self.folder) == os.path.abspath(self.export_folder):
            print('Export folder must be different to folder')
            return None
        if min_per_split < 1:
            raise ValueError('min_per_split must be a whole number of minutes >= 1')

        # The export fails if the destination folder does not exist yet.
        os.makedirs(self.export_folder, exist_ok=True)

        # Rip the timestamp from the Soundtrap file name once, up front:
        # the last 12 characters before the extension.
        stem = os.path.splitext(self.filename)[0]
        name_prefix = stem[:-12]
        recording_start = pd.to_datetime(stem[-12:], format='%y%m%d%H%M%S')

        total_mins = math.ceil(self.get_duration() / 60)
        for i in range(0, total_mins, min_per_split):
            chunk_stamp = (recording_start + timedelta(minutes=i)).strftime('%y%m%d%H%M%S')
            # Keep the .wav extension so the chunks are recognised as audio
            # files (e.g. by a later "*.wav" search).
            new_name = name_prefix + chunk_stamp + '.wav'
            print('Creating: ' + new_name)
            self.single_split(i, i + min_per_split, new_name)

        # Report completion after the loop so it is also shown when the
        # duration is not an exact multiple of min_per_split.
        if total_mins > 0:
            print('Splits completed successfully')




#Added an additional function to execute the above class on multiple files in a folder
#Should probably integrate this into the class but have not done so yet
def split_multiple_files(folder, export_folder, min_per_split):
  """Split every .wav file directly inside folder into chunks in export_folder.

  folder = path to the folder holding the files to split
  export_folder = folder the chunks are written to
  min_per_split = length of each chunk in minutes

  The working directory of the notebook is left unchanged.
  """
  # Build the pattern from the folder path instead of os.chdir(), so relative
  # paths used elsewhere keep working; escape the folder in case it contains
  # glob special characters. Sorting gives a stable processing order.
  pattern = os.path.join(glob.escape(folder), "*.wav")
  wav_files = sorted(os.path.basename(path) for path in glob.glob(pattern))

  print('Found files: ')
  for file in wav_files:
    print(file)

  for file in wav_files:
    print('Now splitting: ' + file)
    split_wav = SplitWavAudioMubin(folder, file, export_folder)
    split_wav.multiple_split(min_per_split=min_per_split)


# Run the code on all files in a folder

In [None]:
# Folder holding the Soundtrap recordings to split, and a different folder to receive the chunks.
folder = r'/content/drive/MyDrive/More audio/soundtrap_naming_convention' #change
export_folder = r'/content/drive/MyDrive/More audio/soundtrap_naming_convention/export_folder' #change
min_per_split = 1  # length of each chunk in minutes
split_multiple_files(folder=folder, export_folder=export_folder, min_per_split=min_per_split)

Found files: 
805322778.181023124723.wav
805322778.181031235823.wav
Now splitting: 805322778.181023124723.wav
Creating: 805322778.181023124723
Creating: 805322778.181023124823
Creating: 805322778.181023124923
Creating: 805322778.181023125023
Creating: 805322778.181023125123
Creating: 805322778.181023125223
Creating: 805322778.181023125323
Creating: 805322778.181023125423
Creating: 805322778.181023125523
Creating: 805322778.181023125623
Splits completed successfully
Now splitting: 805322778.181031235823.wav
Creating: 805322778.181031235823
Creating: 805322778.181031235923
Creating: 805322778.181101000023
Creating: 805322778.181101000123
Creating: 805322778.181101000223
Creating: 805322778.181101000323
Creating: 805322778.181101000423
Creating: 805322778.181101000523
Creating: 805322778.181101000623
Creating: 805322778.181101000723
Splits completed successfully
