Be sure to rename the zip file to wavs.zip before running the cells below, and make sure it is compressed with the expected folder layout before uploading it:

For example: wavs/data/file1.wav, file2.wav, ...

In [1]:
%%bash
# Install pytaglib (imported as `taglib` by the processing cell below).
pip install pytaglib
# Extract the uploaded archive. No -d target is given, so entries are extracted
# relative to the current working directory.
# NOTE(review): the processing cell reads from /content/data/wavs -- confirm the
# archive layout puts the .wav files there, or move them before running it.
unzip /content/wavs.zip

Collecting pytaglib
  Downloading pytaglib-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 34.8 MB/s eta 0:00:00
Installing collected packages: pytaglib
Successfully installed pytaglib-3.0.0
Archive:  /content/wavs.zip
  inflating: 1.wav                   
  inflating: 10.wav                  
  inflating: 100.wav                 
  inflating: 11.wav                  
  inflating: 12.wav                  
  inflating: 13.wav                  
  inflating: 14.wav                  
  inflating: 15.wav                  
  inflating: 16.wav                  
  inflating: 17.wav                  
  inflating: 18.wav                  
  inflating: 19.wav                  
  inflating: 2.wav                   
  inflating: 20.wav                  
  inflating: 21.wav                  
  inflating: 22.wav                  
  inflating: 23.wav                  
  inflating: 24.wav                  
  inflating: 2

In [None]:
import os
import shutil
import taglib
import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import librosa
import soundfile as sf

# Define the paths. If this raises an error, try using three separate folders
# (presumably one each for the input, preprocessed and metadata-tagged files).
input_path = "/content/data/wavs"  # Change this to the folder holding your .wav files
output_path = "/content/data/wavs_prepro"  # Folder for the preprocessed .wav files
metadata_output_path = "/content/data/wavs_metadata"  # Folder for the metadata-tagged .wav files
output_file = os.path.join(metadata_output_path, "list.txt")  # Transcript list written by the last step

# Ensure the output directories exist. Only the two output folders are created
# here; input_path must already exist and contain the .wav files.
os.makedirs(output_path, exist_ok=True)
os.makedirs(metadata_output_path, exist_ok=True)

# Step 1: Rename .wav files in the folder
def rename_wave_files(folder_path):
    """Rename every .wav file in *folder_path* to ``1.wav``, ``2.wav``, ...

    Files are numbered in natural (human) order of their current names, so
    ``2.wav`` is numbered before ``10.wav`` and the result is the same on
    every run regardless of the order ``os.listdir`` happens to return.

    The rename is done in two phases: every file is first moved to a unique
    temporary name and only then to its final ``<index>.wav`` name. Renaming
    directly could overwrite a file that has not been processed yet (for
    example ``10.wav`` -> ``1.wav`` while the original ``1.wav`` still
    exists), silently destroying audio.

    Args:
        folder_path: Directory containing the .wav files (extension matched
            case-insensitively).

    Returns:
        The number of .wav files renamed, or 0 if an error occurred (the
        error is printed, not raised). If a failure happens mid-way, some
        files may be left under their temporary ``.rename-*.tmp`` names.
    """
    # Local imports keep this block self-contained.
    import re
    import uuid

    def _natural_key(name):
        # re.split with a capturing group alternates text / digit runs, so
        # odd positions are always the numeric parts.
        parts = re.split(r"(\d+)", name.lower())
        key = [int(part) if pos % 2 else part for pos, part in enumerate(parts)]
        # The raw name breaks ties between names differing only by case.
        return key, name

    try:
        files = os.listdir(folder_path)
        wav_files = sorted(
            (f for f in files if f.lower().endswith('.wav')),
            key=_natural_key,
        )

        print(f'Folder path: {folder_path}')
        print(f'All files in the folder: {files}')
        print(f'Wave files found: {len(wav_files)}')

        # Phase 1: move everything out of the way under collision-free names.
        token = uuid.uuid4().hex
        staged = []
        for index, wav_file in enumerate(wav_files, start=1):
            old_path = os.path.join(folder_path, wav_file)
            tmp_path = os.path.join(folder_path, f'.rename-{token}-{index}.tmp')
            new_path = os.path.join(folder_path, f'{index}.wav')
            os.rename(old_path, tmp_path)
            staged.append((old_path, tmp_path, new_path))

        # Phase 2: no .wav names remain, so the final names cannot clash.
        for old_path, tmp_path, new_path in staged:
            os.rename(tmp_path, new_path)
            print(f'Renamed {old_path} to {new_path}')

        return len(wav_files)
    except Exception as e:
        print(f"Error in renaming wave files: {e}")
        return 0

# Step 2: Preprocess the .wav files (trim silence and normalize audio)
def preprocess_wave_files(input_path, output_path):
    """Trim silence from and normalize each ``.wav`` file, saving the result.

    Every file in *input_path* whose name ends with ``.wav`` is loaded at
    22050 Hz, trimmed with ``top_db=20``, normalized, and written under the
    same file name to *output_path* as 16-bit PCM.

    Any exception stops the whole run; it is printed rather than raised, and
    files after the failing one are not processed.
    """
    try:
        wav_names = [name for name in os.listdir(input_path) if name.endswith(".wav")]

        for wav_name in wav_names:
            source = os.path.join(input_path, wav_name)
            target = os.path.join(output_path, wav_name)

            # Load, then drop leading/trailing silence.
            samples, rate = librosa.load(source, sr=22050)
            voiced, _ = librosa.effects.trim(samples, top_db=20)

            # Normalize and store as 16-bit PCM under the same name.
            sf.write(target, librosa.util.normalize(voiced), rate, subtype='PCM_16')

        print("All .wav files have been preprocessed and saved to the output folder.")
    except Exception as err:
        print(f"Error in preprocessing wave files: {err}")

# Step 3: Update metadata for the .wav files
def update_metadata(input_folder, output_folder, num_files):
    """Tag ``1.wav`` .. ``<num_files>.wav`` and copy them to *output_folder*.

    For each index ``i`` the file ``<input_folder>/<i>.wav`` gets its
    ``TITLE`` and ``TRACKNUMBER`` tags set to ``i`` (written in place), and
    the tagged file is then copied to ``<output_folder>/<i>.wav``.

    Args:
        input_folder: Directory holding the numbered .wav files.
        output_folder: Directory receiving the tagged copies.
        num_files: Highest file index to process (inclusive).

    Errors are printed, never raised. A failure on one file is reported and
    the remaining files are still processed; missing files are reported and
    skipped.
    """
    try:
        for i in range(1, num_files + 1):
            input_file = os.path.join(input_folder, f"{i}.wav")
            # Named differently from the module-level ``output_file`` to
            # avoid shadowing it.
            destination = os.path.join(output_folder, f"{i}.wav")

            if not os.path.exists(input_file):
                print(f"File {i}.wav not found.")
                continue

            try:
                # The context manager closes the taglib handle before the
                # file is copied, so the copy sees the fully written tags.
                with taglib.File(input_file) as audio:
                    audio.tags["TITLE"] = [f"{i}"]
                    audio.tags["TRACKNUMBER"] = [f"{i}"]
                    audio.save()

                # Copy (rather than move) so the preprocessed file stays put.
                shutil.copy2(input_file, destination)

                print(f"Updated metadata for {i}.wav: title='{i}', track number={i}")
            except Exception as e:
                # Keep going: one unreadable file should not block the rest.
                print(f"Error in updating metadata for {i}.wav: {e}")
    except Exception as e:
        print(f"Error in updating metadata: {e}")

# Step 4: Transcribe the preprocessed .wav files and create the list.txt file
def transcribe_wave_files(wav_directory, output_file, model, processor, num_files,
                          path_prefix="/content/TTS-TT2/wavs"):
    """Transcribe ``1.wav`` .. ``<num_files>.wav`` and write a list file.

    Each existing file is loaded, mixed down to a single channel, resampled
    to 16 kHz, run through *model*, and greedily decoded with *processor*.
    One line per successfully transcribed file is written to *output_file*
    in the form ``<path_prefix>/<i>.wav|<transcript>``.

    Args:
        wav_directory: Directory holding the numbered .wav files.
        output_file: Path of the list file to write (overwritten).
        model: Callable returning an object with a ``.logits`` attribute
            (e.g. ``Wav2Vec2ForCTC``).
        processor: Object providing ``__call__`` and ``decode``
            (e.g. ``Wav2Vec2Processor``).
        num_files: Highest file index to process (inclusive).
        path_prefix: Directory written in front of each file name in the
            list file. Defaults to the previously hard-coded location.

    Errors are printed, never raised. Files that are missing or fail to
    transcribe are reported and left out of the list; the list file is
    still written (possibly empty).
    """
    file_and_transcripts = []
    prefix = path_prefix.rstrip("/")

    try:
        for i in range(1, num_files + 1):
            wav_file = os.path.join(wav_directory, f"{i}.wav")

            if not os.path.exists(wav_file):
                print(f"File not found: {wav_file}")
                continue

            try:
                waveform, sample_rate = torchaudio.load(wav_file)
                # Average over the channel axis: identical to squeeze() for
                # mono input, and also yields 1-D audio for stereo files.
                waveform = waveform.mean(dim=0)
                resampler = torchaudio.transforms.Resample(
                    orig_freq=sample_rate, new_freq=16000)
                waveform = resampler(waveform)
                input_values = processor(
                    waveform, return_tensors="pt", sampling_rate=16000).input_values

                # Inference only: skip building the autograd graph.
                with torch.no_grad():
                    logits = model(input_values).logits
                predicted_ids = torch.argmax(logits, dim=-1)
                transcript = processor.decode(predicted_ids[0])

                file_and_transcripts.append(f"{prefix}/{i}.wav|{transcript}")
            except Exception as e:
                print(f"Error processing file {wav_file}: {e}")

        # Explicit encoding so the list file does not depend on the locale.
        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(f"{line}\n" for line in file_and_transcripts)

        print(f"File '{output_file}' created successfully.")
    except Exception as e:
        print(f"Error in transcribing wave files: {e}")

if __name__ == "__main__":
    # Run the pipeline stages in order. Any exception that escapes a stage is
    # reported here instead of being re-raised.
    try:
        # Stage 1: give every input .wav a sequential numeric name.
        num_files = rename_wave_files(input_path)
        if not num_files:
            raise ValueError("No .wav files found to process.")
        print("Wave files renamed successfully!")

        # Stage 2: trim/normalize into the preprocessing folder.
        preprocess_wave_files(input_path, output_path)

        # Stage 3: tag the preprocessed files and copy them to the metadata folder.
        update_metadata(output_path, metadata_output_path, num_files)

        # Stage 4: load the wav2vec 2.0 checkpoint once, then transcribe the
        # tagged files into list.txt.
        checkpoint = "facebook/wav2vec2-large-960h"
        model = Wav2Vec2ForCTC.from_pretrained(checkpoint)
        processor = Wav2Vec2Processor.from_pretrained(checkpoint)

        transcribe_wave_files(metadata_output_path, output_file, model, processor, num_files)
    except Exception as err:
        print(f"An error occurred: {err}")


In [None]:
# Zip the metadata folder so it can be downloaded for further use; adjust the paths as needed.
# NOTE(review): if the working directory still contains the uploaded wavs.zip,
# `zip -r` adds to that existing archive instead of creating a fresh one --
# consider a different output name.

!zip -r wavs.zip /content/data/wavs_metadata
