## Importing necessary libraries

In [1]:
from datasets import load_dataset
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch

## Load the data

In [2]:
def load_audio_data(data_name, split="train[:100]"):
    """Fetch a dataset split and echo its structure to stdout.

    Args:
        data_name: Dataset identifier forwarded unchanged to ``load_dataset``.
        split: Split expression forwarded to ``load_dataset`` as the
            ``split`` keyword (defaults to ``"train[:100]"``).

    Returns:
        The object returned by ``load_dataset`` for that name and split.
    """
    loaded = load_dataset(data_name, split=split)
    summary = f"Dataset structure: {loaded}"
    print(summary)
    return loaded

In [3]:
# # Load the data
# data = load_dataset("charris/hubert_process_filter_spotify", split="train[:100]")
# # Check the dataset structure
# print(data)

## Select audio files

In [4]:
def select_audio(data, num_files=5):
    """Return the 'audio' entries of the first ``num_files`` rows of ``data``.

    Args:
        data: Object that supports slice indexing and whose slice exposes an
            ``'audio'`` key (presumably a Hugging Face dataset — confirm
            against the caller).
        num_files: Upper bound on how many leading rows to take.

    Returns:
        The value stored under ``'audio'`` for the sliced rows.
    """
    audio_files = data[:num_files]['audio']
    # Report the number actually selected: the slice can yield fewer entries
    # than requested when the dataset is shorter than ``num_files``.
    print(f"Selected {len(audio_files)} audio files")
    return audio_files

In [5]:
# Select the first five audio files
# audio_files = data[:5]['audio']

## Load the model and processor


In [6]:
# Load the Wav2Vec2 model and processor for speech-to-text
def load_model_and_processor(model_name="facebook/wav2vec2-large-960h"):
    """Instantiate the Wav2Vec2 processor and CTC model from one checkpoint.

    Args:
        model_name: Checkpoint identifier passed to both ``from_pretrained``
            calls (defaults to ``"facebook/wav2vec2-large-960h"``).

    Returns:
        A ``(processor, model)`` tuple, in that order.
    """
    # Processor first, then model, both from the same checkpoint name.
    loaders = (Wav2Vec2Processor, Wav2Vec2ForCTC)
    processor, model = (loader.from_pretrained(model_name) for loader in loaders)
    print("Model and processor loaded")
    return processor, model

## Audio transcription function

In [7]:
# Transcribe an audio file using the Wav2Vec2 model.
def transcribe_audio(audio_file, processor, model):
    """Run one audio entry through the processor and model and decode it.

    Args:
        audio_file: Mapping with an ``'array'`` entry (the samples) and a
            ``'sampling_rate'`` entry.
        processor: Callable that yields an object with ``input_values`` and
            that also provides ``decode``.
        model: Callable whose result exposes ``logits``.

    Returns:
        Whatever ``processor.decode`` returns for the first row of the
        arg-max token ids (presumably the only item in the batch).
    """
    samples = audio_file['array']
    rate = audio_file['sampling_rate']
    features = processor(samples, return_tensors="pt", sampling_rate=rate)

    # Inference only: skip gradient tracking for the forward pass.
    with torch.no_grad():
        outputs = model(features.input_values)

    # Greedy decoding: highest-scoring id along the last logits dimension.
    token_ids = outputs.logits.argmax(dim=-1)
    return processor.decode(token_ids[0])

## Transcribe and save the selected audio files

In [8]:
# Transcribe a list of audio files.
def transcribe_audio_files(audio_file, processor, model):
    """Transcribe every audio entry in a collection.

    Args:
        audio_file: Iterable of audio entries; each one is handed to
            ``transcribe_audio``. The singular parameter name is kept so
            existing keyword callers keep working.
        processor: Forwarded unchanged to ``transcribe_audio``.
        model: Forwarded unchanged to ``transcribe_audio``.

    Returns:
        A list with one transcription per input entry, in input order
        (empty when the input is empty).
    """
    transcriptions = []
    # Iterate the argument, not a module-level variable, so the function
    # works on a fresh kernel and with any collection passed in.
    for entry in audio_file:
        text = transcribe_audio(entry, processor, model)
        transcriptions.append(text)
        print(f"Transcription: {text}")
    # Return the whole list rather than only the last transcription.
    return transcriptions

In [9]:
# Save the transcriptions to a text file
def save_transcriptions(transcriptions, file_name="transcriptions.txt"):
    """Write numbered transcriptions to a text file, one per line.

    Each line has the form ``Audio <n>: <transcription>`` with ``n``
    starting at 1. An existing file at ``file_name`` is overwritten.

    Args:
        transcriptions: Iterable of transcription strings.
        file_name: Destination path (defaults to ``"transcriptions.txt"``).
    """
    # Honour the caller-supplied path instead of a hard-coded file name, and
    # pin the encoding so the output does not depend on the platform default.
    with open(file_name, "w", encoding="utf-8") as file:
        for index, transcription in enumerate(transcriptions, start=1):
            file.write(f"Audio {index}: {transcription}\n")
    # Report only after the file has been closed.
    print(f"Transcription saved to {file_name}")

In [10]:
# Driver cell: runs the whole pipeline end to end (load -> select ->
# load model -> transcribe -> save). The names assigned here are module-level,
# so they remain visible to later cells in the same kernel.
if __name__ == "__main__":
    # Step 1: Load the dataset (the split expression limits it to the
    # first 100 rows of the "train" split)
    dataset = load_audio_data("charris/hubert_process_filter_spotify", split="train[:100]")
    
    # Step 2: Select audio files (the 'audio' entries of the first 5 rows)
    audio_files = select_audio(dataset, num_files=5)
    
    # Step 3: Load the model and processor (default checkpoint name)
    processor, model = load_model_and_processor()
    
    # Step 4: Transcribe audio files
    transcriptions = transcribe_audio_files(audio_files, processor, model)
    
    # Step 5: Save transcriptions to a file (no file name is passed, so the
    # function's default "transcriptions.txt" applies)
    save_transcriptions(transcriptions)

Dataset structure: Dataset({
    features: ['audio', 'transcription', 'input_values', 'input_length', 'labels'],
    num_rows: 100
})
Selected 5 audio files


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model and processor loaded
Transcription: ABOUT PEOPLE HAVING A URINATE ON THEMSELVES LIKE BECAUSE THERE WAS NO SPACE I COULD ALMOST FEEL THE ILLNESS IN THE SICKNESS AND BUT I ALSO LOVE HOW IN THAT SAME THING IN THAT IMAGE SREE HE WAS LIKE MY WANCS ALL TO KNOW THAT OUR ANCESTORS DID FIGHT BACK AND HE SAID A LOT OF TIMES I HAVE PEOPLE NOT HOW MY PERNT A LIKE WHY DIDN'T OUR ANCESTORS FIGHT AND LIKE GET ANGRY ABOUT THEM HE'S LIKE NODEY DID I WANT SHE KNOW MANY PEOPLE LOST THEIR LIFE AT THIS CASTLE TRYING TO FIGHT BACK AH TO ME THAT WAS THE MOST
Transcription: SO LONG STORY SHORT I AM JUST GOING THROUGH A VERY INTERESTING PHASE RIGHT NOW AND MY MENTAL HEALTH IS VERY INTERESTING BUT I HAVE SUCH AMAZING PEOPLE AROUND ME I HAVE SO MANY SAFE SPACES AM AND SO I'M GETTING THE HEALP THAT I NEED AND IT'S GLORIOUS BUT I'VE JUST REALIZED THAT CREATING HAS BECOME SO DIFFICULT AND WHEN I WAS YOUNGER WHEN I HAD MOMENTS WHEN MY MENTAL HEALTH WAS AM WEIGHING ME DOWN ALT WAS JUST A LOT OR IF WHEN I WAS DE