In [1]:
# Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Set Working Directory
import os
BASE_DIR = '/content/drive/MyDrive/speech_understanding_project'
DATA_DIR = os.path.join(BASE_DIR, 'data')
os.makedirs(DATA_DIR, exist_ok=True)


Mounted at /content/drive


In [2]:
!pip install transformers datasets torchaudio librosa soundfile tqdm


Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchaudio)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (f

In [3]:
# 📍 Step 3: Install Dependencies

import torch
import torchaudio
import librosa
from tqdm import tqdm
import numpy as np
import soundfile as sf


In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'


In [5]:
from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2FeatureExtractor, Wav2Vec2Processor, Wav2Vec2ForCTC
import torchaudio
import torch

# Correct model name
model_id = "facebook/wav2vec2-xlsr-53-espeak-cv-ft"

# Load components separately
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(model_id)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_id)
processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)
model = Wav2Vec2ForCTC.from_pretrained(model_id)
model.eval()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/4.64k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'Wav2Vec2PhonemeCTCTokenizer'. 
The class this function is called from is 'Wav2Vec2CTCTokenizer'.


preprocessor_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.86k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

Wav2Vec2ForCTC(
  (wav2vec2): Wav2Vec2Model(
    (feature_extractor): Wav2Vec2FeatureEncoder(
      (conv_layers): ModuleList(
        (0): Wav2Vec2LayerNormConvLayer(
          (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation): GELUActivation()
        )
        (1-4): 4 x Wav2Vec2LayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation): GELUActivation()
        )
        (5-6): 2 x Wav2Vec2LayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          (activation): GELUActivation()
        )
      )
    )
    (feature_projection): Wav2Vec2FeatureProjection(
      (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (projec

In [6]:
# Step 5: Define Feature Extraction Function
def extract_features_from_audio(file_path, model, processor):
    y, sr = librosa.load(file_path, sr=16000)
    inputs = processor(y, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = model(input_values=inputs.input_values.to(device))
    return outputs.last_hidden_state.squeeze(0).cpu().numpy()  # shape: [T, D]


In [7]:
# Step 6: Extract and Save Features for HRL & LRL
def extract_and_save_features_from_manifest(manifest_path, output_dir, prefix):
    os.makedirs(output_dir, exist_ok=True)
    with open(manifest_path, 'r') as f:
        lines = f.readlines()

    for i, line in enumerate(tqdm(lines)):
        path, dur = line.strip().split('\t')
        try:
            features = extract_features_from_audio(path, model, processor)
            fname = f"{prefix}_{i:03d}.npy"
            np.save(os.path.join(output_dir, fname), features)
        except Exception as e:
            print(f"Failed to process {path}: {e}")


In [8]:
#  Feature Extraction
hrl_manifest = os.path.join(DATA_DIR, 'hrl_manifest.tsv')
lrl_manifest = os.path.join(DATA_DIR, 'lrl_manifest.tsv')

hrl_feat_dir = os.path.join(DATA_DIR, 'features_hrl')
lrl_feat_dir = os.path.join(DATA_DIR, 'features_lrl')

extract_and_save_features_from_manifest(hrl_manifest, hrl_feat_dir, "hrl")
extract_and_save_features_from_manifest(lrl_manifest, lrl_feat_dir, "lrl")


  0%|          | 0/50 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/1.26G [00:00<?, ?B/s]

  2%|▏         | 1/50 [00:28<23:27, 28.72s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_000.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


  4%|▍         | 2/50 [00:32<11:10, 13.98s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_001.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


  6%|▌         | 3/50 [00:38<08:14, 10.53s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_002.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


  8%|▊         | 4/50 [00:42<06:07,  7.99s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_003.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 10%|█         | 5/50 [00:48<05:24,  7.20s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_004.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 12%|█▏        | 6/50 [00:54<04:51,  6.62s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_005.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 14%|█▍        | 7/50 [01:01<04:52,  6.81s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_006.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 16%|█▌        | 8/50 [01:06<04:21,  6.24s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_007.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 18%|█▊        | 9/50 [01:13<04:20,  6.36s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_008.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 20%|██        | 10/50 [01:18<04:06,  6.17s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_009.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 22%|██▏       | 11/50 [01:22<03:28,  5.34s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_010.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 24%|██▍       | 12/50 [01:27<03:18,  5.23s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_011.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 26%|██▌       | 13/50 [01:31<03:04,  4.99s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_012.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 28%|██▊       | 14/50 [01:37<03:14,  5.40s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_013.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 30%|███       | 15/50 [01:43<03:10,  5.44s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_014.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 32%|███▏      | 16/50 [01:47<02:54,  5.12s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_015.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 34%|███▍      | 17/50 [01:53<02:55,  5.32s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_016.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 36%|███▌      | 18/50 [01:57<02:40,  5.01s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_017.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 38%|███▊      | 19/50 [02:01<02:19,  4.51s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_018.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 40%|████      | 20/50 [02:08<02:36,  5.22s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_019.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 42%|████▏     | 21/50 [02:11<02:11,  4.55s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_020.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 44%|████▍     | 22/50 [02:14<01:58,  4.22s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_021.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 46%|████▌     | 23/50 [02:22<02:23,  5.30s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_022.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 48%|████▊     | 24/50 [02:29<02:34,  5.93s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_023.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 50%|█████     | 25/50 [02:39<02:57,  7.11s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_024.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 52%|█████▏    | 26/50 [02:43<02:24,  6.04s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_025.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 54%|█████▍    | 27/50 [02:48<02:14,  5.85s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_026.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 56%|█████▌    | 28/50 [02:52<01:54,  5.19s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_027.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 58%|█████▊    | 29/50 [02:57<01:46,  5.07s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_028.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 60%|██████    | 30/50 [03:00<01:31,  4.57s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_029.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 62%|██████▏   | 31/50 [03:06<01:37,  5.11s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_030.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 64%|██████▍   | 32/50 [03:11<01:26,  4.82s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_031.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 66%|██████▌   | 33/50 [03:15<01:18,  4.62s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_032.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 68%|██████▊   | 34/50 [03:18<01:07,  4.20s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_033.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 70%|███████   | 35/50 [03:24<01:09,  4.66s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_034.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 72%|███████▏  | 36/50 [03:32<01:21,  5.82s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_035.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 74%|███████▍  | 37/50 [03:38<01:17,  5.94s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_036.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 76%|███████▌  | 38/50 [03:43<01:06,  5.56s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_037.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 78%|███████▊  | 39/50 [03:48<00:57,  5.22s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_038.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 80%|████████  | 40/50 [03:54<00:56,  5.67s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_039.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 82%|████████▏ | 41/50 [04:01<00:55,  6.15s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_040.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 84%|████████▍ | 42/50 [04:06<00:45,  5.71s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_041.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 86%|████████▌ | 43/50 [04:11<00:39,  5.59s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_042.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 88%|████████▊ | 44/50 [04:16<00:30,  5.13s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_043.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 90%|█████████ | 45/50 [04:20<00:24,  4.95s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_044.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 92%|█████████▏| 46/50 [04:24<00:19,  4.80s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_045.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 94%|█████████▍| 47/50 [04:27<00:12,  4.22s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_046.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 96%|█████████▌| 48/50 [04:33<00:09,  4.62s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_047.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 98%|█████████▊| 49/50 [04:39<00:05,  5.16s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_048.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


100%|██████████| 50/50 [04:46<00:00,  5.73s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_hrl/hrl_049.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'



  2%|▏         | 1/50 [00:07<06:02,  7.39s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_000.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


  4%|▍         | 2/50 [00:14<05:37,  7.03s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_001.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


  6%|▌         | 3/50 [00:21<05:42,  7.28s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_002.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


  8%|▊         | 4/50 [00:27<05:08,  6.71s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_003.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 10%|█         | 5/50 [00:32<04:40,  6.23s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_004.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 12%|█▏        | 6/50 [00:38<04:24,  6.01s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_005.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 14%|█▍        | 7/50 [00:42<03:52,  5.41s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_006.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 16%|█▌        | 8/50 [00:48<03:56,  5.64s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_007.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 18%|█▊        | 9/50 [00:54<03:56,  5.77s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_008.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 20%|██        | 10/50 [01:00<03:52,  5.80s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_009.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 22%|██▏       | 11/50 [01:03<03:10,  4.88s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_010.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 24%|██▍       | 12/50 [01:11<03:38,  5.76s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_011.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 26%|██▌       | 13/50 [01:15<03:20,  5.41s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_012.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 28%|██▊       | 14/50 [01:20<03:03,  5.11s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_013.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 30%|███       | 15/50 [01:24<02:47,  4.79s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_014.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 32%|███▏      | 16/50 [01:29<02:42,  4.77s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_015.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 34%|███▍      | 17/50 [01:34<02:42,  4.94s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_016.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 36%|███▌      | 18/50 [01:37<02:18,  4.33s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_017.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 38%|███▊      | 19/50 [01:46<02:56,  5.69s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_018.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 40%|████      | 20/50 [01:51<02:48,  5.60s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_019.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 42%|████▏     | 21/50 [01:58<02:51,  5.90s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_020.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 44%|████▍     | 22/50 [02:03<02:42,  5.82s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_021.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 46%|████▌     | 23/50 [02:12<03:03,  6.80s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_022.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 48%|████▊     | 24/50 [02:17<02:36,  6.02s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_023.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 50%|█████     | 25/50 [02:21<02:19,  5.58s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_024.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 52%|█████▏    | 26/50 [02:27<02:16,  5.68s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_025.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 54%|█████▍    | 27/50 [02:31<02:00,  5.25s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_026.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 56%|█████▌    | 28/50 [02:38<02:01,  5.53s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_027.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 58%|█████▊    | 29/50 [02:42<01:46,  5.08s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_028.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 60%|██████    | 30/50 [02:45<01:33,  4.66s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_029.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 62%|██████▏   | 31/50 [02:53<01:43,  5.46s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_030.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 64%|██████▍   | 32/50 [02:57<01:34,  5.26s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_031.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 66%|██████▌   | 33/50 [03:06<01:48,  6.40s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_032.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 68%|██████▊   | 34/50 [03:11<01:34,  5.89s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_033.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 70%|███████   | 35/50 [03:16<01:24,  5.63s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_034.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 72%|███████▏  | 36/50 [03:20<01:09,  4.96s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_035.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 74%|███████▍  | 37/50 [03:25<01:07,  5.19s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_036.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 76%|███████▌  | 38/50 [03:32<01:06,  5.55s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_037.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 78%|███████▊  | 39/50 [03:36<00:55,  5.04s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_038.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 80%|████████  | 40/50 [03:39<00:44,  4.48s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_039.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 82%|████████▏ | 41/50 [03:42<00:36,  4.07s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_040.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 84%|████████▍ | 42/50 [03:47<00:35,  4.49s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_041.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 86%|████████▌ | 43/50 [03:50<00:27,  3.99s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_042.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 88%|████████▊ | 44/50 [03:57<00:28,  4.79s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_043.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 90%|█████████ | 45/50 [04:00<00:22,  4.47s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_044.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 92%|█████████▏| 46/50 [04:08<00:21,  5.27s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_045.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 94%|█████████▍| 47/50 [04:13<00:15,  5.27s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_046.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 96%|█████████▌| 48/50 [04:16<00:09,  4.51s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_047.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


 98%|█████████▊| 49/50 [04:19<00:04,  4.08s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_048.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'


100%|██████████| 50/50 [04:26<00:00,  5.32s/it]

Failed to process /content/drive/MyDrive/speech_understanding_project/data/commonvoice_lrl/lrl_049.wav: 'CausalLMOutput' object has no attribute 'last_hidden_state'





In [9]:

print("Feature extraction complete!")



Feature extraction complete!
