# Version Checks (System Dependent)

Run these installation commands before running the notebook:

```bash
# PyTorch wheels built against CUDA 12.6
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
# System libraries for audio decoding
sudo apt-get update && sudo apt-get install -y libsndfile1 ffmpeg
pip install Cython packaging
# Quote the whole requirement so the shell never treats [asr] as a glob pattern
pip install "nemo_toolkit[asr]"
pip install datasets
```

## Numba can detect CUDA

In [1]:
# Sanity check: ask Numba whether it can see a usable CUDA device.
from numba import cuda
print(cuda.is_available())
# NOTE(review): original note says this requires numpy 1.24 or greater —
# confirm against the NumPy range supported by the installed Numba release.

True


## NumPy version check

In [2]:
import numpy
# Record the installed NumPy version next to the other environment checks.
print(numpy.__version__)

1.26.4


## Torch can detect CUDA

In [3]:
import torch

# Report the installed PyTorch build, then whether it can reach a CUDA device
# (one value per line).
print(torch.__version__, torch.cuda.is_available(), sep="\n")

2.7.0+cu126
True


## NeMo is installed correctly

In [4]:
import nemo
# Record the installed NeMo toolkit version.
print(nemo.__version__)

2.3.1


In [5]:
import nemo.collections.asr as nemo_asr

In [6]:
# `!export ...` runs in a throwaway subshell, so the variable never reached the
# kernel process. Set it on the kernel's own environment instead.
# NOTE(review): CUDA_VISIBLE_DEVICES is normally only honoured when it is set
# before CUDA is initialised in this process. Earlier cells already query CUDA,
# so this cell may need to move to the very top of the notebook — confirm.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Main workflow starts here

In [7]:
from datasets import load_dataset

# Access to this dataset requires being logged in to the Hugging Face Hub
# first, e.g. via `huggingface-cli login`.
ds = load_dataset(path="ThePyProgrammer/asr")

In [8]:
import torch
import os
import json
import librosa
import glob
import subprocess
import nemo.collections.asr as nemo_asr
from omegaconf import OmegaConf, DictConfig
from nemo.core.config import hydra_runner
from nemo.utils import logging
from nemo.utils.exp_manager import exp_manager

import lightning.pytorch as pl
# Correct import
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from lightning.pytorch.loggers import TensorBoardLogger

print(f"PyTorch CUDA available: {torch.cuda.is_available()} CUDA version: {torch.version.cuda}")

# Working directory for the extracted audio and manifests: <cwd>/files/.
# The same location is also published through the DATA_DIR environment variable.
DATA_DIR = f"{os.getcwd()}/files/"
os.environ["DATA_DIR"] = DATA_DIR


PyTorch CUDA available: True CUDA version: 12.6


In [9]:
# NOTE: disabled legacy steps for downloading and unpacking the AN4 archive.
# Every line below is commented out, so this cell does nothing when run.
# if not os.path.exists(f"{DATA_DIR}/an4_sphere.tar.gz"):
#     !wget https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz

# # Extract and convert data
# if not os.path.exists(f"{DATA_DIR}/an4"):
#     !tar -xvf an4_sphere.tar.gz
    
#     !mv an4 $DATA_DIR

In [10]:
from huggingface_hub import snapshot_download

# Download the raw dataset repository; `directory` is the local path of the
# snapshot that the parquet shards are read from afterwards.
directory = snapshot_download(
    repo_type="dataset",
    repo_id="ThePyProgrammer/asr",
)

Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

In [11]:
import os
import shutil

# Start from an empty files/ directory. Unlike `!rm -r` / `!mkdir`, this does
# not error on a fresh checkout where files/ does not exist yet, and a genuine
# failure (e.g. permissions) raises instead of being printed and ignored.
if os.path.isdir("files"):
    shutil.rmtree("files")
os.makedirs("files", exist_ok=True)

In [12]:
import pyarrow.parquet as pq

# Walk the eight training shards in order. Transcripts are collected into one
# list and each row's audio bytes are written to files/<j>.wav, so file N lines
# up with transcript[N].
transcript = []
j = 0  # running clip index across all shards; also the wav file name
for shard_idx in range(8):
    shard_path = f'{directory}/data/train-0000{shard_idx}-of-00008.parquet'
    df = pq.read_table(shard_path).to_pandas()
    transcript.extend(df['transcript'].to_list())
    for audio_entry in df['audio']:
        # 'bx' = binary + exclusive create: raises if the file already exists
        with open(f'files/{j}.wav', mode='bx') as wav_file:
            wav_file.write(audio_entry['bytes'])
        j += 1

In [13]:
from tqdm import tqdm
def an4_build_manifest(trans_array, manifest_path, target_wavs_dir):
    """Write a JSON-lines manifest pairing transcripts with numbered wav files.

    Entry ``idx`` of ``trans_array`` is paired with the audio file
    ``<target_wavs_dir>/<idx>.wav``. One JSON object per line is written with
    the keys ``audio_filepath``, ``duration`` and ``text``.

    Args:
        trans_array: Sequence of transcripts, ordered by wav index.
        manifest_path: Destination file; opened in ``'w'`` mode, so an
            existing file is overwritten.
        target_wavs_dir: Directory containing ``0.wav``, ``1.wav``, ...

    Returns:
        Tuple ``(longest, shortest)`` of the clip durations reported by
        ``librosa.get_duration``. For an empty ``trans_array`` the historical
        sentinel values ``(-1, 1000)`` are returned.
    """
    durations = []
    with open(manifest_path, 'w') as fout:
        for idx, trans in enumerate(tqdm(trans_array)):
            audio_path = os.path.join(target_wavs_dir, str(idx) + '.wav')
            # `filename=` is deprecated in librosa >= 0.10; `path=` replaces it.
            duration = librosa.get_duration(path=audio_path)
            durations.append(duration)
            metadata = {"audio_filepath": audio_path, "duration": duration, "text": trans}
            json.dump(metadata, fout)
            fout.write('\n')
    # Compute the extremes from the real values so clips longer than 1000 are
    # not capped by the old `mini = 1000` starting sentinel. The defaults keep
    # the previous return value for empty input.
    return max(durations, default=-1), min(durations, default=1000)

# The wav files already live in DATA_DIR, so source and target directories coincide.
source_data_dir = target_data_dir = f"{DATA_DIR}"

# Manifest locations
train_manifest = os.path.join(DATA_DIR, 'train_manifest.json')
test_manifest = os.path.join(DATA_DIR, 'test_manifest.json')

# Training manifest covers every transcript; keep its longest/shortest durations.
maxi, mini = an4_build_manifest(transcript, train_manifest, DATA_DIR)

# NOTE(review): the test manifest reuses the first 8 training clips, so it
# overlaps with the training data — confirm this is intentional.
an4_build_manifest(transcript[:8], test_manifest, DATA_DIR)

maxi, mini

100%|████████████████████████████████████████████████████████████████████████████████████████████| 4500/4500 [06:01<00:00, 12.43it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 17.87it/s]


(73.038375, 1.33225)

In [22]:
import argparse
import os
from huggingface_hub import snapshot_download


def download_model(model_name, path, revision="main", local_dir_name=None, ignore_patterns=None):
    """
    Download a model from HuggingFace Hub.

    Args:
        model_name (str): Name of the model to download (e.g., facebook/wav2vec2-base-960h)
        path (str): Parent directory under which the model folder is created
            (the directory itself is created if missing)
        revision (str): Specific model revision to download
        local_dir_name (str, optional): Custom directory name for the downloaded model.
            Defaults to the last component of ``model_name``.
        ignore_patterns (list[str] | str, optional): Glob patterns of repository
            files to skip. When None, the list this function has always used is
            applied: ``*.msgpack``, ``*.safetensors``, ``*.h5``, ``*.ot``, ``*.tflite``.
            Pass ``[]`` to download everything, e.g. for repositories that ship
            their weights only as safetensors.

    Returns:
        Path to the downloaded model, or None if the download failed. Failures
        are printed rather than raised, so callers must check for None.
    """
    # Built per call to avoid sharing one mutable default list between calls.
    if ignore_patterns is None:
        ignore_patterns = ["*.msgpack", "*.safetensors", "*.h5", "*.ot", "*.tflite"]

    # Create the directory if it doesn't exist
    os.makedirs(path, exist_ok=True)

    # Set the local directory name
    if local_dir_name is None:
        # Use the last part of the model name as directory name
        # e.g., facebook/wav2vec2-base-960h -> wav2vec2-base-960h
        local_dir_name = model_name.split('/')[-1]

    local_dir = os.path.join(path, local_dir_name)

    print(f"Downloading model '{model_name}' (revision: {revision})...")
    print(f"Target directory: {local_dir}")

    try:
        # Download the model
        model_path = snapshot_download(
            repo_id=model_name,
            revision=revision,
            local_dir=local_dir,
            ignore_patterns=ignore_patterns
        )
        print(f"Successfully downloaded model to {model_path}")
        return model_path
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with
        # None so the notebook keeps running.
        print(f"Error downloading model: {e}")
        return None



In [23]:
# Fetch the nvidia/parakeet-tdt-0.6b-v2 repository into ./parakeet-tdt-0.6b-v2
download_model(model_name='nvidia/parakeet-tdt-0.6b-v2', path='./')

Downloading model 'nvidia/parakeet-tdt-0.6b-v2' (revision: main)...
Target directory: ./parakeet-tdt-0.6b-v2


Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

Successfully downloaded model to /home/raid/cognition/til/asr/notebooks/parakeet-tdt-0.6b-v2


'/home/raid/cognition/til/asr/notebooks/parakeet-tdt-0.6b-v2'