In [1]:
import os
import requests

In [2]:
def download_release_assets(repo_owner, repo_name,
                            target_asset_name="videoplayback_Source_StandardAudio.wav",
                            timeout=30):
    """Download one named asset from a repository's latest GitHub release.

    The asset is saved by ``download_asset`` as ``audio_from_gh/<repo_name>.wav``.
    If that file already exists, nothing is fetched.

    Args:
        repo_owner: GitHub user or organisation that owns the repository.
        repo_name: Repository name; also used as the local file stem.
        target_asset_name: Exact name of the release asset to download.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        None. Progress and failures are reported with ``print`` so that a
        failure for one repository does not abort a batch loop.
    """
    os.makedirs("audio_from_gh", exist_ok=True)
    if os.path.exists(f"audio_from_gh/{repo_name}.wav"):
        print(f"File {repo_name}.wav already exists. Skipping download.")
        return

    url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/releases/latest"
    try:
        # Without a timeout a stalled connection would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch release data for {repo_owner}/{repo_name}: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch release data for {repo_owner}/{repo_name}. Status code: {response.status_code}")
        return

    assets = response.json().get('assets', [])
    if not assets:
        print(f"No assets found for {repo_owner}/{repo_name}")
        return

    for asset in assets:
        asset_name = asset.get('name', 'unknown_asset')
        if asset_name != target_asset_name:
            # Other assets are simply not wanted; they are not an error.
            continue

        asset_url = asset.get('browser_download_url')
        if not asset_url:
            print(f"No download URL found for asset '{asset_name}'")
            return

        print(f"Downloading asset '{asset_name}' from {repo_owner}/{repo_name}")
        download_asset(asset_url, asset_name, repo_name)
        return

    print(f"Asset '{target_asset_name}' not found in the latest release of {repo_owner}/{repo_name}")

def download_asset(asset_url, asset_name, repo_name, timeout=30):
    """Stream a release asset to ``audio_from_gh/<repo_name>.wav``.

    The data is first written to a ``.part`` file next to the destination and
    only renamed into place once the transfer has finished. An interrupted
    download therefore never leaves a truncated ``.wav`` that an "already
    exists" check would later mistake for a complete file.

    Args:
        asset_url: Direct download URL of the asset.
        asset_name: Asset name, used only in progress messages.
        repo_name: Repository name; determines the local file name.
        timeout: Seconds to wait for the server (connect / between reads).

    Returns:
        None. Success and failure are reported with ``print``.
    """
    target_path = f"audio_from_gh/{repo_name}.wav"
    partial_path = f"{target_path}.part"
    os.makedirs("audio_from_gh", exist_ok=True)

    try:
        # stream=True avoids holding the whole audio file in memory at once.
        with requests.get(asset_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download asset '{asset_name}' from {asset_url}. Status code: {response.status_code}")
                return
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        # Atomic rename: the final path only ever holds a complete file.
        os.replace(partial_path, target_path)
    except (requests.RequestException, OSError) as exc:
        print(f"Failed to download asset '{asset_name}' from {asset_url}: {exc}")
        return
    finally:
        # Runs on every exit path (including interruption) so no partial file lingers.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"Downloaded {asset_name}")

# Each entry is a dict with "owner" and "name" keys. All repositories listed
# here share the same owner, so the list is built from the names alone.
repositories = [
    {"owner": "MonlamAI", "name": repo_name}
    for repo_name in (
        "STT_MV0271",
        "STT_MV0471",
        "STT_MV0277",
        "STT_MV0475",
        "STT_MV0272",
        "STT_MV0464",
        "STT_MV0463",
        "STT_MV0466",
        "STT_MV0276",
        "STT_MV0462",
        "STT_MV0465",
        "STT_MV0469",
        "STT_MV0274",
        "STT_MV0273",
    )
]

# Process the repositories one by one, in the order listed above.
for repo in repositories:
    download_release_assets(repo_owner=repo["owner"], repo_name=repo["name"])


In [None]:
import subprocess
def convert_to_16K(file):
    """Convert ``audio_from_gh/<file>.wav`` to 16 kHz mono 16-bit PCM WAV.

    The result is written to ``audio_16k/<file>.wav``. ffmpeg writes to a
    ``.part`` file first, which is renamed into place only on success, so a
    failed run never leaves an output that a later call would skip over.

    Args:
        file: File stem (no directory, no ``.wav`` extension).

    Returns:
        True if a new converted file was produced. False if the output
        already existed, the input is missing, ffmpeg is not installed, or
        ffmpeg reported an error.
    """
    source_path = f"audio_from_gh/{file}.wav"
    target_path = f"audio_16k/{file}.wav"
    partial_path = f"{target_path}.part"

    if os.path.exists(target_path):
        return False
    if not os.path.exists(source_path):
        print(f"Cannot convert '{file}': {source_path} does not exist")
        return False

    # ffmpeg does not create missing output directories itself.
    os.makedirs("audio_16k", exist_ok=True)

    # Argument list + no shell: file names containing spaces or shell
    # metacharacters are passed through verbatim instead of being interpreted.
    command = [
        "ffmpeg", "-y",
        "-i", source_path,
        "-f", "wav", "-bitexact",
        "-acodec", "pcm_s16le",
        "-ac", "1",
        "-ar", "16000",
        partial_path,
    ]
    try:
        result = subprocess.run(command, stdin=subprocess.DEVNULL, capture_output=True)
        if result.returncode != 0:
            error_lines = result.stderr.decode(errors="replace").strip().splitlines()
            reason = error_lines[-1] if error_lines else f"exit code {result.returncode}"
            print(f"ffmpeg failed for '{file}': {reason}")
            return False
        os.replace(partial_path, target_path)
    except FileNotFoundError:
        print("ffmpeg executable not found; install ffmpeg and make sure it is on PATH")
        return False
    finally:
        # Clean up whatever ffmpeg managed to write if the run did not succeed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return True

In [None]:
from pathlib import Path
def list_wav_stems(directory):
    """Return the stems of the ``.wav`` files directly inside *directory*.

    Only regular files whose suffix is exactly ``.wav`` are considered, so
    stray files (editor/OS metadata, partial downloads, sub-directories) do
    not end up being handed to the converter. A missing directory yields an
    empty set instead of raising.
    """
    directory = Path(directory)
    if not directory.is_dir():
        return set()
    return {
        entry.stem
        for entry in directory.iterdir()
        if entry.is_file() and entry.suffix == '.wav'
    }


path = Path('./audio_from_gh')
full_audios = list_wav_stems(path)

In [None]:
# Print each stem before converting it, so the output shows which file is
# being processed.
for audio_stem in full_audios:
    print(audio_stem)
    convert_to_16K(audio_stem)