In [3]:
%pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Note: you may need to restart the kernel to use updated packages.


In [80]:
from pydub import AudioSegment
from pydub.utils import make_chunks
from IPython.display import Audio, display
import base64
import json
import requests
import os
import shutil


In [90]:
def checkIfDirExists(dir):
    if not os.path.exists(dir):
        os.makedirs(dir)

def combineWavFiles(output_folder, output_filename, input_dir):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    wav_files = [f for f in os.listdir(input_dir) if f.endswith('.wav')]

    
    wav_files.sort()

    combined_audio = AudioSegment.empty()

    for wav_file in wav_files:
        file_path = os.path.join(input_dir, wav_file)
        print(f"Adding {wav_file}...")
        audio = AudioSegment.from_wav(file_path)
        combined_audio += audio  

    if not output_filename.endswith('.wav'):
        output_filename += '.wav'

    output_file = os.path.join(output_folder, output_filename)

    combined_audio.export(output_file, format="wav")
    print(f"Combined audio saved to: {output_file}")
    return output_file


In [92]:

def processAudioFile(input_file, output_dir, api_key, api_url):
    temp_dir = "temp_chunks"
    vocals_dir = "vocals"
    crowd_dir = "crowd"
    
    checkIfDirExists(temp_dir)
    checkIfDirExists(output_dir)
    checkIfDirExists(vocals_dir)
    checkIfDirExists(crowd_dir)

    print(f"Converting {input_file} to WAV...")
    audio = AudioSegment.from_file(input_file)
    wav_file = os.path.splitext(input_file)[0] + ".wav"
    audio.export(wav_file, format="wav")


    print("Splitting WAV file into 30-second chunks...")
    chunk_length_ms = 30 * 1000  # 30 seconds
    chunks = make_chunks(AudioSegment.from_file(wav_file), chunk_length_ms)

    chunk_files = []
    for i, chunk in enumerate(chunks):
        chunk_name = os.path.join(temp_dir, f"chunk_{i}.wav")
        chunk.export(chunk_name, format="wav")
        chunk_files.append(chunk_name)

 
    separated_files = []
    for chunk_file in chunk_files:
        print(f"Processing chunk: {chunk_file}")

    
        with open(chunk_file, 'rb') as f:
            audio_data = base64.b64encode(f.read()).decode('utf-8')

       
        payload = {
            'audio': [
                {
                    'name': os.path.basename(chunk_file),
                    'data': f'data:audio/wav;base64,{audio_data}'
                }
            ],
            'algorithm': {
                'name': 'default',
                'option': [
                    {'name': 'targets', 'data': ['vocals', 'drums', 'bass', 'other']},
                    {'name': 'start_point', 'data': 0}
                ]
            }
        }

 
        headers = {'X-TH_API-Key': api_key, 'Content-Type': 'application/json'}
        response = requests.post(api_url, headers=headers, data=json.dumps(payload))
        if response.status_code == 200:
            result = response.json()
            for output in result.get('output', []):
                audio_base64 = output['data'].split(',')[1]
                audio_content = base64.b64decode(audio_base64)

                if "vocals" in output['name']:
                    print("Saving to vocals folder...")
                    vocals_file = os.path.join(vocals_dir, output['name'])
                    with open(vocals_file, 'wb') as out_f:
                        out_f.write(audio_content)
                    separated_files.append(vocals_file)

                if "other" in output['name']:
                    print("Saving to crowd folder...")
                    crowd_file = os.path.join(crowd_dir, output['name'])
                    with open(crowd_file, 'wb') as out_f:
                        out_f.write(audio_content)
                    separated_files.append(crowd_file)
        else:
            print(f"Failed to process chunk {chunk_file}: {response.text}")

    
    file_stem = os.path.basename(os.path.splitext(input_file)[0])
    combined_vocals = combineWavFiles(output_dir, f"{file_stem}_commentator.wav", vocals_dir)
    combined_crowd = combineWavFiles(output_dir, f"{file_stem}_crowd.wav", crowd_dir)

    shutil.rmtree(temp_dir)
    shutil.rmtree(vocals_dir)
    shutil.rmtree(crowd_dir)

    print("Processing completed.")
    return combined_vocals, combined_crowd

In [None]:
def playAudioFiles(combined_vocals_list, combined_crowd_list):
    for vocal, crowd in zip(combined_vocals_list, combined_crowd_list):
        if os.path.exists(vocal) and os.path.exists(crowd):
            print(f"Playing {vocal}")
            display(Audio(vocal))
            print(f"Playing {crowd}")
            display(Audio(crowd))
            print("-------------------")
        else:
            print(f"File not found: {vocal} or {crowd}")


In [86]:
input_dir = "match_recordings" # This is where the input audio is
output_dir = "separated_audio"
API_KEY = 'e45e_0691_b2688b4710eefa14afed1c6b4a7dff8c3b5e1dea25dec254708b2750275b335b'
API_URL = 'https://api.techhub.developer.sony.com/mss/api/inference'

In [88]:
combined_vocals_list = []
combined_crowd_list = []
for file_name in os.listdir(input_dir):
        if file_name.endswith('.mp3'):
            input_file = os.path.join(input_dir, file_name)
            print(f"Processing file: {input_file}")
            combined_vocals,combined_crowd = processAudioFile(input_file, output_dir, API_KEY, API_URL)
            combined_vocals_list.append(combined_vocals)
            combined_crowd_list.append(combined_crowd_list)

Processing file: match_recordings\brentford_newcastle.mp3
Converting match_recordings\brentford_newcastle.mp3 to WAV...
Splitting WAV file into 30-second chunks...
Processing chunk: temp_chunks\chunk_0.wav
Saving to vocals folder...
Saving to crowd folder...
Processing chunk: temp_chunks\chunk_1.wav
Saving to vocals folder...
Saving to crowd folder...
Adding chunk_0_vocals.wav...
Adding chunk_1_vocals.wav...
Combined audio saved to: separated_audio\brentford_newcastle_commentator.wav
Adding chunk_0_other.wav...
Adding chunk_1_other.wav...
Combined audio saved to: separated_audio\brentford_newcastle_crowd.wav
Processing completed.


In [102]:
print(combined_vocals_list)
for vocal, crowd in zip(combined_vocals_list, combined_crowd_list):
    if os.path.exists(vocal) and os.path.exists(crowd):
        print(f"Playing {vocal}")
        display(Audio(vocal))
        print(f"Playing {crowd}")
        display(Audio(crowd))
        print("-------------------")
    else:
        print(f"File not found: {vocal} or {crowd}")

        

['separated_audio\\brentford_newcastle_commentator.wav']


TypeError: _path_exists: path should be string, bytes, os.PathLike or integer, not list