In [1]:
import os
import pandas as pd
from pcp_module import process_audio_and_save_pcp
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
def process_annotation_file(annotation_file_path, header_lines=7):
    """Parse a beat-info annotation file into a DataFrame.

    The first ``header_lines`` lines of the file are discarded; every
    remaining non-blank line is split on commas into four fields.

    Args:
        annotation_file_path: Path of the annotation file to read.
        header_lines: Number of leading lines to skip before the data rows.
            Defaults to 7, the value previously hard-coded here.

    Returns:
        pandas.DataFrame with the columns ``start_time`` (float), ``bar``
        (int), ``beat`` (int), ``chord`` (surrounding single quotes removed)
        and ``end_time`` (the ``start_time`` of the following row).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a ``start_time``/``bar``/``beat`` field is not numeric.
    """
    with open(annotation_file_path, 'r') as file:
        data_lines = file.readlines()[header_lines:]

    # Skip blank lines (e.g. trailing newlines at end of file): they would
    # otherwise become rows with an empty start_time and make astype(float)
    # raise.
    arff_content = [
        stripped.strip("'").split(",")
        for stripped in (line.strip() for line in data_lines)
        if stripped
    ]

    annotations_df = pd.DataFrame(arff_content, columns=['start_time', 'bar', 'beat', 'chord'])
    annotations_df['start_time'] = annotations_df['start_time'].astype(float)
    annotations_df['bar'] = annotations_df['bar'].astype(int)
    annotations_df['beat'] = annotations_df['beat'].astype(int)
    # The line-level strip above only removes the trailing quote; this removes
    # the quote that still precedes the chord name.
    annotations_df['chord'] = annotations_df['chord'].str.strip("'")

    # Each segment ends where the next one starts. The last row has no
    # successor, so ffill copies the previous row's end_time into it, which
    # equals the last row's own start_time.
    # NOTE(review): that leaves the final segment with zero length — confirm
    # this is what process_audio_and_save_pcp expects.
    annotations_df['end_time'] = annotations_df['start_time'].shift(-1)
    annotations_df = annotations_df.ffill()

    return annotations_df

def process_file_pair(args):
    """Run annotation parsing and PCP extraction for one audio/annotation pair.

    Args:
        args: 5-tuple of (audio file name, annotation file name, audio
            directory, annotation directory, output directory).

    Returns:
        A status string. Any exception raised while processing is caught and
        reported in the returned message instead of propagating.
    """
    audio_name, annotation_name, audio_dir, annotation_dir, out_dir = args

    try:
        # Parse the annotations first, then hand them to the PCP extractor.
        parsed_annotations = process_annotation_file(
            os.path.join(annotation_dir, annotation_name)
        )
        process_audio_and_save_pcp(audio_name, audio_dir, parsed_annotations, out_dir)
    except Exception as exc:
        reason = str(exc)
        return f"Error processing {audio_name}: {reason}"
    else:
        return f"Successfully processed {audio_name}"

def main():
    """Compute PCP vectors for every audio/annotation pair using a thread pool.

    Collects the audio files and the ``*beatinfo.arff`` annotation files,
    pairs them positionally after sorting, and submits each pair to
    ``process_file_pair``. Failures are printed as warnings; the function
    returns ``None`` and does nothing when no pairs are found.
    """
    # Configuration
    dataset_location = "./tinyaam/audio-mixes-mp3/"
    output_dir = 'pcpvectors'
    annotations_dir_loc = "./tinyaam/annotations/"

    # os.walk yields names in arbitrary directory order, so both lists are
    # sorted before being zipped; otherwise an audio file could be paired with
    # another track's annotations.
    # NOTE(review): assumes matching audio/annotation files sort into the same
    # relative order (e.g. share a leading track id) — confirm against the
    # dataset's naming scheme.
    audio_file_names = sorted(
        file for root, dirs, files in os.walk(dataset_location) for file in files
    )
    arff_files = sorted(
        file for root, dirs, files in os.walk(annotations_dir_loc)
        for file in files if file.endswith('beatinfo.arff')
    )

    # zip() silently truncates to the shorter list; make that visible.
    if len(audio_file_names) != len(arff_files):
        print(
            f"Warning: found {len(audio_file_names)} audio files but "
            f"{len(arff_files)} annotation files; unmatched trailing files are skipped"
        )

    # Prepare arguments for parallel processing
    process_args = [
        (audio_file, arff_file, dataset_location, annotations_dir_loc, output_dir)
        for audio_file, arff_file in zip(audio_file_names, arff_files)
    ]

    # ThreadPoolExecutor rejects max_workers=0, so bail out early when there
    # is nothing to do.
    if not process_args:
        print("No audio/annotation file pairs found; nothing to process.")
        return

    # 2 threads per CPU core; os.cpu_count() may return None, so fall back to 1.
    num_workers = min(len(process_args), (os.cpu_count() or 1) * 2)

    # Process files in parallel with progress bar
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Map each future back to its audio file name for error reporting.
        future_to_file = {
            executor.submit(process_file_pair, args): args[0]
            for args in process_args
        }

        with tqdm(total=len(process_args), unit=' Files') as pbar:
            for future in as_completed(future_to_file):
                file_name = future_to_file[future]
                try:
                    result = future.result()
                    # Match the exact failure prefix produced by
                    # process_file_pair; a plain substring test would also
                    # fire for a successful file whose name contains "Error".
                    if result.startswith("Error processing"):
                        print(f"\nWarning: {result}")
                except Exception as e:
                    print(f"\nError processing {file_name}: {str(e)}")
                finally:
                    pbar.update(1)

# Run the parallel PCP pipeline when this code is executed directly
# rather than imported as a module.
if __name__ == "__main__":
    main()

In [4]:
# Sequential (single-threaded) PCP extraction. Writes to its own output
# directory, separate from the one used by main().
dataset_location = "./tinyaam/audio-mixes-mp3/"
output_dir = 'pcpvectorssingle'
annotations_dir_loc = "./tinyaam/annotations/"

# os.walk yields names in arbitrary directory order; sort both lists so the
# positional pairing below is deterministic.
# NOTE(review): assumes matching audio/annotation files sort into the same
# relative order (e.g. share a leading track id) — confirm against the
# dataset's naming scheme.
audio_file_names = sorted(file for root, dirs, files in os.walk(dataset_location) for file in files)
arff_files = sorted(
    file for root, dirs, files in os.walk(annotations_dir_loc)
    for file in files if file.endswith('beatinfo.arff')
)

# Materialise the pairs so tqdm can report a total instead of "0 Files [..]".
file_pairs = list(zip(audio_file_names, arff_files))

for audio_file_name, annotations_file_name in tqdm(file_pairs, unit=' Files'):
    # Same parsing steps as process_annotation_file: skip the 7 header lines,
    # split each data row on commas.
    with open(os.path.join(annotations_dir_loc, annotations_file_name), 'r') as file:
        lines = file.readlines()[7:]
    # Blank lines (e.g. a trailing newline) are skipped; they would otherwise
    # yield an empty start_time and make astype(float) raise.
    arff_content = [line.strip().strip("'").split(",") for line in lines if line.strip()]

    annotations_df = pd.DataFrame(arff_content, columns=['start_time', 'bar', 'beat', 'chord'])
    annotations_df['start_time'] = annotations_df['start_time'].astype(float)
    annotations_df['bar'] = annotations_df['bar'].astype(int)
    annotations_df['beat'] = annotations_df['beat'].astype(int)
    annotations_df['chord'] = annotations_df['chord'].str.strip("'")
    # Each segment ends where the next begins; ffill gives the last row the
    # previous row's end_time (i.e. its own start_time).
    annotations_df['end_time'] = annotations_df['start_time'].shift(-1)
    annotations_df = annotations_df.ffill()

    process_audio_and_save_pcp(audio_file_name, dataset_location, annotations_df, output_dir)



0 Files [00:00, ? Files/s]

Loading audio file: 0001_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


183 segments [00:22,  8.14 segments/s]
1 Files [00:24, 24.35s/ Files]

Processed: 0001_mix.mp3
Loading audio file: 0080_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


255 segments [00:18, 13.63 segments/s]
2 Files [00:43, 21.09s/ Files]

Processed: 0080_mix.mp3
Loading audio file: 0192_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


215 segments [00:24,  8.82 segments/s]
3 Files [01:07, 22.63s/ Files]

Processed: 0192_mix.mp3
Loading audio file: 0620_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


399 segments [00:40,  9.86 segments/s]
4 Files [01:48, 29.72s/ Files]

Processed: 0620_mix.mp3
Loading audio file: 0758_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


287 segments [00:31,  9.23 segments/s]
5 Files [02:19, 30.26s/ Files]

Processed: 0758_mix.mp3
Loading audio file: 0989_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


319 segments [00:34,  9.21 segments/s]
6 Files [02:54, 31.79s/ Files]

Processed: 0989_mix.mp3
Loading audio file: 1014_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


335 segments [00:36,  9.24 segments/s]
7 Files [03:30, 33.29s/ Files]

Processed: 1014_mix.mp3
Loading audio file: 1050_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


311 segments [00:33,  9.28 segments/s]
8 Files [04:04, 33.40s/ Files]

Processed: 1050_mix.mp3
Loading audio file: 1545_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


255 segments [00:28,  9.05 segments/s]
9 Files [04:32, 31.81s/ Files]

Processed: 1545_mix.mp3
Loading audio file: 1711_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


223 segments [00:25,  8.71 segments/s]
10 Files [04:58, 29.93s/ Files]

Processed: 1711_mix.mp3
Loading audio file: 1941_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


295 segments [00:33,  8.71 segments/s]
11 Files [05:32, 31.17s/ Files]

Processed: 1941_mix.mp3
Loading audio file: 2269_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


223 segments [00:28,  7.73 segments/s]
12 Files [06:01, 30.51s/ Files]

Processed: 2269_mix.mp3
Loading audio file: 2395_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


351 segments [00:35,  9.90 segments/s]
13 Files [06:36, 32.04s/ Files]

Processed: 2395_mix.mp3
Loading audio file: 2462_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


191 segments [00:27,  7.04 segments/s]
14 Files [07:04, 30.59s/ Files]

Processed: 2462_mix.mp3
Loading audio file: 2602_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


343 segments [00:36,  9.50 segments/s]
15 Files [07:40, 32.29s/ Files]

Processed: 2602_mix.mp3
Loading audio file: 2720_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


319 segments [00:33,  9.48 segments/s]
16 Files [08:14, 32.73s/ Files]

Processed: 2720_mix.mp3
Loading audio file: 2828_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


231 segments [00:27,  8.40 segments/s]
17 Files [08:41, 31.19s/ Files]

Processed: 2828_mix.mp3
Loading audio file: 2841_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


463 segments [00:45, 10.21 segments/s]
18 Files [09:27, 35.48s/ Files]

Processed: 2841_mix.mp3
Loading audio file: 2990_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


367 segments [00:40,  9.00 segments/s]
19 Files [10:08, 37.12s/ Files]

Processed: 2990_mix.mp3
Loading audio file: 3000_mix.mp3
Audio loaded with sampling rate: 44100 Hz
Samples per microsecond: 0.0441


191 segments [00:23,  8.01 segments/s]
20 Files [10:31, 31.60s/ Files]

Processed: 3000_mix.mp3



