In [1]:
import pandas as pd
import numpy as np
import librosa
import h5py
import math
import warnings
import json
from loop_generator import LoopGenerator

In [2]:
# Numerator factor of the per-category quota formulas computed further down; the
# "not enough one shots" warning reports it as "<ratio>:1".
# NOTE(review): presumably the desired one-shot : loop ratio — confirm.
one_to_loop_ratio = 1
# Fractional overhead applied when converting loop units to samples
# (required_loop_count_samples = units * (1 + pct)).
# NOTE(review): presumably the share of loop units built from more than one sample — confirm.
loop_unit_multi_sample_pct = 0.2

In [3]:
# Base name (without extension) of the metadata CSV; the same name is reused at the
# end of the notebook when the updated metadata is written back.
import_path = 'audio_metadata - filtered_copy'
meta_data = pd.read_csv(import_path + '.csv', sep=',', index_col=False)

# Load the looping patterns through a context manager so the file handle is closed
# as soon as the JSON has been parsed (a bare open() left it open for the kernel's lifetime).
with open('patterns.json', encoding='utf-8') as patterns_file:
    looping_patterns = json.load(patterns_file)

In [4]:
# Flat integer array of the sample ids listed in the metadata; it drives the HDF5 reads.
id_series = meta_data['id'].astype(int)
df_id = np.array(id_series).flatten()

# Accumulators filled in df_id order: one waveform and one sample rate per id.
wavs, srs = [], []

def load_sample(f, sample_id):
    """Fetch one sample's audio from an opened store.

    Args:
        f: Mapping-like object (e.g. an open h5py file) exposing
            ``f["audio_data"][<id as str>]`` with ``"waveform"`` and
            ``"sample_rate"`` entries.
        sample_id: Identifier of the sample; converted with ``str()`` to build the key.

    Returns:
        Tuple ``(waveform, sample_rate)``, each materialised with ``np.array``.
    """
    entry = f["audio_data"][str(sample_id)]
    return np.array(entry["waveform"]), np.array(entry["sample_rate"])


# Read every sample listed in df_id while the HDF5 file is open; load_sample copies
# the data into numpy arrays, so nothing references the file after it closes.
with h5py.File('filtered_audio_data.h5', "r") as f:
    loaded = [load_sample(f, sample_id) for sample_id in df_id]

for waveform, sr in loaded:
    wavs.append(waveform)
    srs.append(sr)

# One row per id, keeping the raw audio next to its sample rate.
samples = pd.DataFrame({'id': df_id, 'waveform': wavs, 'sample_rate': srs})

In [5]:
# Attach the metadata columns to each loaded sample by matching on 'id'.
meta_by_id = meta_data.set_index('id')
samples = samples.join(meta_by_id, on='id')

In [6]:
# Display the joined table: loaded audio (waveform, sample_rate) plus the metadata columns.
samples

Unnamed: 0,id,waveform,sample_rate,file_path,file_name,group,category,sub_category,Reverse,Forward,...,Vibraslap,Timpani,Vocal FX,China,Cowbell,Bell,Orchestra,Metalic Tap,One_Shot_Intent,Loop_id
0,5487,"[5.164626e-05, -0.000103295286, 2.5824858e-05,...",44100,808 and kick/heavy kick.wav,Heavy Kick.wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,1.0,
1,5087,"[-0.0004272461, -0.0016479492, -0.007232666, -...",44100,808 samples/808/bass (12).wav,bass (12).wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,1.0,
2,5084,"[-0.0009765625, -0.0026550293, -0.009033203, -...",44100,808 samples/808/bass (13).wav,bass (13).wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,1.0,
3,5083,"[-0.00039672852, -0.00045776367, -0.0014953613...",44100,808 samples/808/bass (14).wav,bass (14).wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,1.0,
4,5098,"[-0.0029296875, -0.010894775, -0.051208496, -0...",44100,808 samples/808/bass (16).wav,bass (16).wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3592,7050,"[0.0014566779, -0.0014469028, 0.009420276, 0.5...",44100,we unite - ultimate future bounce (sample pack...,15 Kick D.wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,0.0,1523.0
3593,7061,"[0.0014566779, -0.0014469028, 0.009420276, 0.5...",44100,we unite - ultimate future bounce (sample pack...,16 Kick D#.wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,0.0,1524.0
3594,7059,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",44100,we unite - ultimate future bounce (sample pack...,17 Kick E.wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,0.0,1525.0
3595,7053,"[-0.018602252, 0.06855637, 0.044089437, 0.1702...",44100,we unite - ultimate future bounce (sample pack...,18 Kick A.wav,Drum,Kick,,0,1,...,0,0,0,0,0,0,0,0,0.0,1525.0


In [7]:
# Per-category bookkeeping: how many loops / one-shots exist, how many rows already
# carry an intent label, and how many of each the quota formulas require.
category_counts = {}
sample_categories = set(samples['category'])

# Both intent columns must be present before intent counts can be computed.
# `'One_Shot_Intent' and 'Loop_id' in samples.columns` only tested 'Loop_id' (a
# non-empty string literal is always truthy), so a frame with 'Loop_id' but without
# 'One_Shot_Intent' raised KeyError below.
has_intent_columns = {'One_Shot_Intent', 'Loop_id'}.issubset(samples.columns)

# Loop-invariant pieces of the masks and of the quota formula.
is_one_shot = samples['One_Shot'] == 1
is_loop = samples['One_Shot'] == 0
quota_divisor = one_to_loop_ratio + 1 + loop_unit_multi_sample_pct

for cat in sample_categories:
    in_category = samples['category'] == cat
    counts = {
        "loop_count": int((in_category & is_loop).sum()),
        "one_shot_count": int((in_category & is_one_shot).sum()),
    }
    if has_intent_columns:
        counts['one_shot_intent_count'] = int(
            (in_category & is_one_shot & (samples['One_Shot_Intent'] == 1)).sum()
        )
        counts['loop_intent_count'] = int(
            (in_category & (samples['One_Shot_Intent'] == 0)).sum()
        )
    else:
        # No intent labels yet: everything still has to be assigned.
        counts['one_shot_intent_count'] = 0
        counts['loop_intent_count'] = 0

    counts["all"] = counts["loop_count"] + counts["one_shot_count"]

    # NOTE(review): the one-shot quota and the loop-unit quota use the same formula;
    # that looks intentional only for one_to_loop_ratio == 1 — confirm before
    # changing the ratio.
    quota = math.floor((one_to_loop_ratio * counts["all"]) / quota_divisor)
    counts["required_one_shot_count_samples"] = quota
    counts["required_loop_count_units"] = quota
    counts["required_loop_count_samples"] = int(quota * (1 + loop_unit_multi_sample_pct))

    if counts["required_loop_count_samples"] + counts["required_one_shot_count_samples"] > counts["all"]:
        warnings.warn("greater required samples than available")

    category_counts[cat] = counts


category_counts

{'Hi Hat': {'loop_count': 195,
  'one_shot_count': 1004,
  'one_shot_intent_count': 545,
  'loop_intent_count': 654,
  'all': 1199,
  'required_one_shot_count_samples': 545,
  'required_loop_count_units': 545,
  'required_loop_count_samples': 654},
 'Clap': {'loop_count': 195,
  'one_shot_count': 1004,
  'one_shot_intent_count': 545,
  'loop_intent_count': 654,
  'all': 1199,
  'required_one_shot_count_samples': 545,
  'required_loop_count_units': 545,
  'required_loop_count_samples': 654},
 'Kick': {'loop_count': 195,
  'one_shot_count': 1004,
  'one_shot_intent_count': 545,
  'loop_intent_count': 654,
  'all': 1199,
  'required_one_shot_count_samples': 545,
  'required_loop_count_units': 545,
  'required_loop_count_samples': 654}}

In [8]:
# Work on a copy so the loaded `samples` frame stays untouched.
samples_copy = samples.copy()

# Make sure both intent columns exist, creating only the ones that are missing.
# `'One_Shot_Intent' and 'Loop_id' not in ...` only tested 'Loop_id': it could skip
# creating a missing 'One_Shot_Intent', or wipe an existing one when just 'Loop_id'
# was absent.
for intent_col in ('One_Shot_Intent', 'Loop_id'):
    if intent_col not in samples_copy.columns:
        samples_copy[intent_col] = np.nan

# Next up
- Add swing, velocity, and pitch variations
- Ensure everything works when importing new data

In [9]:
def clean_merge(df1, df2):
    """Overlay the intent columns of ``df2`` onto ``df1``, matching rows on ``id``.

    For ``One_Shot_Intent`` and ``Loop_id`` the value from ``df2`` wins whenever it
    is not missing; otherwise the value already in ``df1`` is kept. Rows of ``df1``
    without a match in ``df2`` are left as they are (left join).

    Args:
        df1: Frame to update; must contain ``id``, ``One_Shot_Intent`` and ``Loop_id``.
        df2: Frame providing new values; only its ``id``, ``One_Shot_Intent`` and
            ``Loop_id`` columns are read.

    Returns:
        A new frame with the columns of ``df1`` and the overlaid intent values.
    """
    intent_cols = ['One_Shot_Intent', 'Loop_id']
    incoming = df2[['id'] + intent_cols]
    merged = df1.merge(incoming, how='left', on='id', suffixes=('', '_new'))

    for col in intent_cols:
        incoming_values = merged[f'{col}_new']
        # Prefer the incoming value, fall back to the existing one where it is missing.
        merged[col] = incoming_values.combine_first(merged[col])

    # The temporary '<col>_new' helper columns are no longer needed.
    return merged.drop(columns=[f'{col}_new' for col in intent_cols])

In [10]:
def _drop_unassigned_rows(frame):
    """Return ``frame`` without rows that have neither an intent nor a loop id.

    A value counts as missing when it is NaN or an empty string.
    """
    no_intent = frame['One_Shot_Intent'].isna() | (frame['One_Shot_Intent'] == '')
    no_loop_id = frame['Loop_id'].isna() | (frame['Loop_id'] == '')
    return frame[~(no_intent & no_loop_id)]


gen = LoopGenerator()

# NOTE(review): num_of_loop_units is not read anywhere in this cell; kept in case
# other cells depend on it.
num_of_loop_units = int(one_to_loop_ratio / loop_unit_multi_sample_pct)
num_of_loop_samples = math.ceil((one_to_loop_ratio + loop_unit_multi_sample_pct) / loop_unit_multi_sample_pct)

# One frame of relabelled rows per category that needed changes.
updated_rows = []
for cat, counts in category_counts.items():
    required_ones = counts['required_one_shot_count_samples']
    required_loop_samples = counts['required_loop_count_samples']
    has_spare_one_shots = counts['one_shot_count'] > required_ones

    # Case 1: intent counts already match the quotas - nothing to edit.
    if counts['one_shot_intent_count'] == required_ones and counts['loop_intent_count'] == required_loop_samples:
        print(f'one shot intent and loop intent counts for {cat} meet requirements')

    # Case 2: no intents assigned at all (the intent columns were just created).
    elif has_spare_one_shots and counts['one_shot_intent_count'] == 0 and counts['loop_intent_count'] == 0:
        print(f'one shot intent and loop intent counts for {cat} do not meet requirements')
        in_category = samples_copy['category'] == cat
        cat_one_shots = samples_copy[in_category & (samples_copy['One_Shot'] == 1)].copy()

        # The first `required_ones` one-shots stay one-shots.
        ones = cat_one_shots.iloc[:required_ones].copy()
        ones['One_Shot_Intent'] = 1

        # The next one-shots feed loop generation; loops already in the dataset
        # count towards the loop quota, so fewer one-shots are taken.
        existing_loops = samples_copy[in_category & (samples_copy['One_Shot'] == 0)].copy()
        loops_end = required_ones + required_loop_samples - len(existing_loops)
        loops = cat_one_shots.iloc[required_ones:loops_end].copy()

        existing_loops['One_Shot_Intent'] = 0
        loops['One_Shot_Intent'] = 0
        loops.reset_index(drop=False, inplace=True)

        # LoopGenerator is defined in loop_generator (not shown here). After the call,
        # gen.old_files must expose 'id', 'One_Shot_Intent' and 'Loop_id', because
        # clean_merge reads exactly those columns.
        gen.generate_loops(loops, looping_patterns, num_of_loop_samples, cat)
        cat_one_shots = clean_merge(cat_one_shots, gen.old_files)
        cat_one_shots = clean_merge(cat_one_shots, ones)

        # One-shots that ended up with neither an intent nor a loop id are unused.
        cat_one_shots = _drop_unassigned_rows(cat_one_shots)

        updated_rows.append(pd.concat([cat_one_shots, existing_loops], ignore_index=True))

    # Case 3: some intents exist but both are below quota - loop generation already
    # ran once and new files were probably added since.
    # TODO: this branch only reports counts; it does not assign the unused one-shots yet.
    elif has_spare_one_shots and counts['one_shot_intent_count'] < required_ones and counts['loop_intent_count'] < required_loop_samples:
        cat_rows = samples_copy[samples_copy['category'] == cat].copy()
        used_ones = cat_rows[(cat_rows['One_Shot'] == 1) & (cat_rows['One_Shot_Intent'] == 1)]
        unused_ones = cat_rows[(cat_rows['One_Shot'] == 1) & (cat_rows['One_Shot_Intent'].isnull())]
        print(f'used_ones len: {len(used_ones)} -- unused_ones len: {len(unused_ones)}')

    # Case 4: fewer one-shots than the one-shot quota. The original read the key
    # 'needed_each', which is never created in category_counts, so this branch
    # raised KeyError when reached.
    elif required_ones > counts['one_shot_count']:
        warnings.warn(f"not enough one shots to reach {one_to_loop_ratio}:1 ratio")


if updated_rows:
    all_updates = pd.concat(updated_rows)
    samples_copy = clean_merge(samples_copy, all_updates)

    # Keep only rows that ended up with an intent or a loop id.
    samples_copy = _drop_unassigned_rows(samples_copy)

one shot intent and loop intent counts for Hi Hat meet requirements
one shot intent and loop intent counts for Clap meet requirements
one shot intent and loop intent counts for Kick meet requirements


In [11]:
# Display the generator's `generated_files_metadata` attribute (defined in loop_generator, not shown here).
gen.generated_files_metadata

In [12]:
# Ask the generator to save its updates; what gets written is defined in
# loop_generator (not shown here).
gen.save_updates()

In [13]:
# Drop the in-memory audio columns before writing the metadata back to the CSV it
# was loaded from. Re-binding with errors='ignore' (instead of an in-place drop)
# keeps this cell re-runnable: a second run no longer raises KeyError because the
# columns are already gone.
# NOTE: this overwrites the input CSV with the current rows of samples_copy.
samples_copy = samples_copy.drop(columns=['waveform', 'sample_rate'], errors='ignore')
samples_copy.to_csv(import_path + '.csv', sep=',', index=False)

In [14]:
# Display the metadata table as it was written to the CSV (audio columns removed).
samples_copy

Unnamed: 0,id,file_path,file_name,group,category,sub_category,Reverse,Forward,One_Shot,Loop,...,Vibraslap,Timpani,Vocal FX,China,Cowbell,Bell,Orchestra,Metalic Tap,One_Shot_Intent,Loop_id
0,5487,808 and kick/heavy kick.wav,Heavy Kick.wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,1.0,
1,5087,808 samples/808/bass (12).wav,bass (12).wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,1.0,
2,5084,808 samples/808/bass (13).wav,bass (13).wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,1.0,
3,5083,808 samples/808/bass (14).wav,bass (14).wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,1.0,
4,5098,808 samples/808/bass (16).wav,bass (16).wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3592,7050,we unite - ultimate future bounce (sample pack...,15 Kick D.wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,0.0,1523.0
3593,7061,we unite - ultimate future bounce (sample pack...,16 Kick D#.wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,0.0,1524.0
3594,7059,we unite - ultimate future bounce (sample pack...,17 Kick E.wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,0.0,1525.0
3595,7053,we unite - ultimate future bounce (sample pack...,18 Kick A.wav,Drum,Kick,,0,1,1,0,...,0,0,0,0,0,0,0,0,0.0,1525.0
