# Init Imports

In [None]:
from fastai import *
import librosa
import librosa.display
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
from numpy import random
import os
import pydub
from pydub import AudioSegment
import wave
import shutil
from scipy.io import wavfile
import pandas as pd
from PIL import Image
from IPython.display import Image, display
import gc
from contextlib import contextmanager
import memory_profiler

## This will erase working directory
Good for debugging

In [None]:
import os
import shutil

# Empty /kaggle/working: every entry found directly inside it is removed.
for entry in os.listdir('/kaggle/working'):
    entry_path = os.path.join('/kaggle/working', entry)
    if not os.path.isdir(entry_path):
        # Not a directory: delete the single file
        os.remove(entry_path)
    else:
        # Directory: delete it together with everything it contains
        shutil.rmtree(entry_path)


# Create paths to dataset
Prints the number of speaker directories found in the corpus.

In [None]:
# Locations of the dataset inside the Kaggle input area.
corpus_path = "../input/english-multispeaker-corpus-for-voice-cloning/VCTK-Corpus"
raw_audio_dirs = "/kaggle/input/english-multispeaker-corpus-for-voice-cloning/VCTK-Corpus/VCTK-Corpus/wav48/"

# Every entry directly under raw_audio_dirs is counted as one speaker directory.
speaker_entries = os.listdir(raw_audio_dirs)
toal_num_speakers = len(speaker_entries)
print(f"There are {toal_num_speakers} total speaker directories")

## Save number of sample files from each speaker directory

In [None]:
# Number of files copied out of each speaker directory by the sampling loop below
sample_files = 327

# Upper bound on how many speaker directories the sampling loop visits
sample_dirs = 109

In [None]:
working_dir = '/kaggle/working'

# Sampled audio is copied to <working_dir>/raws/<speaker>_samples
raw_dir = f'{working_dir}/raws'
os.makedirs(raw_dir, exist_ok = True)

# Receives every sample directory that was filled with the full `sample_files` quota
sample_directories = []

print(f"Taking {sample_files} sample files from {sample_dirs} sample directories")

# Normalised so the comparison below works with or without a trailing slash
corpus_root = os.path.normpath(raw_audio_dirs)
dirs_visited = 0

# Iterate through all audio directories
for dirname, _, filenames in os.walk(raw_audio_dirs):
    # os.walk yields the corpus root itself before any speaker directory;
    # it has no destination folder of its own, so skip it explicitly.
    if os.path.normpath(dirname) == corpus_root:
        continue

    new_dirname = f"{raw_dir}/{os.path.basename(dirname)}_samples"
    os.makedirs(new_dirname, exist_ok = True)

    # Save at most `sample_files` samples from this speaker to the new dir
    copied = 0
    for filename in filenames[:sample_files]:
        src_path = os.path.join(dirname, filename)
        new_path = os.path.join(new_dirname, filename)
        shutil.copy(src_path, new_path)
        copied += 1

    # Only speakers that supplied the full quota are registered for later use
    if copied == sample_files:
        print(f"{sample_files} samples saved at {new_dirname}\n")
        sample_directories.append(new_dirname)

    # Stop once the requested number of speaker directories has been visited
    dirs_visited += 1
    if dirs_visited >= sample_dirs:
        break

#### We now have sample_directories, a list of speaker directories

In [None]:
# Number of speaker sample directories collected in sample_directories
speaker_max = len(sample_directories)
# Bare expression: shown as the cell output when run in a notebook
speaker_max

## Create indexing dataframes
These will store the locations of all overlays and their parent/child pair

In [None]:
# One label per speaker, 'speaker_0' up to 'speaker_<speaker_max - 1>';
# the same labels serve as both row index and column names.
speaker_matrix = [f'speaker_{idx}' for idx in range(speaker_max)]

# No initial values are supplied; cells are assigned later via .loc
data = []

# Three identically shaped frames: overlay names, parent names, child names
df, parent_df, child_df = (
    pd.DataFrame(data, columns = speaker_matrix, index = speaker_matrix)
    for _ in range(3)
)

## Create project directory and subdirectories

In [None]:
# Root folder for everything this notebook generates
project_dir = f'{working_dir}/overlay_data'

# Wav directories
wav_dir = f'{project_dir}/wav'
wav_parent = f'{wav_dir}/parents'
wav_child = f'{wav_dir}/childs'
wav_overlay = f'{wav_dir}/overlays'

# Create the tree top-down; folders that already exist are left as they are
for folder in (project_dir, wav_dir, wav_parent, wav_child, wav_overlay):
    os.makedirs(folder, exist_ok = True)

# Spectrograph directories (png)
# spectro_dir = f"{project_dir}/png"
# os.makedirs(spectro_dir, exist_ok = True)

# spectro_parent = f"{spectro_dir}/parents"
# os.makedirs(spectro_parent, exist_ok = True)

# spectro_child = f"{spectro_dir}/childs"
# os.makedirs(spectro_child, exist_ok = True)

# spectro_overlay =  f"{spectro_dir}/overlays"
# os.makedirs(spectro_overlay, exist_ok = True)

# The BIG function
Creates overlays and spectrographs, saves the combos to a dataframe

In [None]:
# Just run this cell if you want to reset these
# dir_list: a fresh copy of the sample directory paths
dir_list = list(sample_directories)
# speaker_folders[k]: the file names found in dir_list[k]
speaker_folders = list(map(os.listdir, dir_list))

## Overlay function
Probably going to want to comment out the print functions for the BIG DATA

In [None]:
def overlay(parent_path, child_path):
    """Overlay the child recording onto the parent recording.

    Both paths are read as wav files. The child segment is sliced to the
    shorter of the two segment lengths before being overlaid on the parent.

    Returns the segment produced by ``parent.overlay(...)``.
    """
    parent_audio = AudioSegment.from_file(parent_path, format='wav')
    child_audio = AudioSegment.from_file(child_path, format='wav')

    # Never hand the parent a child slice longer than the parent itself
    shared_length = min(len(parent_audio), len(child_audio))
    trimmed_child = child_audio[:shared_length]

    return parent_audio.overlay(trimmed_child)

A little demonstration of how this works

## Spectrograph function

In [None]:
%load_ext memory_profiler
def spectrograph(file_path, out_path=None):
    """Draw a dB-scaled STFT spectrogram of an audio file.

    file_path: audio file to read (only up to the first 10 seconds are loaded).
    out_path:  optional image path; when given, the figure is saved there and
               all open matplotlib figures are closed afterwards.

    Returns None.
    """
    with open(file_path, 'rb') as audio_file:
        # Load only a portion of the file
        samples, sample_rate = librosa.load(audio_file, duration=10)

    # Short-time Fourier transform -> magnitude -> decibels
    magnitude = abs(librosa.stft(samples))
    decibels = librosa.amplitude_to_db(magnitude)

    # Plot spectrogram
    plt.figure(figsize=(14, 5))
    librosa.display.specshow(decibels, sr=sample_rate)

    if not out_path:
        return

    plt.savefig(out_path, dpi=100, transparent=True, bbox_inches='tight', pad_inches=0)
    plt.close('all')


## Choose files function
Yields file_path, speaker_name, and the chosen filename without its extension (for saving into the dataframes)

In [None]:
@contextmanager
def overlay_file_context_manager(speaker_folder, dir_list, wav_path, spectro_path, speaker_name):
    """Pick a random remaining sample of one speaker and expose it via a temporary symlink.

    Parameters:
        speaker_folder: list of file names still available for this speaker.
            The chosen name is popped from it, so no file is picked twice.
        dir_list: directory that contains those files (a single path, despite
            the name).
        wav_path: directory in which the symlink to the chosen file is created.
        spectro_path: unused while spectrogram generation is disabled; kept so
            existing callers keep working.
        speaker_name: label passed straight through to the caller.

    Yields:
        (file_path, speaker_name, chosen_notype), where chosen_notype is the
        chosen file name without its extension.

    Raises:
        ValueError: if speaker_folder is empty.
        OSError: if the symlink cannot be created or removed.
    """
    if not speaker_folder:
        raise ValueError(f"No samples left to choose from for {speaker_name}")

    # Randomly get sample. randint's upper bound is exclusive, so pass the full
    # length: every index (including the last) is eligible and a one-element
    # list works.
    rand_index = np.random.randint(0, len(speaker_folder))
    chosen = speaker_folder.pop(rand_index)
    file_path = f'{dir_list}/{chosen}'

    # Create symlink to chosen file
    wav_file_path = f'{wav_path}/{chosen}'
    os.symlink(file_path, wav_file_path)

    # save_notype
    chosen_notype = os.path.splitext(str(chosen))[0]
#     spec_out = f"{spectro_path}/{chosen_notype}.png"
#     spectrograph(file_path, spec_out)

    # Cleanup is tied to the yield only, so it runs exactly when the symlink
    # was really created by this call.
    try:
        yield file_path, speaker_name, chosen_notype
    finally:
        os.remove(wav_file_path)  # Remove symlink
        gc.collect()

Iterate through each parent speaker [ ***i*** ] and overlay one of its samples with one random sample from each of the other speaker [ ***j*** ] directories.

In [None]:
for i, folder in enumerate(sample_directories):
    %memit
    print("------------------------------------------------------------------------------------")
    
    for j, fol in enumerate(sample_directories):
        n = j
        if i == j:
            continue
        # Get parent/child pair

        with overlay_file_context_manager(speaker_folders[i], dir_list[i], wav_parent, spectro_parent, f'speaker_{i}') as (parent_file_path, parent_speaker_name, parent_notype):
            with overlay_file_context_manager(speaker_folders[j], dir_list[j], wav_child, spectro_child, f'speaker_{j}') as (child_file_path, child_speaker_name, child_notype):

                # Create Overlay
                overlay_file = overlay(parent_file_path, child_file_path)
                filename = f"{parent_speaker_name}_and_{child_speaker_name}.wav"
                overlay_file.export(f'{wav_overlay}/{filename}', format = 'wav')
 
                filename_notype = os.path.splitext(str(filename))[0]
#                 spec_out = f"{spectro_overlay}/{filename_notype}.png"
                overlay_path = f'{wav_overlay}/{filename}'

#                 spectrograph(overlay_path, spec_out)

                # Append to DF
                df.loc[child_speaker_name, parent_speaker_name] = filename_notype
                parent_df.loc[child_speaker_name, parent_speaker_name] = parent_notype
                child_df.loc[child_speaker_name, parent_speaker_name] = child_notype
         
                if i == speaker_max - 1:
                    n += 1
                if n == speaker_max - 1:
                    print(f"Saved {n} files from speaker {i} to [{wav_overlay}]\n")
        
                del overlay_file
                del spec_out
                del overlay_path
                gc.collect()
print("\n------------------------------------------------------------------------------------\n                                     [DONE]                  \n------------------------------------------------------------------------------------\n")

In [None]:
# Save dataframes as csv
csv_targets = (
    (df, 'overlays.csv'),
    (child_df, 'children.csv'),
    (parent_df, 'parents.csv'),
)
for frame, csv_name in csv_targets:
    # index=False: the row labels are not written to the file
    frame.to_csv(csv_name, index=False)

# Dataframes
The loop above populated these dataframes:

                
                df, child_df, parent_df
                
They contain the names of all overlays and of the parent/child files each overlay was built from. We can use them to create a lookup function:

In [None]:
def lookup(parent_dir, parent, child):
    """Return the parent, child and overlay file paths for one speaker pair.

    Parameters:
        parent_dir: directory holding the 'parents', 'childs' and 'overlays'
            sub-folders. The name of its last path component is used as the
            file extension (e.g. '.../wav' -> '.wav').
        parent: column label of the parent speaker in the index dataframes.
        child: row label of the child speaker in the index dataframes.

    Returns:
        (p_path, c_path, o_path): paths of the parent, child and overlay files.

    Raises:
        KeyError: if the labels are not present in the dataframes.
    """
    # Get Names
    parent_name = parent_df.loc[child, parent]
    child_name = child_df.loc[child, parent]
    overlay_name = df.loc[child, parent]

    # Get extension: normpath drops any trailing slash, and basename also
    # handles a path that contains no slash at all.
    ext = f".{os.path.basename(os.path.normpath(parent_dir))}"

    # Returns
    p_path = os.path.join(parent_dir, "parents", f"{parent_name}{ext}")
    c_path = os.path.join(parent_dir, "childs", f"{child_name}{ext}")
    o_path = os.path.join(parent_dir, "overlays", f"{overlay_name}{ext}")

    return p_path, c_path, o_path

In [None]:
# overlay_trio = lookup(spectro_dir, 'speaker_0', 'speaker_1')
# overlay_trio

In [None]:
# for i, png in enumerate(overlay_trio):
#     if i == 0:
#         print("Parent:\n")
#     if i == 1:
#         print("Child:\n")
#     if i == 2:
#         print("Overlay:\n")
#     display(Image(filename=png))