In [None]:
# Pre-processing script (run once per dataset; not part of the dataloader). Steps:
# 1. Load the audio (10 files for now) into memory
# 2. Normalise all loaded audio files to the range (-1, 1); required for pre-processing
# 3. Save these files in a new location as numpy files (using numpy.save):
#     - Data > raw > preprocessed > train(90%) / test(10%). Keep the two sets separate so that overfitting can be detected
# 4. Load the CSV files (10) and save one for each corresponding audio file (only the f0 column is needed)
# 5. Remove silent frames (per 64 samples/blocks of 64) (maybe do this after the data loader is complete)
# 

In [1]:
from __future__ import division
import librosa.display
import librosa
import matplotlib.pyplot as plt
import IPython.display as ipd
import csv
import os
import numpy as np 
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import glob
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import statistics
import random

# Seed every random number generator this notebook can draw from. `random.seed` only affects the
# standard-library generator; NumPy keeps its own global generator for `np.random.*` calls, so it
# has to be seeded separately for those calls to be repeatable.
random.seed(100)
np.random.seed(100)

In [3]:
# Read the first `index` audio stems from disk into memory.

index = 15  # number of files to take (in sorted order); the CSV cell slices with the same value
# sorted() fixes the order of the paths returned by glob before slicing.
audio_path_sort = sorted(glob.glob('data/MDB-stem-synth/audio_stems/*'))
audio_paths = audio_path_sort[:index]  # slice with [:] instead to take every file

# NOTE(review): librosa.load is called with its default arguments, so the library's default
# sample rate and channel handling apply -- confirm that matches the f0 annotations.
audio_data = []
for stem_path in audio_paths:
    audio, sr = librosa.load(stem_path)  # `sr` ends up holding the rate of the last file loaded
    audio_data.append(audio)


In [4]:
# Read the annotation CSV for each selected file and keep only its frequency (f0) column.
# The paths are sorted and sliced with the same `index` as the audio paths.
csv_paths_sort = sorted(glob.glob('data/MDB-stem-synth/annotation_stems/*'))
csv_paths = csv_paths_sort[:index]

# unpack=True transposes the table so that element [1] is the second CSV column (the frequency).
csv_data = [
    np.loadtxt(annotation_path, delimiter=',', unpack=True)[1]
    for annotation_path in csv_paths
]


In [5]:
# Peak-normalise every loaded waveform so that its samples lie within [-1, 1].

audio_norm = []
for waveform in audio_data:
    # An empty clip has no maximum, so treat its peak as 0 rather than letting np.max raise.
    peak = np.max(np.abs(waveform)) if np.size(waveform) else 0.0
    # A silent (all-zero) or empty clip has peak 0: dividing by it would fill the clip with NaN,
    # so such clips are passed through unchanged.
    scaled = waveform / peak if peak > 0 else waveform
    # NOTE(review): each entry is still wrapped in a one-element list, exactly as before, so the
    # structure of audio_norm is unchanged -- confirm whether that wrapper is actually wanted.
    audio_norm.append([scaled])


In [13]:
# Split the loaded files 90% / 10% into train and test sets, then save each file's normalised
# audio and f0 track as .npy files under data/Raw-data/{Train,Test}/{Audio,f0}.
audio_length = np.array(audio_paths)  # array form of the path list; only its length is used below

# Shuffle the file indices with a dedicated fixed-seed generator, so that re-running this cell
# always produces the same split and never leaves one file saved in both Train and Test.
idx = np.arange(audio_length.shape[0])
np.random.RandomState(100).shuffle(idx)

num_files = int(len(idx) * 0.9)
train = idx[:num_files]  # first 90% of the shuffled indices -> train set
train2 = idx[num_files:]  # remaining indices -> test set
num_files2 = len(audio_length) - num_files


def _save_split(file_indices, audio_dir, f0_dir):
    """Save normalised audio and f0 data for the given file indices.

    Parameters
    ----------
    file_indices : iterable of int
        Positions into audio_paths / csv_paths / audio_norm / csv_data to save.
    audio_dir : str
        Directory that receives one '<audio file name>.npy' per index.
    f0_dir : str
        Directory that receives one '<csv file name>.npy' per index.
    """
    for directory in (audio_dir, f0_dir):
        if not os.path.isdir(directory):
            os.makedirs(directory)
    for file_idx in file_indices:
        audio_out = os.path.join(audio_dir, os.path.basename(audio_paths[file_idx]) + '.npy')
        # np.save adds '.npy' itself when the name lacks it; spelling it out keeps the on-disk
        # name explicit and consistent with the audio files.
        f0_out = os.path.join(f0_dir, os.path.basename(csv_paths[file_idx]) + '.npy')
        # np.ravel flattens the audio_norm entry to a plain 1-D array of samples, whether or not
        # the waveform is wrapped in a one-element list.
        np.save(audio_out, np.ravel(audio_norm[file_idx]))
        np.save(f0_out, csv_data[file_idx])


_save_split(train, 'data/Raw-data/Train/Audio', 'data/Raw-data/Train/f0')
_save_split(train2, 'data/Raw-data/Test/Audio', 'data/Raw-data/Test/f0')
