In [1]:
import librosa
import pandas as pd
import os
import numpy as np
from tqdm import tqdm_notebook as tqdm

In [2]:
y, sr = librosa.load("../Samples/Piano/PN_ab-ba_piano.mp3", mono=False)

In [3]:
y = librosa.core.to_mono(y)
y = librosa.resample(y, sr, 8000)
y = y[np.argwhere(y >= 0.001)[0][0]:]

In [4]:
note_table = pd.read_csv("../Utilities/Note_table.tsv", header=0, sep="\t")

In [5]:
def long_file_to_data(file, rs, note_table):
    y, sr = librosa.load(file, mono=False)
    y = librosa.core.to_mono(y)
    y = librosa.resample(y, sr, rs)
    y = y[np.argwhere(y >= 0.001)[0][0]:]

    df = pd.DataFrame(columns=["x{}".format(i) for i in range(16000)]+["Freq", "Note"])
    notes = list(note_table.iloc[37:72]["Note"])

    for i in range(0, 16000*35, 16000):
        df = df.append(pd.DataFrame({**{"x{}".format(j): [y[j]] for j in range(16000)}, 
                                    **{"Freq": [librosa.note_to_hz(notes[int(i/16000)])],
                                      "Note": [notes[int(i/16000)]]}}))
    return df

In [6]:
test = long_file_to_data("../Samples/Piano/PN_ab-ba_piano.mp3", 8000, note_table)
test.head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x15992,x15993,x15994,x15995,x15996,x15997,x15998,x15999,Freq,Note
0,0.001465,0.001681,0.005331,-0.002838,-0.015286,-0.009736,0.01549,0.010337,0.001405,0.002371,...,0.003488,0.001495,-0.001752,-0.002831,-0.002337,-0.002936,-0.005293,-0.007221,138.591315,C#3
0,0.001465,0.001681,0.005331,-0.002838,-0.015286,-0.009736,0.01549,0.010337,0.001405,0.002371,...,0.003488,0.001495,-0.001752,-0.002831,-0.002337,-0.002936,-0.005293,-0.007221,146.832384,D3
0,0.001465,0.001681,0.005331,-0.002838,-0.015286,-0.009736,0.01549,0.010337,0.001405,0.002371,...,0.003488,0.001495,-0.001752,-0.002831,-0.002337,-0.002936,-0.005293,-0.007221,155.563492,D#3
0,0.001465,0.001681,0.005331,-0.002838,-0.015286,-0.009736,0.01549,0.010337,0.001405,0.002371,...,0.003488,0.001495,-0.001752,-0.002831,-0.002337,-0.002936,-0.005293,-0.007221,164.813778,E3
0,0.001465,0.001681,0.005331,-0.002838,-0.015286,-0.009736,0.01549,0.010337,0.001405,0.002371,...,0.003488,0.001495,-0.001752,-0.002831,-0.002337,-0.002936,-0.005293,-0.007221,174.614116,F3


In [7]:
def dir_to_data(directory, rs, note_table):
    data = pd.DataFrame(columns=["x{}".format(i) for i in range(16000)]+["Freq", "Note"])
    pbar = tqdm(os.listdir(directory))
    
    for file in pbar:
        pbar.set_description("Processing %s" % file)
        df = long_file_to_data(directory + file, rs, note_table)
        data = data.append(df)
        
    data = data.reset_index().drop("index", axis=1)
    return data

In [8]:
data = dir_to_data("../Samples/Piano/", 8000, note_table)

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))




In [10]:
data.sample(5)

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x15992,x15993,x15994,x15995,x15996,x15997,x15998,x15999,Freq,Note
354,0.002689,-0.000705,0.000797,-9.6e-05,0.000314,0.008247,0.007022,0.004153,0.004863,0.001954,...,0.001586,0.003815,0.005015,0.000589,-0.006444,-0.010128,-0.008046,-0.007614,174.614116,F3
200,0.002245,-0.002663,-0.001381,-0.002056,-0.002601,0.009085,0.008122,0.004067,0.005534,-0.001366,...,0.000415,0.00495,0.00729,0.003684,-0.003771,-0.008404,-0.002448,0.001132,587.329536,D5
620,0.001139,0.000994,0.001305,0.001453,0.002319,0.001656,0.001642,0.002582,0.003448,0.002078,...,0.005696,-0.001596,-0.009344,-0.016679,-0.020768,-0.02331,-0.024848,-0.025167,587.329536,D5
205,0.002245,-0.002663,-0.001381,-0.002056,-0.002601,0.009085,0.008122,0.004067,0.005534,-0.001366,...,0.000415,0.00495,0.00729,0.003684,-0.003771,-0.008404,-0.002448,0.001132,783.990872,G5
402,0.001569,0.006986,0.011378,0.013459,0.014746,0.009817,0.002164,0.001771,0.003422,0.002229,...,-0.015665,-0.016884,-0.012438,-0.006269,-0.000647,0.000566,0.00138,0.003745,369.994423,F#4


In [11]:
data.shape

(700, 16002)

In [12]:
data.drop("Note", axis=1).to_csv("data_freq.tsv", index=False, sep="\t")

In [15]:
data.drop("Freq", axis=1).to_csv("data_note.tsv", index=False, sep="\t")