## Experimenting with the library

In [None]:
import os 

In [None]:
# Move the working directory up one level; the next cell records it as the base path.
os.chdir("../")

In [None]:
# Show the current working directory and keep it as the base path used by later cells.
print(os.getcwd())
musicPath = os.getcwd()

In [None]:
# Switch into the midicsv directory so './midicsv' can be invoked by relative path below.
os.chdir(musicPath+"/code/midicsv/")

In [None]:
import subprocess as sp

In [None]:
# Files that receive midicsv's stdout and stderr in the sp.call cell below; both are closed there.
# NOTE(review): errorFile is created as 'test.csv' in the current directory, the same
# base name as the output file — confirm that name is intended for the stderr capture.
outputFile =open(musicPath+'/output/test.csv','w+')
errorFile =open('test.csv','w+')

In [None]:
# Sanity check: print the working directory and list its entries.
print(os.getcwd())
os.listdir()

In [None]:
# Run midicsv (with -v) on one MAESTRO file, sending stdout to outputFile and stderr to errorFile,
# then close both handles that were opened in the earlier cell.
sp.call(['./midicsv', '-v', musicPath+"/data/maestro/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi"], stdout=outputFile, stderr=errorFile)
outputFile.close()
errorFile.close()

## Feed into Pandas DF

In [1]:
import os 
import subprocess
import sys
import pandas as pd
import math
import concurrent.futures
# Step up one level, record that directory as the base path, and derive the data path from it.
# NOTE(review): the chdir is relative, so re-running this cell moves up another level each time.
os.chdir("../")
musicPath = os.getcwd()
dataPath = musicPath + "/data"

# midiToCsv invokes './midicsv' relative to the working directory set here.
os.chdir(musicPath+"/code/midicsv/")

In [2]:
def midiToCsv(restofPath):
    """Run the ``midicsv`` tool on one MIDI file and load its output as a DataFrame.

    ``restofPath`` is appended to the module-level ``musicPath`` to build the
    MIDI file path. ``./midicsv`` is resolved against the current working
    directory; its stdout is decoded as UTF-8 and parsed as comma-separated
    text.

    Returns a DataFrame whose columns are labelled track, time, type, channel,
    note, velocity, other and encoded (positional labels for the CSV fields).
    """
    # StringIO lives in a different module on Python 2.
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    columnNames = ["track", "time", "type", "channel", "note", "velocity", "other", "encoded"]
    midiFile = musicPath + restofPath
    process = subprocess.Popen(['./midicsv', '-v', midiFile], stdout=subprocess.PIPE)
    stdoutBytes, _ = process.communicate()
    csvText = StringIO(stdoutBytes.decode('utf-8'))
    return pd.read_csv(csvText, names=columnNames, sep=",")

In [3]:
def getQuarterNoteRate(df):
    """Return the value stored in the ``velocity`` column of the first row of ``df``.

    NOTE(review): presumably the first row is midicsv's Header record, whose
    sixth field (labelled ``velocity`` here) is the ticks-per-quarter-note
    division — confirm against the midicsv output format.
    """
    velocityColumn = df['velocity']
    return velocityColumn.iloc[0]

In [4]:
def setQuarterNoteTime(df, noteRate, sampleRate):
    """Rescale the ``time`` column of ``df`` in place and return ``df``.

    Each time value is divided by ``noteRate / sampleRate`` and floored to an
    integer, so times end up counted in units of ``noteRate / sampleRate``
    original ticks.
    """
    def toStep(ticks):
        # Floor so every event lands on the step that contains it.
        return int(math.floor(ticks / (noteRate / sampleRate)))

    df['time'] = df['time'].apply(toStep)
    return df

In [5]:
def removeControlC(df):
    """Return a new DataFrame without the rows whose ``type`` contains 'Control_c'.

    The input frame is left untouched; rows are removed by index label.
    """
    isControlChange = df['type'].str.contains('Control_c')
    controlLabels = df.loc[isControlChange].index
    return df.drop(index=controlLabels)

In [6]:
def getNote(x):
    """Return the second '|'-separated field of ``x``.

    Raises IndexError when ``x`` contains no '|'.
    """
    # Could be simplified with: df.name.str.extract(r'([\d]+)',expand=False)
    # Only the second field is needed, so stop splitting after it.
    return x.split('|', 2)[1]

def reformatStrings(df):
    """Add the ``encoded`` and ``finalEncode`` token columns to ``df`` in place.

    * ``note`` is filled with 0 where missing and cast to int.
    * ``encoded`` becomes ``"<type>|<note>|<velocity>"``.
    * ``finalEncode`` becomes ``"off<note>"`` when the velocity is zero and
      ``"on<note>"`` otherwise (including rows with a missing velocity).

    The zero test is done numerically, so it holds whether the velocity column
    was parsed as int or float. A suffix test on the text ``"|0.0"`` only
    matches float-formatted velocities.

    Returns the same (mutated) DataFrame.
    """
    df['note'] = df['note'].fillna(0).astype(int)
    noteText = df['note'].astype(str)
    df['encoded'] = df['type'] + "|" + noteText + "|" + df['velocity'].astype(str)

    # Non-numeric or missing velocities become NaN, which is never equal to 0 -> "on".
    velocity = pd.to_numeric(df['velocity'], errors='coerce')
    prefix = velocity.eq(0).map({True: "off", False: "on"})
    df['finalEncode'] = prefix + noteText

    return df


In [7]:
def trackisOnlyPianolike(reformedRows):
    """Return True when the rows hold exactly one Program_c event and the first
    row's ``channel`` value is in the range 0..7.

    Prints "changes in program" when the number of Program_c rows is not
    exactly one, and "not Piano Track" when the channel check fails; returns
    False in both cases.

    NOTE(review): the 0..7 range is tested against the ``channel`` column of
    the first row, not against a program number — confirm this is the intended
    field for detecting piano-like tracks.
    """
    isProgramChange = reformedRows['type'].str.contains('Program_c')
    if len(reformedRows[isProgramChange]) != 1:
        print("changes in program")
        return False

    firstChannel = int(reformedRows.iloc[0].channel)
    if 0 <= firstChannel < 8:
        return True

    print("not Piano Track")
    return False


In [8]:
def makeTrueRows(reformedRows):
    """Return only the rows whose ``type`` contains 'Note_on_c'."""
    isNoteOn = reformedRows['type'].str.contains('Note_on_c')
    return reformedRows[isNoteOn]

In [9]:
def makeNoteString(trueRows):
    """Join the ``finalEncode`` tokens of ``trueRows`` into one space-separated string.

    Rows are read in order. Whenever a row's ``time`` is later than the
    previous emitted time, a ``"wait<k>"`` token is inserted first, where
    ``k = time - 1 - previous_time``. Rows whose time is earlier than the
    previous emitted time are skipped.

    Returns an empty string when ``trueRows`` has no rows, so a track without
    any note events no longer raises IndexError.
    """
    if len(trueRows) == 0:
        return ""

    prevTime = int(trueRows['time'].iloc[0])
    outArr = []
    # Iterate the two needed columns directly instead of building a Series per row.
    for rawTime, token in zip(trueRows['time'], trueRows['finalEncode']):
        rowTime = int(rawTime)
        if rowTime < prevTime:
            # Out-of-order event: dropped, matching the previous behaviour.
            continue
        if rowTime > prevTime:
            # NOTE(review): adjacent steps produce "wait0" because of the "- 1";
            # confirm this offset is what the downstream consumer expects.
            outArr.append("wait" + str(rowTime - 1 - prevTime))
            prevTime = rowTime
        outArr.append(token)

    return " ".join(outArr)

In [10]:
def removeTitle(df):
    """Return a new DataFrame without the rows whose ``type`` contains 'Title_t'.

    The input frame is left untouched; rows are removed by index label.
    """
    isTitle = df['type'].str.contains('Title_t')
    titleLabels = df.loc[isTitle].index
    return df.drop(index=titleLabels)

In [11]:
def trainingString(diffTime):
    """Build the token string for the rows between the third and fourth track markers.

    The index of ``diffTime`` is reset in place to 0..n-1. Rows whose type
    contains 'Start_track' or 'End_track' are located, and the rows strictly
    between the third and fourth such markers are selected (presumably the
    body of the second track — confirm against the midicsv layout).

    Returns the note string from ``makeNoteString`` when
    ``trackisOnlyPianolike`` accepts the selection, otherwise ``None``.
    Raises IndexError when fewer than four marker rows exist.
    """
    # Renumber rows so index labels double as positions for iloc below.
    diffTime.index = pd.RangeIndex(len(diffTime.index))
    #TODO deal with multiple tracks?
    markerPattern = '|'.join(['Start_track', 'End_track'])
    markerRows = diffTime[diffTime.type.str.contains(markerPattern)].index
    firstRow = markerRows[2] + 1
    lastRow = markerRows[3]
    reformedRows = diffTime.iloc[firstRow:lastRow, :]

    if not trackisOnlyPianolike(reformedRows):
        return None
    # Keep only the Note_on rows and serialise them.
    return makeNoteString(makeTrueRows(reformedRows))

In [12]:
def processFile(fileName, sampleRate=12):
    """Convert one MIDI file into its training token string.

    Pipeline: ``midiToCsv`` -> read the note rate from the first row ->
    drop Control_c and Title_t rows -> rescale times with
    ``setQuarterNoteTime`` -> add token columns with ``reformatStrings`` ->
    ``trainingString``.

    Parameters
    ----------
    fileName : str
        Path of the MIDI file, relative to ``musicPath`` (as expected by
        ``midiToCsv``).
    sampleRate : int, optional
        Value passed to ``setQuarterNoteTime`` as its ``sampleRate``; defaults
        to 12, the value that was previously hard-coded here.

    Returns whatever ``trainingString`` returns (a string, or ``None`` when
    the track is rejected).
    """
    df = midiToCsv(fileName)
    # The rate must be read before rows are filtered out.
    noteRate = getQuarterNoteRate(df)
    withoutC = removeControlC(df)
    withoutT = removeTitle(withoutC)
    diffTime = setQuarterNoteTime(withoutT, noteRate, sampleRate)

    # Mutates diffTime in place (adds 'encoded' and 'finalEncode').
    reformatStrings(diffTime)
    return trainingString(diffTime)

In [13]:
def splitData(musicPath):
    """Load the MAESTRO metadata CSV and return its (train, test, validation) subsets.

    The CSV is read from ``musicPath + '/data/maestro/maestro-v2.0.0.csv'`` and
    partitioned by the value of its ``split`` column.
    """
    songList = pd.read_csv(musicPath + '/data/maestro/maestro-v2.0.0.csv')
    subsets = [songList[songList["split"] == label] for label in ('train', 'test', 'validation')]
    return tuple(subsets)

In [14]:
# Load the metadata once and keep the three subsets as notebook-level variables.
train, test, validate = splitData(musicPath)

In [15]:
def encodeFile(subDirectory, fileName):
    """Print the combined path ``subDirectory + fileName`` and return ``processFile`` of it."""
    relativePath = subDirectory + fileName
    # Printed so progress is visible while many files are being processed.
    print(relativePath)
    encoded = processFile(relativePath)
    return encoded
    

In [41]:
def cleanFileAndSave(row, folder):
    """Encode the MIDI file named by ``row['midi_filename']`` and save it as text.

    The encoded string is written to
    ``musicPath + '/data/cleanMaestro/<folder>/<name>.txt'``, where ``<name>``
    is the MIDI file's base name without its extension.

    When encoding yields ``None`` (``trainingString`` returns ``None`` for a
    rejected track) nothing is written and a message is printed. This avoids
    the TypeError from ``write(None)``, which would also leave an empty output
    file behind.
    """
    encodedString = encodeFile('/data/maestro/', row['midi_filename'])
    if encodedString is None:
        print("skipping " + str(row['midi_filename']) + ": no encoded output")
        return

    # basename/splitext cope with nested directories and dots inside the file name.
    baseName = os.path.basename(row['midi_filename'])
    name = os.path.splitext(baseName)[0] + ".txt"
    outPath = musicPath + '/data/cleanMaestro/' + folder + "/" + name
    # The context manager closes the file even if the write fails.
    with open(outPath, "w") as text_file:
        text_file.write(encodedString)

In [42]:
def processDF(dataFrame, folder):
    """Run ``cleanFileAndSave`` on every row of ``dataFrame``, one after another."""
    for _, songRow in dataFrame.iterrows():
        cleanFileAndSave(songRow, folder)

In [None]:
# Peek at the first training entry's MIDI path (relative to the maestro data directory).
train['midi_filename'].iloc[0]
# "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi"

In [43]:
# Sequential encoding of the training split; the recorded run below ended in a KeyboardInterrupt.
processDF(train, "train")

/data/maestro/2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1.midi
/data/maestro/2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MID--AUDIO_03_R2_2008_wav--2.midi
/data/maestro/2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-split_07-07-17_Piano-e_3-02_wav--3.midi
/data/maestro/2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MID--AUDIO_21_R1_2004_01_Track01_wav.midi
/data/maestro/2013/ORIG-MIDI_03_7_8_13_Group__MID--AUDIO_19_R2_2013_wav--3.midi
/data/maestro/2011/MIDI-Unprocessed_15_R1_2011_MID--AUDIO_R1-D6_09_Track09_wav.midi
/data/maestro/2006/MIDI-Unprocessed_19_R1_2006_01-07_ORIG_MID--AUDIO_19_R1_2006_04_Track04_wav.midi
/data/maestro/2013/ORIG-MIDI_02_7_10_13_Group_MID--AUDIO_11_R3_2013_wav--3.midi


KeyboardInterrupt: 

In [52]:
def parallelProcessDF(dataFrame, folder):
    """Run ``cleanFileAndSave`` on every row of ``dataFrame`` in a process pool.

    One task is submitted per row. Each task's outcome is checked as it
    completes, and failures are printed with the file they belong to.
    Without this check, exceptions raised in workers are stored on the futures
    and never surface. A failing file does not stop the remaining ones.
    """
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # Map each future to a label so failures can be attributed to a file.
        pending = {
            executor.submit(cleanFileAndSave, row, folder): row.get('midi_filename', index)
            for index, row in dataFrame.iterrows()
        }
        for future in concurrent.futures.as_completed(pending):
            error = future.exception()
            if error is not None:
                print("failed " + str(pending[future]) + ": " + repr(error))

In [53]:
# Parallel encoding of the training split; the recorded run below was also interrupted.
parallelProcessDF(train, "train")

/data/maestro/2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1.midi
/data/maestro/2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MID--AUDIO_03_R2_2008_wav--2.midi
/data/maestro/2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-split_07-07-17_Piano-e_3-02_wav--3.midi
/data/maestro/2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MID--AUDIO_21_R1_2004_01_Track01_wav.midi
/data/maestro/2013/ORIG-MIDI_03_7_8_13_Group__MID--AUDIO_19_R2_2013_wav--3.midi
/data/maestro/2011/MIDI-Unprocessed_15_R1_2011_MID--AUDIO_R1-D6_09_Track09_wav.midi
/data/maestro/2006/MIDI-Unprocessed_19_R1_2006_01-07_ORIG_MID--AUDIO_19_R1_2006_04_Track04_wav.midi
/data/maestro/2013/ORIG-MIDI_02_7_10_13_Group_MID--AUDIO_11_R3_2013_wav--3.midi
/data/maestro/2011/MIDI-Unprocessed_09_R1_2011_MID--AUDIO_R1-D3_15_Track15_wav.midi
/data/maestro/2014/MIDI-UNPROCESSED_14-15_R1_2014_MID--AUDIO_15_R1_2014_wav--5.midi
/data/maestro/2008/MIDI-Unprocessed_01_R1_2008_01-04_ORIG_MID--AUDIO_01_R1_2008_wav--3.midi
/data/maestro/2006/MIDI-U

KeyboardInterrupt: 

/data/maestro/2018/MIDI-Unprocessed_Recital17-19_MID--AUDIO_18_R1_2018_wav--2.midi
/data/maestro/2017/MIDI-Unprocessed_079_PIANO079_MID--AUDIO-split_07-09-17_Piano-e_1-04_wav--4.midi
/data/maestro/2018/MIDI-Unprocessed_Recital5-7_MID--AUDIO_07_R1_2018_wav--2.midi
/data/maestro/2018/MIDI-Unprocessed_Recital9-11_MID--AUDIO_11_R1_2018_wav--2.midi
/data/maestro/2018/MIDI-Unprocessed_Chamber6_MID--AUDIO_20_R3_2018_wav--2.midi
/data/maestro/2004/MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_12_Track12_wav.midi
/data/maestro/2018/MIDI-Unprocessed_Recital12_MID--AUDIO_12_R1_2018_wav--2.midi
/data/maestro/2009/MIDI-Unprocessed_09_R1_2009_01-04_ORIG_MID--AUDIO_09_R1_2009_09_R1_2009_04_WAV.midi
/data/maestro/2009/MIDI-Unprocessed_19_R1_2009_03-04_ORIG_MID--AUDIO_19_R1_2009_19_R1_2009_04_WAV.midi
/data/maestro/2008/MIDI-Unprocessed_07_R1_2008_01-04_ORIG_MID--AUDIO_07_R1_2008_wav--4.midi
/data/maestro/2014/MIDI-UNPROCESSED_21-22_R1_2014_MID--AUDIO_21_R1_2014_wav--6.midi
/data/maestr

/data/maestro/2013/ORIG-MIDI_03_7_10_13_Group_MID--AUDIO_15_R3_2013_wav--1.midi
/data/maestro/2013/ORIG-MIDI_03_7_10_13_Group_MID--AUDIO_17_R3_2013_wav--2.midi
/data/maestro/2013/ORIG-MIDI_03_7_10_13_Group_MID--AUDIO_18_R3_2013_wav--3.midi
/data/maestro/2011/MIDI-Unprocessed_03_R3_2011_MID--AUDIO_R3-D1_04_Track04_wav.midi
/data/maestro/2011/MIDI-Unprocessed_04_R3_2011_MID--AUDIO_R3-D2_04_Track04_wav.midi
/data/maestro/2011/MIDI-Unprocessed_06_R3_2011_MID--AUDIO_R3-D3_02_Track02_wav.midi
/data/maestro/2011/MIDI-Unprocessed_23_R3_2011_MID--AUDIO_R3-D8_05_Track05_wav.midi
/data/maestro/2011/MIDI-Unprocessed_16_R3_2011_MID--AUDIO_R3-D5_02_Track02_wav.midi
/data/maestro/2008/MIDI-Unprocessed_07_R3_2008_01-05_ORIG_MID--AUDIO_07_R3_2008_wav--2.midi
/data/maestro/2015/MIDI-Unprocessed_R2_D1-2-3-6-7-8-11_mid--AUDIO-from_mp3_06_R2_2015_wav--2.midi
/data/maestro/2015/MIDI-Unprocessed_R2_D2-12-13-15_mid--AUDIO-from_mp3_13_R2_2015_wav--1.midi
/data/maestro/2015/MIDI-Unprocessed_R2_D2-12-13-15_mid--

  


/data/maestro/2015/MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3_01_R1_2015_wav--5.midi
/data/maestro/2015/MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3_02_R1_2015_wav--6.midi
/data/maestro/2015/MIDI-Unprocessed_R1_D1-9-12_mid--AUDIO-from_mp3_12_R1_2015_wav--5.midi
/data/maestro/2017/MIDI-Unprocessed_046_PIANO046_MID--AUDIO-split_07-06-17_Piano-e_2-02_wav--3.midi


In [None]:
# Encode a single file directly, useful for inspecting one result without the batch loop.
processFile("/data/maestro/2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MID--AUDIO_03_R2_2008_wav--2.midi")