In [1]:
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from mido import MidiFile


In [2]:
base_dir = 'data'
midi_folders = os.listdir(base_dir)

In [3]:
midi_folders[2:3]

['2008']

In [4]:
midi_names = []
for folder in midi_folders:
    midi_loc = os.path.join(base_dir, folder)
    for s in os.listdir(midi_loc):
        midi_names.append(folder +'\\'+ s)

In [5]:
def clockToPixel(tmp, div, pxl_sec, clk):
    music_length = tmp * (clk / div) / 1000000.0
    return int(np.ceil(music_length/pxl_sec))

In [6]:
def paintSound(a_img, note, vel, program, start_pxl, end_pxl):
    if a_img[img_heigth - 1 - (note - min_note)][start_pxl][0] == 0 and a_img[img_heigth - 1 - (note - min_note)][end_pxl][0] == 0:
        a_img[img_heigth - 1 - (note - min_note)][start_pxl][0] = 125
        a_img[img_heigth - 1 - (note - min_note)][start_pxl][1] = vel
        a_img[img_heigth - 1 - (note - min_note)][start_pxl][2] = program
        for i in range (start_pxl + 1, end_pxl + 1):
            a_img[img_heigth - 1 - (note - min_note)][i][0] = 255
            a_img[img_heigth - 1 - (note - min_note)][i][1] = vel
            a_img[img_heigth - 1 - (note - min_note)][i][2] = program
    return a_img

In [7]:
pixel_sec = 0.05
tempo = 500000.0
division = 480.0
img_heigth = 88
width = 200
min_note = 21
split = True
channels = 3

In [8]:
def save_img(a_img, fname, year):
    a_img = a_img.astype(np.uint8)
    if(split == False):
        image = Image.fromarray(a_img, mode = 'RGB')
        logdir = 'C:/NHF/imagesDenseNotSplit/{0}'.format(year)
        if(os.path.exists(logdir) == False):
            os.makedirs(logdir)
        image.save('C:/NHF/imagesDenseNotSplit/{0}/{1}.png'.format(year, fname))
        
    else:
        length = a_img.shape[1]
        for i in range(np.floor(length/width).astype(int)):
            end = min(length, (i+1)*width)
            sp_img = a_img[:,i*width:end,:]
            image = Image.fromarray(sp_img, mode = 'RGB')
            logdir = 'C:/NHF/imagesDenseSplit/{0}'.format(year)
            if(os.path.exists(logdir) == False):
                os.makedirs(logdir)
            image.save('C:/NHF/imagesDenseSplit/{0}/{1}_{2}.png'.format(year, fname, i))

In [9]:
for file_name in midi_names:
    print(file_name)
    midi_name = file_name.split('\\')[1].split('.')[0]
    year = file_name.split('\\')[0]
    mid = MidiFile(base_dir + '\\' + file_name) 
    
    message_list = []
    for i in mid.tracks[1][1:-1]: 
        message_list.append(i) 
        
    message_strings = []
    for x in message_list:
        message_strings.append(str(x))
        
    message_strings_split = []
    for message in message_strings:  
        split_str = message.split(" ")
        message_strings_split.append(split_str)
        
    message_type = []
    for item in message_strings_split:
        message_type.append(item[0])
    df1 = pd.DataFrame(message_type)
    df1.columns = ['message_type']
    
    attributes = []
    for item in message_strings_split:
        attributes.append(item[1:])
    attributes_dict = [{}]    
    for item in attributes:
        for i in item:
            key, val = i.split("=")
            if key in attributes_dict[-1]:
                attributes_dict.append({})
            attributes_dict[-1][key] = val
    df2 = pd.DataFrame.from_dict(attributes_dict)
    
    df_complete = pd.concat([df1, df2], axis=1)
    
    #Transform the time and note attributes from strings to floats
    df_complete.time = df_complete.time.astype(float)

    #Engineer a time elapsed attribute equal to the cumulative sum of time.
    df_complete['time_elapsed'] = df_complete.time.cumsum()
    
    df_filtered = df_complete[df_complete['message_type']=='note_on']
    df_filtered.note = df_filtered.note.astype(int)
    df_filtered.velocity = df_filtered.velocity.astype(int)
    
    try:
        df_filtered.drop('channel', axis=1, inplace=True)
        df_filtered.drop('time', axis=1, inplace=True)
        df_filtered.drop('program', axis=1, inplace=True)
        df_filtered.drop('value', axis=1, inplace=True)
        df_filtered.drop('control', axis=1, inplace=True)
    except:
        pass
    df_filtered_sorted = df_filtered.sort_values(by=['note', 'time_elapsed'])
    
    max_time = df_filtered['time_elapsed'].iloc[-1]
    img_width = clockToPixel(tempo, division, pixel_sec, max_time) + 1
    img = np.zeros(shape=(img_heigth, img_width, channels))
    
    par = 0
    start_px = 0
    end_px = 0
    program = 0
    vel = 0
    for index, row in df_filtered_sorted.iterrows():
        note = row['note']
        if(par % 2 == 0):
            start_px = clockToPixel(tempo, division, pixel_sec, row['time_elapsed'])
            vel = row['velocity']
        else:
            end_px = clockToPixel(tempo, division, pixel_sec, row['time_elapsed'])
            img = paintSound(img, note, vel, program, start_px, end_px)
        par += 1
    save_img(img, midi_name, year)

2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.midi
2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav.midi
2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_10_Track10_wav.midi
2004\MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_02_Track02_wav.midi
2004\MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_03_Track03_wav.midi
2004\MIDI-Unprocessed_SMF_05_R1_2004_02-03_ORIG_MID--AUDIO_05_R1_2004_06_Track06_wav.midi
2004\MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_02_Track02_wav.midi
2004\MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_04_Track04_wav.midi
2004\MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_06_Track06_wav.midi
2004\MIDI-Unprocessed_SMF_07_R1_2004_01_ORIG_MID--AUDIO_07_R1_2004_12_Track12_wav.midi
2004\MIDI-Unprocessed_SMF_12_01_2004_01-05_ORIG_MID--AUDIO_12_R1_2004_03_Track03_wav--1.midi
2004\MIDI-Unprocessed_SMF

In [9]:
midi_name = midi_names[0].split('\\')[1].split('.')[0]
year = midi_names[0].split('\\')[0]
mid = MidiFile(base_dir + '\\' + midi_names[0]) 

message_list = []
for i in mid.tracks[1][1:-1]: 
    message_list.append(i) 

message_strings = []
for x in message_list:
    message_strings.append(str(x))

message_strings_split = []
for message in message_strings:  
    split_str = message.split(" ")
    message_strings_split.append(split_str)

message_type = []
for item in message_strings_split:
    message_type.append(item[0])
df1 = pd.DataFrame(message_type)
df1.columns = ['message_type']

attributes = []
for item in message_strings_split:
    attributes.append(item[1:])
attributes_dict = [{}]    
for item in attributes:
    for i in item:
        key, val = i.split("=")
        if key in attributes_dict[-1]:
            attributes_dict.append({})
        attributes_dict[-1][key] = val
df2 = pd.DataFrame.from_dict(attributes_dict)

df_complete = pd.concat([df1, df2], axis=1)

df_complete.time = df_complete.time.astype(float)

#Engineer a time elapsed attribute equal to the cumulative sum of time.
df_complete['time_elapsed'] = df_complete.time.cumsum()

df_filtered = df_complete[df_complete['message_type']=='note_on']
df_filtered.note = df_filtered.note.astype(int)
df_filtered.velocity = df_filtered.velocity.astype(int)

try:
    df_filtered.drop('channel', axis=1, inplace=True)
    df_filtered.drop('time', axis=1, inplace=True)
    df_filtered.drop('program', axis=1, inplace=True)
    df_filtered.drop('value', axis=1, inplace=True)
    df_filtered.drop('control', axis=1, inplace=True)
except:
    pass
df_filtered_sorted = df_filtered.sort_values(by=['note', 'time_elapsed'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [10]:
df_filtered = df_complete[df_complete['message_type']=='note_on']

In [10]:
df_filtered_sorted

Unnamed: 0,message_type,control,value,note,velocity,time_elapsed
3307,note_on,,,38,36,98997.0
3351,note_on,,,38,0,100169.0
4530,note_on,,,38,66,136768.0
4570,note_on,,,38,0,138004.0
5152,note_on,,,38,74,158054.0
...,...,...,...,...,...,...
25719,note_on,,,84,0,841996.0
26690,note_on,,,84,88,864532.0
26693,note_on,,,84,0,864616.0
28943,note_on,,,84,84,916035.0
