# Monophonic profiling w/ plotly experiments

In [141]:
import pandas as pd 
import os
import xml.etree.ElementTree as ET  # https://docs.python.org/3/library/xml.etree.elementtree.html
import warnings
warnings.simplefilter('ignore')
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(True)

DOWNLOAD_URL = 'https://www.idmt.fraunhofer.de/en/business_units/m2d/smt/guitar.html'
DOWNLOAD_PATH = '/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/'
GUITAR_TYPES = os.listdir(DOWNLOAD_PATH)

# Sample XML annotations file

In [32]:
"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<instrumentRecording>
  <globalParameter>
    <audioFileName>G53-41101-1111-00002.wav</audioFileName>
    <instrument>EGUI</instrument>
    <recordingDate>07.01.2013</recordingDate>
  </globalParameter>
  <transcription>
    <event>
      <pitch>41</pitch>
      <onsetSec>0.2</onsetSec>
      <offsetSec>2.5</offsetSec>
      <fretNumber>1</fretNumber>
      <stringNumber>1</stringNumber>
      <excitationStyle>PK</excitationStyle>
      <expressionStyle>NO</expressionStyle>
    </event>
  </transcription>
</instrumentRecording>"""

'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<instrumentRecording>\n  <globalParameter>\n    <audioFileName>G53-41101-1111-00002.wav</audioFileName>\n    <instrument>EGUI</instrument>\n    <recordingDate>07.01.2013</recordingDate>\n  </globalParameter>\n  <transcription>\n    <event>\n      <pitch>41</pitch>\n      <onsetSec>0.2</onsetSec>\n      <offsetSec>2.5</offsetSec>\n      <fretNumber>1</fretNumber>\n      <stringNumber>1</stringNumber>\n      <excitationStyle>PK</excitationStyle>\n      <expressionStyle>NO</expressionStyle>\n    </event>\n  </transcription>\n</instrumentRecording>'

# Create annotations file

In [309]:
all_meta = []
for model in GUITAR_TYPES:
    annoation_files = DOWNLOAD_PATH + model + '/annotation'
    for path , dirs, all_files in os.walk(annoation_files):        
        for meta in all_files:  
            filepath = path + '/' + meta

            try:
                
                # INTRODUCING XML LIBRARY 
                tree = ET.parse(filepath)
                root = tree.getroot()
                record = {}
                record['guitarModel'] = model
                # COMMON FLOW FOR ITERATING OVER XML
                for meta_attribute in root:
                    # TWO PARTS TO AN XML ANNOTATIONS FILE - FILE DATA (FIRST) and TRACK DATA (second)
                    if meta_attribute.tag == 'globalParameter':
                        for field in meta_attribute:
                            record[field.tag] = field.text
                    else:
                        # FILE DATA
                        for field in meta_attribute.find('event'):
                            record[field.tag] = field.text
                all_meta.append(record)
            except Exception as e:
                print (e)
                # FOR ANY NON-XML FILES
                print ('Non-xml file type found, skipping!')
                pass

# annotation_sum_df = pd.DataFrame(all_meta)

/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-40100-1111-00001.xml
/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-41101-1111-00002.xml
/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-42102-1111-00003.xml
/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-43103-1111-00004.xml
/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-44104-1111-00005.xml
/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-45105-1111-00006.xml
/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-45200-1111-00014.xml
/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-46106-1111-00007.xml
/Users/BhavishDa

In [313]:
import xmltodict

test_file = '/Users/BhavishDaswani/Downloads/IDMT-SMT-GUITAR_V2/dataset1/Fender Strat Clean Neck SC/annotation/G53-56401-1111-00041.xml'
with open(test_file) as fd:
    parsed_xml = xmltodict.parse(fd.read())

In [317]:
parsed_xml.get('instrumentRecording').get('globalParameter')

OrderedDict([('audioFileName', 'G53-56401-1111-00041.wav'),
             ('instrument', 'EGUI'),
             ('recordingDate', '07.01.2013')])

In [284]:
annotation_sum_df.head()

Unnamed: 0,audioFileName,excitationStyle,expressionStyle,fretNumber,guitarModel,instrument,offsetSec,onsetSec,pitch,recordingDate,stringNumber
0,G53-40100-1111-00001.wav,PK,NO,0,Fender Strat Clean Neck SC,EGUI,2.5,0.2,40,07.01.2013,1
1,G53-41101-1111-00002.wav,PK,NO,1,Fender Strat Clean Neck SC,EGUI,2.5,0.2,41,07.01.2013,1
2,G53-42102-1111-00003.wav,PK,NO,2,Fender Strat Clean Neck SC,EGUI,2.5,0.2,42,07.01.2013,1
3,G53-43103-1111-00004.wav,PK,NO,3,Fender Strat Clean Neck SC,EGUI,2.5,0.2,43,07.01.2013,1
4,G53-44104-1111-00005.wav,PK,NO,4,Fender Strat Clean Neck SC,EGUI,2.5,0.2,44,07.01.2013,1


# REDUCE DATASET TO SINGLE NOTE

In [285]:
# ADDING A MASK TO OMIT CHORDS - MARKED BY MAJOR / MINOR TAGS
single_chord_filter = annotation_sum_df.audioFileName.str.contains(r'(major|minor)', case = False)
single_note_df = annotation_sum_df.loc[~single_chord_filter] 

In [287]:
print ('{} files available to us'.format(single_note_df.shape[0]))

312 files available to us


# SANITY CHECKS

In [288]:
# NO STYLE AUGMENTATION
assert single_note_df.groupby(['excitationStyle', 'expressionStyle']).size().index.values.shape[0] == 1
# ONLY ONE INSTRUMENT - ELECTRIC GUITAR - REPRESENTED
assert len(single_note_df['instrument'].unique()) == 1
# NO FILE DUPLICATION
assert len(single_note_df) == len(single_note_df.audioFileName.unique()), 'Duplicate files found'
'ALL CRITERIA PASSED'

'ALL CRITERIA PASSED'

# ISSUES WITH TYPE READING
rereading annotations data from CSV will avoid this problem!

In [272]:
single_note_df[['offsetSec', 'onsetSec']] = single_note_df[['offsetSec', 'onsetSec']].apply(pd.to_numeric)

In [273]:
single_note_df[['offsetSec', 'onsetSec']].dtypes

offsetSec    float64
onsetSec     float64
dtype: object

# .WAV(s) / Model - Single Note - Even distribution!

In [280]:
import plotly.graph_objs as go

In [289]:
guitar_model_counts = single_note_df.groupby('guitarModel').size()
trace = go.Bar(x = guitar_model_counts.index.values,
        y = guitar_model_counts.values,
        marker = dict(color = ['rgba(255,0,0, 0.3)', 'rgba(255,0, 255, 0.3)', 'rgba(1, 255,0, 0.3)'])
      )
data = [trace]
layout = {
    'title' : 'Model-wise distribution - Single Note',
    'xaxis' : {'title' : 'Models'},
    'yaxis' : {'title' : 'N .wav files'},
    'autosize' : True
}

fig = go.Figure(data = data, layout = layout)
iplot(fig)

# Onset time / Model - Single Note

In [282]:
# CREATE A NEW COLUMN REPRESENTING ONSET TIME
single_note_df['window'] = single_note_df['offsetSec'] - single_note_df['onsetSec'] 

In [275]:
single_note_df['window'].mean() 

2.2098387820512824

In [283]:
trace = []
fill_colors = ["rgb(50,50,50)", "rgb(150,150, 150)", "rgb(220,220,220)", "rgb(165,165,165)"]
ix = 0
grouped_min_val = 0
grouped_max_val = 0
# ITERATING OVER PANDAS GROUPBY OBJECT AND PLOTTING ONSET VALUES
for grp, df in single_note_df.groupby('guitarModel'):
    values = df['window'].values
    
    # FOLLOWING 2 IF STATEMENTS STORING OUT OF SCOPE MIN AND MAX Y VALUES TO CENTER DATA
    if np.max(values) > grouped_max_val:
        grouped_max_val = np.max(values)
    if np.min(values) < grouped_min_val:
        grouped_min_val = np.min(values)
        
    # BACK TO GRAPH MAKING
    trace.append(go.Box(
        y = values, 
        name = grp, 
        fillcolor = fill_colors[ix],
        line = dict(color = "rgb(0,0,0)"),
        showlegend = False,
        boxpoints = 'all',  
        jitter = 0.2
    ))
    ix += 1
data = trace

layout = {'title' : 'Model-wise onset times', 
          "yaxis" : {'range' : [grouped_min_val, grouped_max_val * 1.2]}
         
         }

iplot({"data" : data, 'layout' : layout})

# FRET VS STRING NUMBER TABULATIONS
Even distribution, again!

In [278]:
import plotly.figure_factory as ff

cross_tabulated = pd.crosstab(single_note_df.stringNumber, single_note_df.fretNumber, margins = True)
cross_tabulated

fretNumber,0,1,10,11,12,2,3,4,5,6,7,8,9,All
stringNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,4,4,4,4,4,4,4,4,4,4,4,4,4,52
2,4,4,4,4,4,4,4,4,4,4,4,4,4,52
3,4,4,4,4,4,4,4,4,4,4,4,4,4,52
4,4,4,4,4,4,4,4,4,4,4,4,4,4,52
5,4,4,4,4,4,4,4,4,4,4,4,4,4,52
6,4,4,4,4,4,4,4,4,4,4,4,4,4,52
All,24,24,24,24,24,24,24,24,24,24,24,24,24,312
