In [None]:
!pip install numpy librosa ipython pretty_midi soundfile jams tensorflow

In [None]:
import numpy as np
import librosa
import os
from IPython.display import Audio
import pretty_midi
import soundfile as sf
import jams
np.int = int # deprecated np.int

In [None]:
# Folder that holds the .mid files used throughout the notebook.
# An empty string is not a valid directory for os.listdir(), so fall back to
# the current working directory; set the MIDI_DIR environment variable (or
# edit this line) to point at the dataset.
midi_dir = os.environ.get("MIDI_DIR") or "."

### Creating feature matrices for MIDI data, encoding expressive techniques

In [None]:
def midi_to_jams(midi_path):
    """
    Convert the notes of the first instrument in a MIDI file into a JAMS object.

    midi_path: path to a MIDI file readable by pretty_midi
    Returns a jams.JAMS holding a single annotation (namespace 'note') with one
    observation per note: onset time, duration, MIDI pitch as the value and
    velocity / 127 as the confidence.
    """
    midi = pretty_midi.PrettyMIDI(midi_path)
    # Only the first instrument track is read; any other tracks are ignored.
    first_track_notes = midi.instruments[0].notes

    annotation = jams.Annotation(namespace='note')  # custom namespace
    for midi_note in first_track_notes:
        onset, offset = midi_note.start, midi_note.end
        annotation.append(
            time=onset,
            duration=offset - onset,
            value=midi_note.pitch,
            confidence=midi_note.velocity / 127,
        )

    result = jams.JAMS()
    result.annotations.append(annotation)
    return result

In [None]:
def print_jam(jam):
    """
    Print each annotation of a JAMS-like object in plain text.

    jam: object exposing `annotations`; every annotation needs `namespace` and
         an iterable `data` of observations with time/duration/value/confidence
    Writes one "Namespace" header line per annotation followed by one line per
    observation (time and duration are shown with three decimals).
    """
    for annotation in jam.annotations:
        header = f"Namespace: {annotation.namespace}"
        print(header)
        for obs in annotation.data:
            print(
                f"Time: {obs.time:.3f}, "
                f"Duration: {obs.duration:.3f}, "
                f"Value: {obs.value}, "
                f"Confidence: {obs.confidence}"
            )

In [None]:
# Sanity check: convert one example file and dump its note annotation.
example_name = '05_Funk2-108-Eb_solo.mid'
example_midi = os.path.join(midi_dir, example_name)
jam = midi_to_jams(example_midi)
print_jam(jam)

In [None]:
# Vocabulary of expressive techniques to encode. The position of each name in
# this list fixes the position of its 0/1 flag in the per-note feature vector.
techniques_list = [
    'alternate picking',
    'bend',
    'hammer on',
    'legato',
    'pull off',
    'slide',
    'sweep picking',
    'tapping',
    'vibrato',
]

In [None]:
# Hand-written example notes (pitch, onset time, duration, normalized velocity).
notes = [
    dict(pitch=58, time=0.671, duration=0.139, velocity=0.504),
    dict(pitch=59, time=0.854, duration=0.209, velocity=0.504),
    dict(pitch=61, time=1.107, duration=0.134, velocity=0.504),
]
# Techniques attached to each example note, in the same order as `notes`.
note_techniques_seq = [
    ['alternate picking'],
    ['bend'],
    ['hammer on', 'vibrato'],
]

In [None]:
def note_to_features(note, previous_note, note_techniques, techniques=None):
    """
    Build the flat feature vector for a single note.

    note: dict with keys 'pitch', 'time', 'duration', 'velocity'
    previous_note: previous note dict, or None for the first note
    note_techniques: technique names for this note (list/tuple/set of strings);
                     None means "no techniques" and a bare string is treated as
                     a single technique name
    techniques: optional ordered vocabulary of technique names; defaults to the
                module-level `techniques_list`

    Returns a list: [pitch, duration, velocity, time_delta, *flags] where
    time_delta is the onset difference to the previous note (0.0 when there is
    none) and flags holds one 0/1 entry per vocabulary item, in vocabulary order.
    """
    if techniques is None:
        techniques = techniques_list

    # Normalize the per-note techniques so membership tests are exact matches;
    # a raw string would otherwise be matched by substring.
    if note_techniques is None:
        active = set()
    elif isinstance(note_techniques, str):
        active = {note_techniques}
    else:
        active = set(note_techniques)

    time_delta = note['time'] - previous_note['time'] if previous_note else 0.0
    technique_flags = [1 if name in active else 0 for name in techniques]

    return [
        note['pitch'],
        note['duration'],
        note['velocity'],
        time_delta,
    ] + technique_flags

In [None]:
def midi_to_feature_matrix(midi_path, midi_to_techniques_fn):
    """
    Encode every note of a MIDI file as one row of a float32 feature matrix.

    midi_path: path to a MIDI file
    midi_to_techniques_fn: function called as fn(note_index, note_dict) that
                           returns the list of techniques for that note

    Notes from all instruments are merged and ordered by onset time; velocity
    is scaled by 1/127. Each row is produced by note_to_features, so it holds
    pitch, duration, velocity, time_delta and the technique flags.

    Returns an array of shape (num_notes, num_features). A file without notes
    yields an empty (0, num_features) array so callers can still read shape[1].
    """
    pm = pretty_midi.PrettyMIDI(midi_path)
    notes = [
        {
            'pitch': n.pitch,
            'time': n.start,
            'duration': n.end - n.start,
            'velocity': n.velocity / 127.0,  # normalize
        }
        for inst in pm.instruments
        for n in inst.notes
    ]
    notes.sort(key=lambda x: x['time'])

    rows = []
    prev_note = None
    for i, note in enumerate(notes):
        note_techniques = midi_to_techniques_fn(i, note)  # user-defined mapping
        rows.append(note_to_features(note, prev_note, note_techniques))
        prev_note = note

    if not rows:
        # np.array([]) would be 1-D; keep the matrix 2-D with the usual width
        # (4 scalar features + one flag per entry of techniques_list).
        return np.zeros((0, 4 + len(techniques_list)), dtype=np.float32)

    return np.array(rows, dtype=np.float32)

In [None]:
def encode_all_midis(midi_folder, midi_to_techniques_fn):
    """
    Encode every MIDI file found directly inside a folder.

    midi_folder: directory to scan (not recursive)
    midi_to_techniques_fn: passed through to midi_to_feature_matrix

    Files are matched case-insensitively on the '.mid' / '.midi' extensions and
    processed in sorted filename order, so the position of each matrix in the
    result is reproducible across runs and machines.

    Returns a list with one feature matrix per MIDI file.
    """
    midi_files = sorted(
        name for name in os.listdir(midi_folder)
        if name.lower().endswith(('.mid', '.midi'))
        and os.path.isfile(os.path.join(midi_folder, name))
    )

    return [
        midi_to_feature_matrix(os.path.join(midi_folder, name), midi_to_techniques_fn)
        for name in midi_files
    ]

In [None]:
def dummy_techniques_fn(idx, note):
    """
    Demo technique assignment: note number `idx` gets the technique at position
    idx modulo the vocabulary size; the note itself is ignored.

    Returns a one-element list with the chosen technique name.
    """
    position = idx % len(techniques_list)
    chosen = techniques_list[position]
    return [chosen]

In [None]:
# Encode every MIDI file in midi_dir using the demo technique assignment,
# then show the matrix of the first file as a quick check.
feature_matrices = encode_all_midis(
    midi_dir,
    dummy_techniques_fn,
)
print("feature matrix for one midi file")
first_matrix = feature_matrices[0]
print(first_matrix)

In [None]:
import numpy as np

# feature_matrices holds one (seq_len_i, feature_size) array per file.
# Pad them all with trailing zero rows up to the longest sequence.
sequence_lengths = [fm.shape[0] for fm in feature_matrices]
max_seq_len = max(sequence_lengths)
feature_size = feature_matrices[0].shape[1]
num_sequences = len(feature_matrices)

X_sequences = np.zeros((num_sequences, max_seq_len, feature_size))
for row, (matrix, n_steps) in enumerate(zip(feature_matrices, sequence_lengths)):
    X_sequences[row, :n_steps, :] = matrix  # rows past n_steps stay zero

# Self-supervised target: the padded inputs followed by two extra all-zero
# channels reserved for string and fret.
y_target = np.zeros((num_sequences, max_seq_len, feature_size + 2))
y_target[..., :feature_size] = X_sequences

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking
import numpy as np

# --- data shape ---
seq_len, feature_size, output_size = (
    16,  # max sequence length
    13,  # input features: pitch, duration, velocity, time_delta, technique flags
    2,   # string & fret
)

# --- model / training ---
hidden_units = 128
batch_size = 16
num_epochs = 50
max_fret_jump = 5.0  # divisor applied to the jump penalty in hand_jump_loss

# Standard guitar tuning as MIDI note numbers, lowest string first.
tuning = [
    40,  # E2
    45,  # A2
    50,  # D3
    55,  # G3
    59,  # B3
    64,  # E4
]

def pitch_to_string_fret(pitch, open_strings=None, max_fret=24):
    """
    Map a MIDI pitch to a playable string/fret combination.

    pitch: MIDI note number
    open_strings: open-string MIDI pitches, one per string; defaults to the
                  module-level `tuning`
    max_fret: highest fret considered playable (inclusive)

    Returns (string_index, fret_number), where string_index is the 0-based
    position in open_strings. Among all playable positions the one with the
    lowest fret is chosen (the first such string on ties). A pitch that cannot
    be played on any string falls back to (0, 0).
    """
    if open_strings is None:
        open_strings = tuning

    candidates = [
        (string_idx, pitch - open_pitch)
        for string_idx, open_pitch in enumerate(open_strings)
        if 0 <= pitch - open_pitch <= max_fret
    ]
    if not candidates:
        # pitch out of guitar range: default to string 0, fret 0
        return 0, 0
    # pick the string/fret with lowest fret
    return min(candidates, key=lambda string_fret: string_fret[1])

# Build fixed-length model inputs and targets.
# feature_matrices: list of arrays (seq_len_i, feature_size); sequences longer
# than seq_len are truncated, shorter ones are zero-padded at the end.
max_seq_len = seq_len
num_sequences = len(feature_matrices)
X_sequences = np.zeros((num_sequences, max_seq_len, feature_size))
y_sequences = np.zeros((num_sequences, max_seq_len, output_size))

for seq_idx, matrix in enumerate(feature_matrices):
    n_steps = min(matrix.shape[0], max_seq_len)
    X_sequences[seq_idx, :n_steps, :] = matrix[:n_steps]

    # Baseline string/fret labels derived from the pitch column (feature 0).
    for step, pitch_value in enumerate(matrix[:n_steps, 0]):
        string_idx, fret = pitch_to_string_fret(int(round(pitch_value)))
        y_sequences[seq_idx, step, 0] = string_idx
        y_sequences[seq_idx, step, 1] = fret

def hand_jump_loss(y_true, y_pred):
    """
    Loss that discourages large position changes between consecutive notes.

    y_true, y_pred: tensors indexed as (batch, step, channel)

    Returns the mean squared error between y_true and y_pred plus the mean
    squared difference of y_pred between neighbouring steps, the latter divided
    by the module-level max_fret_jump.
    """
    # Plain MSE over every element.
    squared_error = tf.square(y_true - y_pred)
    reconstruction_term = tf.reduce_mean(squared_error)

    # Step-to-step change of the predictions along axis 1.
    later_steps = y_pred[:, 1:, :]
    earlier_steps = y_pred[:, :-1, :]
    smoothness_term = tf.reduce_mean(tf.square(later_steps - earlier_steps))

    return reconstruction_term + smoothness_term / max_fret_jump

# Sequence model: masking of all-zero steps -> LSTM -> per-step dense output.
model = Sequential()
model.add(Masking(mask_value=0., input_shape=(max_seq_len, feature_size)))
model.add(LSTM(hidden_units, return_sequences=True))
model.add(Dense(output_size))  # predicts string & fret only
model.compile(optimizer='adam', loss=hand_jump_loss)

# Fit on the padded sequences, holding out 10% for validation.
fit_options = dict(
    batch_size=batch_size,
    epochs=num_epochs,
    validation_split=0.1,
)
model.fit(X_sequences, y_sequences, **fit_options)

# Predict string/fret for every step and round to whole numbers.
predicted_sf = model.predict(X_sequences)
predicted_strings = np.round(predicted_sf[..., 0]).astype(int)
predicted_frets = np.round(predicted_sf[..., 1]).astype(int)

In [None]:
# Index of the sequence to inspect; predictions and features below all refer
# to this same sequence.
sample_idx = 0

print("Predicted strings:", predicted_strings[sample_idx])
print("Predicted frets  :", predicted_frets[sample_idx])

print("Feature vectors for this sequence/window:")
print(X_sequences[sample_idx])

In [None]:
def predicted_to_ascii_tab(predicted_sf, notes_per_row=50):
    """
    Render (string, fret) pairs as an ASCII guitar tab.

    predicted_sf: array-like of shape (num_notes, 2), or a 3-D array whose
                  leading axes are flattened into one note sequence. Column 0
                  is the string index and column 1 the fret; both are rounded
                  to the nearest integer.
    notes_per_row: how many notes to show per visual row

    String indices are 0-based, matching pitch_to_string_fret: index 0 is the
    lowest string (printed on the bottom 'E' line) and index 5 the highest
    (top 'e' line). A note whose rounded string index is outside 0..5 is drawn
    as an empty column. Negative frets are clamped to 0 so they cannot be
    confused with the '-' filler. Within a visual row every column has the
    same width, so multi-digit frets keep the six lines aligned.

    Returns the tab as a single string; each visual row is followed by a blank
    line. Raises ValueError if the input is neither 2-D nor 3-D.
    """
    predicted_sf = np.asarray(predicted_sf)

    # Flatten if needed
    if predicted_sf.ndim == 3:
        predicted_sf = predicted_sf.reshape(-1, 2)
    elif predicted_sf.ndim != 2:
        raise ValueError(f"Unexpected predicted_sf shape: {predicted_sf.shape}")

    num_strings = 6
    string_names = ['e', 'B', 'G', 'D', 'A', 'E']

    pieces = []
    for start_idx in range(0, len(predicted_sf), notes_per_row):
        row_notes = predicted_sf[start_idx:start_idx + notes_per_row]

        # Round once up front so the column width can be computed for the row.
        positions = [
            (int(round(float(string))), max(0, int(round(float(fret)))))
            for string, fret in row_notes
        ]
        col_width = max(len(str(fret)) for _, fret in positions) + 1

        # One list of cells per printed line, top line (high e) first.
        row_lines = [[] for _ in range(num_strings)]
        for string_idx, fret in positions:
            for i in range(num_strings):
                cell = str(fret) if i == string_idx else ''
                # String 0 (lowest) belongs on the last printed line.
                row_lines[num_strings - i - 1].append(cell.ljust(col_width, '-'))

        for name, cells in zip(string_names, row_lines):
            pieces.append(f"{name}|{''.join(cells)}\n")
        pieces.append('\n')  # extra blank line between tab rows

    return ''.join(pieces)

In [None]:
# Render all predicted string/fret pairs as an ASCII tab (50 notes per row).
ascii_tab = predicted_to_ascii_tab(predicted_sf, notes_per_row=50)
print(ascii_tab)