In [1]:
import src.load_data as load_data
from src.feature_matrix import create_feature_matrix
import seaborn as sns
import pandas as pd

# Run the project's data pipeline, then build the lagged feature matrix from
# its result.
joined = load_data.data_pipeline()
feature_matrix = create_feature_matrix(
    joined,
    melody_note_lags=range(1, 5),        # yields 1, 2, 3, 4
    chord_root_lags=range(-1, -2, -1),   # yields only -1
    chord_type_lags=range(-1, -2, -1),   # yields only -1
)


In [3]:
# Separate the target column from the predictors, then one-hot encode the
# predictors.
target_col = 'chord_root_melody_note_interval'
y = feature_matrix[target_col]
X = pd.get_dummies(feature_matrix.drop(columns=[target_col]))

In [4]:
from sklearn.naive_bayes import MultinomialNB

# Fit the classifier and score the very same rows it was fitted on, so both
# `pred` and `pred_probs` are in-sample predictions.
clf = MultinomialNB().fit(X, y)
pred = clf.predict(X)
pred_probs = clf.predict_proba(X)

In [5]:
# One row per sample: true label, predicted label, then one probability
# column per class. The pieces are combined positionally and the result is
# re-keyed with the original index of `y`.
labels_part = pd.DataFrame({'actual': y.reset_index(drop=True), 'pred': pred})
probs_part = pd.DataFrame(pred_probs, columns=clf.classes_)
eval_df = pd.concat([labels_part, probs_part], axis=1)
eval_df.index = y.index
eval_df

Unnamed: 0,actual,pred,#4,1,2,3,4,5,6,7,b2,b3,b6,b7
4,4,3,0.011510,0.216602,0.153630,0.292923,0.088032,0.133199,0.017287,0.043484,0.014028,0.016688,0.004950,0.007665
5,1,5,0.067971,0.101107,0.146041,0.157973,0.055543,0.276808,0.037907,0.026711,0.018645,0.082845,0.020447,0.008001
6,1,2,0.006644,0.134325,0.255132,0.056913,0.036057,0.142449,0.101446,0.097043,0.025027,0.079457,0.017935,0.047572
7,5,4,0.100768,0.110134,0.073463,0.045417,0.188148,0.054022,0.004703,0.018959,0.163826,0.184501,0.032587,0.023471
8,2,b3,0.095836,0.072418,0.033769,0.015319,0.157234,0.044749,0.033299,0.013992,0.025932,0.291847,0.105861,0.109743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200803,7,2,0.004799,0.088622,0.304549,0.099206,0.012141,0.085181,0.077335,0.284584,0.012157,0.006411,0.003454,0.021561
200804,5,1,0.004862,0.349619,0.171346,0.053746,0.004373,0.141535,0.179268,0.044768,0.006852,0.006778,0.008969,0.027885
200805,4,6,0.028858,0.077547,0.117796,0.140677,0.037433,0.096649,0.262152,0.173416,0.003409,0.008408,0.036696,0.016960
200806,5,3,0.039819,0.040147,0.094096,0.425373,0.016826,0.271123,0.048594,0.026383,0.003279,0.023062,0.008273,0.003026


In [6]:

from src.music_theory import calculate_note_from_interval

# Join predictions back onto the source rows by index. The outer join keeps
# source rows that have no prediction, which then show up with a missing
# `pred`.
inspect_cols = ['melody_note', 'key', 'chord', 'chord_root', 'pred']
inspect = pd.merge(
    joined,
    eval_df,
    left_index=True,
    right_index=True,
    how='outer',
)[inspect_cols]

# Derive `pred_note` from the chord root and the predicted interval, then
# drop the raw interval column.
inspect = calculate_note_from_interval(inspect, 'chord_root', 'pred', 'pred_note')
inspect = inspect.drop(columns=['pred'])
inspect


Unnamed: 0,melody_note,key,chord,chord_root,pred_note
0,F,Bb-maj,Bb6,Bb,
1,D#,Bb-maj,Bb6,Bb,
2,A#,Bb-maj,Bb6,Bb,
3,C#,Bb-maj,Bb6,Bb,
4,D#,Bb-maj,Bb6,Bb,D
...,...,...,...,...,...
200804,A,D-maj,D6,D,D
200805,G,D-maj,D6,D,B
200806,A,D-maj,D6,D,F#
200807,B,D-maj,D6,D,F#


In [154]:
import numpy as np

# Feature matrix for the rows with melid == 1, built with the same lag
# settings as the training matrix but without grouping.
struct_cols = ['onset', 'chord', 'chord_3rd', 'chord_5th', 'chord_7th', 'chord_root_num', 'melody_note_num']
struct_df = create_feature_matrix(
    joined.query('melid==1')[struct_cols],
    melody_note_lags=range(1, 5),
    chord_root_lags=range(-1, -2, -1),
    chord_type_lags=range(-1, -2, -1),
    group_by_col=None,
)

# Every column whose name mentions the melody interval: the target itself
# plus its lagged copies.
melody_cols = list(filter(lambda name: 'chord_root_melody_note' in name, struct_df.columns))

# First row of the lagged columns only (target removed); used to seed the
# generation loop.
init_notes = struct_df[melody_cols].iloc[0:1].drop(columns=['chord_root_melody_note_interval'])

# What remains in struct_df is everything except the melody-interval columns.
struct_df = struct_df.drop(columns=melody_cols)

# struct_instance = struct_df.iloc[0:1]

def one_step_predict(struct_instance, notes_instance, clf, sharpness=1, rng=None):
    """Sample one class label from ``clf`` for a single feature row.

    The two frames are placed side by side (their indexes are reset first, so
    they are combined by position), one-hot encoded with ``pd.get_dummies``,
    and conformed to the exact column set and order the classifier was fitted
    with before ``predict_proba`` is called.

    Parameters
    ----------
    struct_instance : pandas.DataFrame
        Single-row frame holding the non-melody features.
    notes_instance : pandas.DataFrame
        Single-row frame holding the lagged note features.
    clf : fitted classifier
        Must expose ``feature_names_in_``, ``classes_`` and ``predict_proba``.
    sharpness : float, default 1
        Exponent applied to the predicted probabilities before they are
        renormalised. ``1`` leaves the distribution unchanged; larger values
        favour the most probable classes, smaller values flatten it.
    rng : object with a ``choice(a, p=...)`` method, optional
        Random source, e.g. ``numpy.random.default_rng(seed)``. When omitted
        the global ``numpy.random`` state is used.

    Returns
    -------
    One element of ``clf.classes_`` drawn at random according to the
    (optionally sharpened) predicted probabilities of the first row.
    """
    features = pd.concat(
        [struct_instance.reset_index(drop=True), notes_instance.reset_index(drop=True)],
        axis=1,
    )
    features = pd.get_dummies(features)

    # A single row only produces dummy columns for the categories it actually
    # contains. Reindexing adds every training column that is absent (filled
    # with 0), drops columns the model never saw, and restores training order
    # in one step.
    features = features.reindex(columns=clf.feature_names_in_, fill_value=0)

    # Only the first row of the prediction is used.
    probs = np.asarray(clf.predict_proba(features)[0], dtype=float)

    weights = np.power(probs, sharpness)
    weights = weights / weights.sum()

    sampler = np.random if rng is None else rng
    return sampler.choice(clf.classes_, p=weights)

# Autoregressive generation: each sampled note becomes the newest lag for the
# next step.
#
# The lag window is kept as a plain Python list and a fresh one-row frame is
# built from it every step. This avoids two hazards of shifting the frame in
# place: aliasing `init_notes` (the seed would be overwritten), and assigning
# a DataFrame onto an overlapping slice of itself, whose result depends on
# pandas' alignment and view semantics.
#
# Assumes the first lag column holds the most recent note and the last one the
# oldest, as the original positional shift did -- TODO confirm column order.
window = init_notes.to_numpy().ravel().tolist()
notes_instance = init_notes.copy()

# Seed notes first, generated notes after; collected in a list because
# np.append re-copies the whole array on every call.
generated = list(window)

for i in range(len(struct_df)):
    struct_instance = struct_df.iloc[i:i+1]

    pred_note_instance = one_step_predict(struct_instance, notes_instance, clf)
    generated.append(pred_note_instance)

    # Slide the window: newest note in front, oldest note falls off the end.
    window = [pred_note_instance] + window[:-1]
    notes_instance = pd.DataFrame([window], columns=init_notes.columns, index=init_notes.index)

# Flat object array, same layout as before: seed window followed by one
# generated note per row of struct_df.
all_notes = np.array(generated, dtype=object)

In [151]:
# `all_notes` starts with the seed window and then holds one generated note
# per row of struct_df. Derive the seed length from the data instead of
# hard-coding the number of lag columns.
n_seed = len(all_notes) - len(struct_df)
struct_df['generated_notes'] = all_notes[n_seed:]

generated_notes = struct_df['generated_notes']

# Outer-join the generated intervals onto the source rows for melid == 1;
# source rows without a generated note keep a missing value.
inspect = pd.merge(
    joined.query('melid==1'),
    generated_notes,
    left_index=True,
    right_index=True,
    how='outer',
)[['melody_note', 'key', 'chord', 'chord_root', 'generated_notes']]

# Derive `generated_note_names` from the chord root and the generated interval.
inspect = calculate_note_from_interval(inspect, 'chord_root', 'generated_notes', 'generated_note_names')
inspect

Unnamed: 0,melody_note,key,chord,chord_root,generated_notes,generated_note_names
0,F,Bb-maj,Bb6,Bb,,
1,D#,Bb-maj,Bb6,Bb,,
2,A#,Bb-maj,Bb6,Bb,,
3,C#,Bb-maj,Bb6,Bb,,
4,D#,Bb-maj,Bb6,Bb,3,D
...,...,...,...,...,...,...
525,F#,Bb-maj,Bb6,Bb,1,A#
526,F,Bb-maj,Bb6,Bb,7,A
527,D#,Bb-maj,Bb6,Bb,1,A#
528,D,Bb-maj,Bb6,Bb,7,A
