In [30]:
import time
import scipy
import numpy as np
import pandas as pd
import winsound

import networkx as nx
import matplotlib

from sklearn.preprocessing import LabelEncoder
import torch

from torch_geometric.data import HeteroData
import torch_geometric.transforms as T
from torch_geometric.nn import HGTConv, SAGEConv, GATConv, Linear, to_hetero
from torch.nn.functional import cross_entropy

from HeteroDataFunctions import Encoder, add_types, complete_graph, flatten_lol, node_cat_dict, midi_type

print(scipy.__version__)
print(matplotlib.__version__)
print(nx.__version__)

1.7.3
3.6.2
2.8.4


In [2]:
# Complete Dataset
G = complete_graph(".\slac\embeddings\\all")

loading edgelists...
- notes.edgelist
- program.edgelist
- tempo.edgelist
- time.signature.edgelist
Nodes: 93553
Edges: 786635


In [3]:
nodes = pd.DataFrame((list(G.nodes)), columns=['name'])
edges = pd.DataFrame(np.array(list(G.edges)), columns=['source', 'target'])

In [4]:
node_categories = node_cat_dict(nodes)
node_categories.keys()

node_cat_dict took 0.18 secs to run


dict_keys(['note_group', 'pitch', 'program', 'MIDI', 'duration', 'velocity', 'time_sig', 'tempo'])

In [5]:
%%script false --no- raise -error

nodes_df_complete, edges_df_complete = add_types(nodes, edges, node_categories)

winsound.Beep(400, 700)

nodes_df_complete.to_csv('nodes_complete.csv')
edges_df_complete.to_csv('edges_complete.csv')

Couldn't find program: 'false'


In [6]:
nodes_df_complete = pd.read_csv('.\slac\Contents of Slac\\nodes_complete.csv')
edges_df_complete = pd.read_csv('.\slac\Contents of Slac\edges_complete.csv')
print('Done')

Done


In [7]:
node_types = set(nodes_df_complete['node_type'])
node_types


{'MIDI',
 'duration',
 'note_group',
 'pitch',
 'program',
 'tempo',
 'time_sig',
 'velocity'}

In [8]:
edge_types = ["MIDI__has__tempo",
                   "MIDI__in__time_sig",
                   "MIDI__has__program",
                   "MIDI__has__note_group",
                   "note_group__has__velocity",
                   "note_group__has__duration",
                   "note_group__contains__pitch"]

In [9]:
names_list = flatten_lol(node_categories.values())


In [10]:
encoder = Encoder(names_list, n_labels=5)


In [11]:
input_node_dict = {node_type: {'x': encoder.
                    encode_nodes(nodes_df_complete.
                    loc[nodes_df_complete['node_type'] == node_type, ['name']])}
                    for node_type in node_types}

encode_nodes took 0.02 secs to run
encode_nodes took 0.00 secs to run
encode_nodes took 0.02 secs to run
encode_nodes took 0.00 secs to run
encode_nodes took 0.00 secs to run
encode_nodes took 3.43 secs to run
encode_nodes took 0.00 secs to run
encode_nodes took 0.01 secs to run


In [12]:
node_enc_to_idx = {node_type: {encoder.decode_value(node_enc.item()): i for i, node_enc in enumerate(input_node_dict[node_type]['x'])} for node_type in node_types}

In [14]:
input_edge_dict1 = dict()
for edge_type in edge_types:
    node_type_s, node_type_t = edge_type.split('__')[0], edge_type.split('__')[2]

    edge_df = edges_df_complete.loc[edges_df_complete['edge_type'] == edge_type, ['source', 'target']].copy()

    edge_df['source'], edge_df['target'] = edge_df['source'].map(node_enc_to_idx[node_type_s]), edge_df['target'].map(node_enc_to_idx[node_type_t])

    input_edge_dict1[edge_type] = {'edge_index': torch.tensor(edge_df.values).T}


In [15]:
input_edge_dict1

{'MIDI__has__tempo': {'edge_index': tensor([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
            14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
            28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
            42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
            56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
            70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
            84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
            98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
           112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
           126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
           140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
           154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166,

In [19]:
# Extract the label of each Midi.
midi_val = nodes_df_complete.loc[nodes_df_complete['node_type'] == 'MIDI', ['name']].values
midi_class = [midi_type(s[0]) for s in midi_val]

lb = LabelEncoder()
y = torch.from_numpy(lb.fit_transform(midi_class)) # .type(torch.LongTensor)

lb.classes_

array(['Blues', 'Classical', 'Jazz', 'Rap', 'Rock'], dtype='<U9')

In [20]:
input_node_dict['MIDI']['y'] = y

In [21]:
H = HeteroData(input_node_dict, **input_edge_dict1)

In [22]:
print(H)

HeteroData(
  [1mvelocity[0m={ x=[11, 1] },
  [1mprogram[0m={ x=[108, 1] },
  [1mduration[0m={ x=[570, 1] },
  [1mpitch[0m={ x=[93, 1] },
  [1mtempo[0m={ x=[23, 1] },
  [1mnote_group[0m={ x=[92484, 1] },
  [1mtime_sig[0m={ x=[14, 1] },
  [1mMIDI[0m={
    x=[250, 1],
    y=[250]
  },
  [1m(MIDI, has, tempo)[0m={ edge_index=[2, 250] },
  [1m(MIDI, in, time_sig)[0m={ edge_index=[2, 239] },
  [1m(MIDI, has, program)[0m={ edge_index=[2, 1392] },
  [1m(MIDI, has, note_group)[0m={ edge_index=[2, 135160] },
  [1m(note_group, has, velocity)[0m={ edge_index=[2, 118626] },
  [1m(note_group, has, duration)[0m={ edge_index=[2, 92484] },
  [1m(note_group, contains, pitch)[0m={ edge_index=[2, 438484] }
)


In [23]:
H = T.ToUndirected()(H)

# GNN

In [24]:
class GNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x


model = GNN(hidden_channels=64, out_channels=len(set(lb.classes_)))
model = to_hetero(model, H.metadata(), aggr='sum')

In [25]:
optimizer_name = "Adam"
lr = 1e-1
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)


In [31]:
def train():
    model.train()
    optimizer.zero_grad()
    out = model(H.x_dict, H.edge_index_dict)
    # mask = H['MIDI'].train_mask
    # loss = F.cross_entropy(out['MIDI'][mask], H['MIDI'].y[mask])
    loss = cross_entropy(out['MIDI'], H['MIDI'].y)
    loss.backward()
    optimizer.step()
    return float(loss)

In [32]:
train()

203271.25

 # Old Implementation

In [None]:
# nodes_ten_ = encoder.encode_nodes(nodes_df_complete)
# edges_ten_ = encoder.encode_edges(edges_df_complete)

# node_type_ = nodes_df_complete.iloc[:, 1]

# Get the source and target indices from the edges tensor
# edge_index = edges_ten_[:, :2]

## Get the edge types from the edges tensor
#edge_type_ = edges_df_complete.iloc[:, 2]

#full_hetero_graph = HeteroData(x=nodes_ten_, node_type=node_type_, edge_index=edge_index, edge_type=edge_type_)

In [None]:
# edges_df_complete.loc[edges_df_complete['edge_type'] == 'MIDI__has__tempo', ['source', 'target']]

In [None]:
# full_categories = node_categories.copy()
# full_categories['node_types'] = list(node_categories.keys())
# full_categories['edge_types'] = edge_types  # Dictionary containing every string that may be found in our Dataframes
# names_list_full = flatten_lol(full_categories.values())

In [18]:
# input_edge_dict = {edge_type: {'edge_index': encoder.encode_edges(edges_df_complete.loc[
#                    edges_df_complete['edge_type'] == edge_type, ['source', 'target']])} for edge_type in edge_types}
# for key in input_edge_dict.keys():
#     input_edge_dict[key]['edge_index'] = input_edge_dict[key]['edge_index'].T.to(torch.int64)
