In [26]:
import time
import scipy
import numpy as np
import pandas as pd
import winsound

import networkx as nx
import matplotlib

from sklearn.preprocessing import LabelEncoder
import torch
from torch_geometric.data import HeteroData

from HeteroDataFunctions import Encoder, add_types, complete_graph, flatten_lol, node_cat_dict, midi_type

print(scipy.__version__)
print(matplotlib.__version__)
print(nx.__version__)

1.7.3
3.6.2
2.8.4


# 3 Songs Example

In [2]:
G = complete_graph(".\slac\embeddings\Test Edgelists")

loading edgelists...
- full_edgelist.edgelist
- notes.edgelist
- program.edgelist
- tempo.edgelist
- time.signature.edgelist
Nodes: 2703
Edges: 19366


In [3]:
nodes = pd.DataFrame((list(G.nodes)), columns=['name'])
edges = pd.DataFrame(np.array(list(G.edges)), columns=['source', 'target'])

In [4]:
node_categories = node_cat_dict(nodes)

node_categories.keys()

node_cat_dict took 0.01 secs to run


dict_keys(['note_group', 'pitch', 'program', 'MIDI', 'duration', 'velocity', 'time_sig', 'tempo'])

In [5]:
node_categories['tempo']

['9', '6', '11']

In [6]:
main_edge_types = ["MIDI__has__tempo",
                   "MIDI__in__time_sig",
                   "MIDI__has__program",
                   "MIDI__has__note_group",
                   "note_group__has__velocity",
                   "note_group__has__duration",
                   "note_group__contains__pitch"]



In [7]:
nodes_f, edges_f = add_types(nodes, edges, node_categories)


add_node_type took 0.54 secs to run
add_edge_type took 3.01 secs to run
add_types took 3.55 secs to run


In [8]:
edges_f.loc[edges_f['edge_type'] == 'MIDI__has__note_group', ['source', 'target']]

Unnamed: 0,source,target
0,-Albert_King_-_Born_Under_A_Bad_Sign,g1601074
1,-Albert_King_-_Born_Under_A_Bad_Sign,g1577049
2,-Albert_King_-_Born_Under_A_Bad_Sign,g1575127
3,-Albert_King_-_Born_Under_A_Bad_Sign,g-1872221027
4,-Albert_King_-_Born_Under_A_Bad_Sign,g795292196
...,...,...
18857,-B_B_King_-_Rock_Me_Baby,g1750992
18858,-B_B_King_-_Rock_Me_Baby,g415858330
18859,-B_B_King_-_Rock_Me_Baby,g1606844
18860,-B_B_King_-_Rock_Me_Baby,g1605881


## Graph Build

In [9]:
data = HeteroData()

data['MIDI'].x = node_categories['MIDI']
data['note_group'].x = node_categories['note_group']

data['MIDI', 'has', 'note_group'].edge_index = edges_f.loc[edges_f['edge_type'] == 'MIDI__has__note_group', ['source', 'target']]

In [10]:
data.metadata()

(['MIDI', 'note_group'], [('MIDI', 'has', 'note_group')])

In [11]:
data['MIDI']

{'x': ['-B_B_King_-_Rock_Me_Baby', '-Albert_King_-_Born_Under_A_Bad_Sign', '-B_B_King_-_How_Blue_Can_You_Get']}

In [12]:
node_edge_categories = node_categories.copy()
node_edge_categories['node_types'] = list(node_categories.keys())
node_edge_categories['main_edge_types'] = main_edge_types  # Dictionary containing every string that may be found in our Dataframes

In [13]:
names_list = flatten_lol(node_edge_categories.values())


In [14]:
len(names_list) == len(set(names_list))

True

In [15]:
nodes_f.iloc[0][0]

'-Albert_King_-_Born_Under_A_Bad_Sign'

In [16]:
encoder = Encoder(names_list)

In [17]:
nodes_ten = encoder.encode_nodes(nodes_f)

nodes_ten.numpy()

encode_nodes took 0.48 secs to run


array([[2639, 2706],
       [   0, 2703],
       [   1, 2703],
       ...,
       [2614, 2704],
       [2554, 2704],
       [2579, 2704]])

In [18]:
edges_ten = encoder.encode_edges(edges_f)
edges_ten.numpy()

encode_edges took 1.90 secs to run


array([[2639,    0, 2714],
       [2639,    1, 2714],
       [2639,    2, 2714],
       ...,
       [2552, 2565, 2717],
       [2553, 2609, 2717],
       [2553, 2667, 2716]])

In [19]:
edges_ten[:, :2]

tensor([[2639,    0],
        [2639,    1],
        [2639,    2],
        ...,
        [2552, 2565],
        [2553, 2609],
        [2553, 2667]], dtype=torch.int32)

In [20]:
node_type = nodes_f.iloc[:, 1]

# Get the source and target indices from the edges tensor
edge_index = edges_ten[:, :2]

# Get the edge types from the edges tensor
edge_type = edges_f.iloc[:, 2]


In [21]:
hetero_graph = HeteroData(x=nodes_ten, node_type=node_type, edge_index=edge_index, edge_type=edge_type)

In [22]:
print(hetero_graph)

HeteroData(
  x=[2703, 2],
  node_type=0             MIDI
1       note_group
2       note_group
3       note_group
4       note_group
           ...    
2698       program
2699         tempo
2700         pitch
2701         pitch
2702         pitch
Name: node_type, Length: 2703, dtype: object,
  edge_index=[19366, 2],
  edge_type=0              MIDI__has__note_group
1              MIDI__has__note_group
2              MIDI__has__note_group
3              MIDI__has__note_group
4              MIDI__has__note_group
                    ...             
19361      note_group__has__duration
19362    note_group__contains__pitch
19363    note_group__contains__pitch
19364    note_group__contains__pitch
19365      note_group__has__duration
Name: edge_type, Length: 19366, dtype: object
)
