In [52]:
import os
from typing import Optional

import scipy

# from mido import MidiFile
import networkx as nx
from networkx_viewer import Viewer
import matplotlib
import matplotlib.pyplot as plt

import math
import numpy as np
import pandas as pd

from torch_geometric.data import HeteroData


# Report the versions of the core libraries this notebook was run with.
for lib in (scipy, matplotlib, nx):
    print(lib.__version__)

1.7.3
3.6.2
2.8.4


In [53]:
def complete_graph(input_path) -> nx.Graph:
    """Load every edgelist file in a directory and merge them into one undirected graph.

    A file is used when its name ends with ``.edgelist`` and does not start
    with ``_``. Each file is read as a space-delimited directed edgelist with
    string node ids, every edge is given ``weight = 1``, the per-file graphs
    are merged with ``nx.compose`` and the result is converted to an
    undirected graph.

    Args:
        input_path: Directory that contains the ``.edgelist`` files.

    Returns:
        The merged, undirected graph.

    Raises:
        ValueError: If ``input_path`` contains no usable ``.edgelist`` file.
    """
    # os.listdir gives no ordering guarantee; sorting keeps the node and edge
    # order of the merged graph reproducible across platforms.
    edgelists = sorted(qf for qf in os.listdir(input_path)
                       if qf.endswith('.edgelist') and not qf.startswith('_'))
    if not edgelists:
        raise ValueError('no .edgelist files found in %r' % (input_path,))

    g = None

    print('loading edgelists...')
    for eg in edgelists:
        print('- ' + eg)
        h = nx.read_edgelist(os.path.join(input_path, eg), nodetype=str,
                             create_using=nx.DiGraph(), delimiter=' ')
        # Every edge carries the same unit weight.
        nx.set_edge_attributes(h, 1, 'weight')

        g = h if g is None else nx.compose(g, h)

    g = g.to_undirected()

    print('Nodes: %d' % nx.number_of_nodes(g))
    print('Edges: %d' % nx.number_of_edges(g))
    return g


In [54]:
# Raw string: the previous literal relied on the invalid escapes "\D" and "\T",
# which newer Python versions warn about. The resulting path is unchanged.
# NOTE(review): this is a machine-specific absolute path; consider a configurable data directory.
G = complete_graph(r"C:\Users\vamvp\Desktop\Test Edgelists")

loading edgelists...
- full_edgelist.edgelist
- notes.edgelist
- program.edgelist
- tempo.edgelist
- time.signature.edgelist
Nodes: 2703
Edges: 19366


In [55]:
# One row per graph node, and one row per edge as a (source, target) pair.
nodes = pd.DataFrame(data=list(G.nodes), columns=['name'])
edge_pairs = np.array(list(G.edges))
edges = pd.DataFrame(data=edge_pairs, columns=['source', 'target'])

In [56]:
# Split the node names into categories based on their textual form.

# Note groups are named 'g' followed by a digit or '-'. The length guard keeps
# a bare 'g' (or an empty name) from raising IndexError.
note_groups = [n for n in nodes['name']
               if len(n) > 1 and n[0] == 'g' and n[1] in "0123456789-"]

# Set lookup avoids rescanning the note_groups list once per node.
note_group_set = set(note_groups)
not_group_nodes = [n for n in nodes['name'] if n not in note_group_set]

# URL nodes are either programs or notes; anything else is printed for inspection.
url = [n for n in not_group_nodes if n.startswith('http')]
program_nodes = []
note_nodes = []
for u in url:
    if "programs" in u:
        program_nodes.append(u)
    elif "notes" in u:
        note_nodes.append(u)
    else:
        print(u)

name_nodes = [n for n in not_group_nodes if n.startswith('-')]
dur_nodes = [n for n in not_group_nodes if n.startswith('dur')]
vel_nodes = [n for n in not_group_nodes if n.startswith('vel')]
time_nodes = [n for n in not_group_nodes if n.startswith('time')]

# Whatever is left after removing every recognised category is treated as tempo.
# The exclusion set is built once instead of once per node.
already_categorised = set(dur_nodes).union(vel_nodes, time_nodes, name_nodes, url)
tempo_nodes = [n for n in not_group_nodes if n not in already_categorised]

tempo_nodes

['11', '6', '9']

In [67]:
# Map each category label to the list of node names that belong to it.
node_categories = dict(
    note_group=note_groups,
    pitch=note_nodes,
    program=program_nodes,
    MIDI=name_nodes,
    duration=dur_nodes,
    velocity=vel_nodes,
    time_sig=time_nodes,
    tempo=tempo_nodes,
)

node_categories.keys()

dict_keys(['note_group', 'pitch', 'program', 'MIDI', 'duration', 'velocity', 'time_sig', 'tempo'])

In [68]:
def add_edge_names(edges_pd: pd.DataFrame, node_cat: dict) -> pd.DataFrame:
    """Return a copy of ``edges_pd`` with an ``edge_type`` column added.

    The edge type of a row is ``<source category>__<target category>``, where
    a node's category is the first key of ``node_cat`` whose collection
    contains that node.

    Args:
        edges_pd: Edge table with ``source`` and ``target`` columns.
        node_cat: Mapping from category name (str) to the node names in it.

    Returns:
        A new DataFrame; ``edges_pd`` itself is not modified.

    Notes:
        A node that appears in no category is labelled ``"unknown"`` rather
        than inheriting the category found for a previous row.
    """
    # Build a node -> category lookup once. setdefault keeps the first category
    # that lists a node, which matches a first-match scan over node_cat.
    category_of = {}
    for name, members in node_cat.items():
        for node in members:
            category_of.setdefault(node, name)

    unknown = "unknown"
    # Read from the edges_pd argument (not a notebook-level global).
    edge_type = [
        category_of.get(src, unknown) + "__" + category_of.get(tgt, unknown)
        for src, tgt in zip(edges_pd['source'], edges_pd['target'])
    ]

    augmented_edges_pd = edges_pd.copy()
    augmented_edges_pd['edge_type'] = edge_type
    return augmented_edges_pd


In [69]:
# Label every edge with its "<source category>__<target category>" type.
edges_2 = add_edge_names(edges_pd=edges, node_cat=node_categories)


In [70]:
# Distinct edge types present before any direction normalisation.
set(edges_2['edge_type'].tolist())

{'MIDI__note_group',
 'MIDI__program',
 'MIDI__tempo',
 'MIDI__time_sig',
 'duration__note_group',
 'note_group__MIDI',
 'note_group__duration',
 'note_group__pitch',
 'note_group__velocity',
 'pitch__note_group',
 'program__MIDI',
 'time_sig__MIDI',
 'velocity__note_group'}

In [61]:
# Edges stored as MIDI file -> program.
is_midi_to_program = edges_2['edge_type'] == 'MIDI__program'
edges_2.loc[is_midi_to_program, ['source', 'target']]

Unnamed: 0,source,target
933,-Albert_King_-_Born_Under_A_Bad_Sign,http://purl.org/midi-ld/programs/30
934,-Albert_King_-_Born_Under_A_Bad_Sign,http://purl.org/midi-ld/programs/0
935,-Albert_King_-_Born_Under_A_Bad_Sign,http://purl.org/midi-ld/programs/33
9697,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/17
9698,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/4
9699,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/26
9700,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/61
9701,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/56
9702,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/66
9703,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/35


In [62]:
# Edges stored in the opposite direction, program -> MIDI file.
is_program_to_midi = edges_2['edge_type'] == 'program__MIDI'
edges_2.loc[is_program_to_midi, ['source', 'target']]

Unnamed: 0,source,target
7102,http://purl.org/midi-ld/programs/30,-B_B_King_-_Rock_Me_Baby
7103,http://purl.org/midi-ld/programs/0,-B_B_King_-_How_Blue_Can_You_Get
7104,http://purl.org/midi-ld/programs/0,-B_B_King_-_Rock_Me_Baby
7105,http://purl.org/midi-ld/programs/33,-B_B_King_-_Rock_Me_Baby


In [66]:
# Canonical (forward) edge types, written as <source>__<relation>__<target>.
main_edge_types = ["MIDI__has__tempo",
                   "MIDI__in__time_sig",
                   "MIDI__has__program",
                   "MIDI__has__note_group",
                   "note_group__has__velocity",
                   "note_group__has__duration",
                   "note_group__contains__pitch"]


# Edge types found in the raw edge list whose direction is the reverse of a
# canonical type above and therefore has to be flipped:
# 'duration__note_group',
# 'pitch__note_group',
# 'velocity__note_group'

# 'note_group__MIDI',
# 'program__MIDI',
# 'time_sig__MIDI',


In [98]:
def reverse_edge(df: pd.DataFrame, row: int, inplace: bool = False) -> Optional[pd.DataFrame]:
    """Swap the ``source`` and ``target`` values of one row.

    Args:
        df: Edge table with ``source`` and ``target`` columns.
        row: Positional (0-based) index of the row to reverse.
        inplace: If True, modify ``df`` and return None; otherwise leave
            ``df`` untouched and return a modified copy.

    Returns:
        None when ``inplace`` is True, else the modified copy.
    """
    result = df if inplace else df.copy()

    source_col = result.columns.get_loc('source')
    target_col = result.columns.get_loc('target')

    # Write through a single indexer on the frame itself. The chained form
    # df.iloc[row]['source'] = ... assigns to a temporary Series, so the update
    # is lost under Copy-on-Write or when the columns have mixed dtypes.
    source_value = result.iat[row, source_col]
    target_value = result.iat[row, target_col]
    result.iat[row, source_col] = target_value
    result.iat[row, target_col] = source_value

    return None if inplace else result


def format_edge_name(source: str, target: str) -> str:
    """Build the edge-type name for a (source category, target category) pair.

    Known pairs produce ``<source>__<relation>__<target>``. Any other pair,
    including a known source with an unexpected target, produces
    ``<source>__?__<target>`` and is reported on stdout.

    Args:
        source: Category of the edge's source node.
        target: Category of the edge's target node.

    Returns:
        The formatted edge-type name.
    """
    relations = {
        ("MIDI", "tempo"): "has",
        ("MIDI", "time_sig"): "in",
        ("MIDI", "program"): "has",
        ("MIDI", "note_group"): "has",
        ("note_group", "velocity"): "has",
        ("note_group", "duration"): "has",
        ("note_group", "pitch"): "contains",
    }

    relation = relations.get((source, target))
    if relation is None:
        # Previously a MIDI/note_group source with an unknown target fell
        # through and returned an empty name without any warning.
        edge_name = source + "__?__" + target
        print("Not known edge detected: " + edge_name)
        return edge_name

    return source + "__" + relation + "__" + target
    

def add_edge_names2(edges_df: pd.DataFrame, node_cat: dict) -> pd.DataFrame:
    """Return a copy of ``edges_df`` with normalised direction and an ``edge_type`` column.

    Each endpoint is mapped to the first category in ``node_cat`` that lists
    it. An edge is reversed when its source category is neither ``"MIDI"`` nor
    ``"note_group"``, or when it runs from ``"note_group"`` to ``"MIDI"``. The
    edge type is then produced by ``format_edge_name``.

    Args:
        edges_df: Edge table with ``source`` and ``target`` columns.
        node_cat: Mapping from category name to the node names in it.

    Returns:
        A new DataFrame; ``edges_df`` itself is not modified.

    Notes:
        A node that appears in no category is labelled ``"unknown"`` rather
        than inheriting the category found for a previous row.
    """
    # Node -> category lookup built once; setdefault keeps the first category
    # that lists a node, matching a first-match scan over node_cat.
    category_of = {}
    for name, members in node_cat.items():
        for node in members:
            category_of.setdefault(node, name)

    augmented_edges_df = edges_df.copy()
    edge_type = []

    # Endpoints are read from the edges_df argument (not a notebook-level
    # global), so the function works on whatever frame it is given.
    for i, (source_node, target_node) in enumerate(zip(edges_df['source'], edges_df['target'])):
        edge_name_source = category_of.get(source_node, "unknown")
        edge_name_target = category_of.get(target_node, "unknown")

        wrong_direction = (
            edge_name_source not in ("MIDI", "note_group")
            or (edge_name_source == "note_group" and edge_name_target == "MIDI")
        )
        if wrong_direction:
            reverse_edge(augmented_edges_df, row=i, inplace=True)
            edge_name_source, edge_name_target = edge_name_target, edge_name_source

        edge_type.append(format_edge_name(edge_name_source, edge_name_target))

    augmented_edges_df['edge_type'] = edge_type
    return augmented_edges_df


In [99]:
# Label every edge and flip reversed ones into their canonical direction.
edges_3 = add_edge_names2(edges_df=edges, node_cat=node_categories)

In [100]:
# Distinct edge types after direction normalisation.
set(edges_3['edge_type'].tolist())

{'MIDI__has__note_group',
 'MIDI__has__program',
 'MIDI__has__tempo',
 'MIDI__in__time_sig',
 'note_group__contains__pitch',
 'note_group__has__duration',
 'note_group__has__velocity'}

In [102]:
# All MIDI file -> program edges, including the ones that were originally reversed.
is_midi_has_program = edges_3['edge_type'] == "MIDI__has__program"
edges_3.loc[is_midi_has_program, ['source', 'target']]

Unnamed: 0,source,target
933,-Albert_King_-_Born_Under_A_Bad_Sign,http://purl.org/midi-ld/programs/30
934,-Albert_King_-_Born_Under_A_Bad_Sign,http://purl.org/midi-ld/programs/0
935,-Albert_King_-_Born_Under_A_Bad_Sign,http://purl.org/midi-ld/programs/33
7102,-B_B_King_-_Rock_Me_Baby,http://purl.org/midi-ld/programs/30
7103,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/0
7104,-B_B_King_-_Rock_Me_Baby,http://purl.org/midi-ld/programs/0
7105,-B_B_King_-_Rock_Me_Baby,http://purl.org/midi-ld/programs/33
9697,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/17
9698,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/4
9699,-B_B_King_-_How_Blue_Can_You_Get,http://purl.org/midi-ld/programs/26
