In [1]:
#!pip install amrlib
import os
import pandas as pd
import ast
import amrlib

### Read Data

In [2]:
# Load the semicolon-separated song table from the working directory.
data = pd.read_csv("data.csv", delimiter=";")

In [3]:
def safe_eval(val):
    """Parse ``val`` as a Python literal, falling back to ``val`` itself.

    Returns the object produced by ``ast.literal_eval(val)``. When parsing
    fails with ``ValueError`` or ``SyntaxError`` the input is handed back
    unchanged, so callers receive either the parsed literal or the raw value.
    """
    try:
        parsed = ast.literal_eval(val)
    except (ValueError, SyntaxError):
        # Not a valid literal: keep the raw cell value.
        return val
    return parsed

# Turn the two stringified chord columns back into Python objects.
# Cells that cannot be parsed as literals are kept as their raw value (see safe_eval).
songs, parsed_songs = (
    data[column].apply(safe_eval)
    for column in ("normalized chords", "normalized and parsed chords")
)

## Weights

### Assemble and count bigrams

In [4]:
def makebigrams(song):
    """Pair every item of ``song`` with its successor, bracketed by markers.

    The sequence is padded with the literal strings ``"Start"`` and ``"End"``,
    so a song of ``k`` items (``k >= 1``) yields ``k + 1`` bigrams:
    ``("Start", first)``, each consecutive pair, and ``(last, "End")``.

    Parameters
    ----------
    song : iterable
        The items (chords, intervals, ...) of one song, in order.

    Returns
    -------
    list of tuple
        The bigrams in order. An empty song yields an empty list.
    """
    items = list(song)
    if not items:
        return []

    # Padding first means a one-item song works too; looking up the
    # successor of the first item directly would raise IndexError there.
    padded = ["Start", *items, "End"]
    return list(zip(padded, padded[1:]))

In [5]:
# One bigram list per song, in the same order as `songs`.
bgcorpus = list(map(makebigrams, songs))

In [6]:
def countbg(song):
    """Count how often each element occurs in ``song``.

    ``song`` is iterated once; the returned dict maps each distinct element
    to its number of occurrences, with keys in first-appearance order.
    """
    tally = {}
    for pair in song:
        tally[pair] = tally.get(pair, 0) + 1

    return tally

In [7]:
# Per-song bigram frequency tables, aligned with `bgcorpus`.
bgcounts = list(map(countbg, bgcorpus))

### Nodes

In [12]:
# Write one node table per song: the distinct chords plus the "Start"/"End" markers.
# Files are named "<index>_<song>_nodes.csv" with a 0-based index, the same
# numbering the edge tables in penman_tables/ use.
os.makedirs("node_tables", exist_ok=True)  # to_csv does not create missing directories

n = 0
for i, song in enumerate(songs):
    # dict.fromkeys removes duplicates while keeping first-appearance order,
    # so the row order is reproducible between runs (set() order is not).
    allnodes = list(dict.fromkeys(song)) + ["Start", "End"]

    nodes = pd.DataFrame({"id": allnodes})

    # Resolve the name outside the f-string: nesting the same quote type
    # inside an f-string only parses on Python 3.12+.
    song_name = data["song"][i]
    nodes.to_csv(f"node_tables/{n}_{song_name}_nodes.csv", index=False)
    n += 1

### Edges

In [11]:
# Write one edge table per song with columns Source, Weight, Target.
# Weight is the bigram's count divided by the total number of bigrams in the
# same song (relative frequency within that song).
os.makedirs("penman_tables", exist_ok=True)  # to_csv does not create missing directories

n = 0

for i, song_bg in enumerate(bgcounts):
    # Normalise by this song's own total rather than the first song's.
    total = sum(song_bg.values())

    source = []
    target = []
    weight = []
    for change, w in song_bg.items():
        source.append(change[0])
        target.append(change[1])
        weight.append(w / total)

    # Build the frame once per song; this also keeps an empty song from
    # silently re-exporting the previous song's table.
    edges = pd.DataFrame({"Source": source, "Weight": weight, "Target": target})

    n += 1
    # Resolve the name outside the f-string: nesting the same quote type
    # inside an f-string only parses on Python 3.12+.
    song_name = data["song"][i]
    edges.to_csv(f"penman_tables/{n-1}_{song_name}.csv", index=False)

## Intervals

In [10]:
# Extracting roots.
# For every song, collect the "Root" entry of each parsed chord. A chord that
# has no "Root" key, or that is not subscriptable by a string key at all,
# contributes the placeholder "ERROR" so positions stay aligned with the song.
chord_roots = []

for song in parsed_songs:
    roots = []
    for chord in song:
        try:
            roots.append(chord["Root"])
        except (KeyError, TypeError, IndexError):
            # Only lookup failures are expected here; anything else
            # (e.g. KeyboardInterrupt) should not be swallowed.
            roots.append("ERROR")
    chord_roots.append(roots)

In [11]:
# Replacing root with distance from key.
# Each table maps a root name to a number 0-11: minor_map counts up from "A",
# major_map counts up from "C". Roots missing from the chosen table become None.
minor_map = {
    "A": 0, "A#": 1, "Bb": 1, "B": 2, "C": 3, "C#": 4, "Db": 4, "D": 5,
    "D#": 6, "Eb": 6, "E": 7, "F": 8, "F#": 9, "Gb": 9, "G": 10, "G#": 11,
    "Ab": 11,
}
major_map = {
    "C": 0, "C#": 1, "Db": 1, "D": 2, "D#": 3, "Eb": 3, "E": 4, "F": 5,
    "F#": 6, "Gb": 6, "G": 7, "G#": 8, "Ab": 8, "A": 9, "A#": 10, "Bb": 10,
    "B": 11,
}

song_intervals = []

for idx, roots in enumerate(chord_roots):
    # A "-" in the song's key selects the minor table; otherwise the major one.
    root_to_step = minor_map if "-" in data["key"][idx] else major_map
    song_intervals.append([root_to_step.get(root) for root in roots])

In [12]:
# Make interval bigrams
# Same Start/End-padded pairing as for the chords, applied to the numeric roots.
bgintervals = list(map(makebigrams, song_intervals))

In [164]:
# Calculating interval changes between chords
# For each bigram: a ("Start", x) pair yields x, an (x, "End") pair yields x,
# and any other pair yields second - first. If a difference cannot be computed
# (e.g. one side is None), the whole song is replaced by the string "ERROR".
interval_changes = []
for song in bgintervals:
    intervals = []
    for interval in song:
        if interval[0] == "Start":
            i = interval[1]
        elif interval[1] == "End":
            i = interval[0]
        else:
            try:
                i = interval[1] - interval[0]
            except TypeError:
                # Stop at the first failure: the song is already marked as
                # unusable, so there is nothing to gain from the remaining pairs.
                intervals = "ERROR"
                break
        intervals.append(i)
    interval_changes.append(intervals)

In [165]:
# Formatting the chord extensions
# For each song, build one string per chord from every parsed field except
# "Root", wrapped in parentheses (or "" when nothing is left). A song that
# contains "ERROR" is represented by the string "ERROR" instead of a list.
extensions = []
for song in parsed_songs:
    if "ERROR" in song:
        extensions.append("ERROR")
        continue

    deparsed_song = []
    for chord in song:
        # Skip "Root" while joining instead of popping it: popping would
        # modify parsed_songs in place, so re-running this cell or the
        # root-extraction cell would then see chords without a root.
        joined_chord = "".join(
            value for key, value in chord.items() if key != "Root"
        )
        deparsed_song.append(f"({joined_chord})" if joined_chord != "" else "")

    # One trailing empty entry, so the list has one more item than the song
    # has chords (the same count makebigrams yields for a non-empty song).
    deparsed_song.append("")
    extensions.append(deparsed_song)

In [166]:
# Adding the extensions to the intervals
# Each label is str(interval) followed by the extension at the same position.
complete_intervals = []
for i1, song in enumerate(interval_changes):
    song_extensions = extensions[i1]

    # Either input may be the string "ERROR" instead of a list. Indexing that
    # string would splice single characters into labels, so such a song gets
    # no labels at all; the bigram lookup that follows then reports every
    # position of that song as an error.
    if isinstance(song, str) or isinstance(song_extensions, str):
        complete_intervals.append([])
        continue

    # zip stops at the shorter list, so a length mismatch cannot raise IndexError.
    interval_and_extension = [
        str(interval) + extension
        for interval, extension in zip(song, song_extensions)
    ]
    complete_intervals.append(interval_and_extension)

In [167]:
# Map every chord bigram of a song to its interval label.
# A bigram that occurs several times keeps the label of its last occurrence
# (later assignments overwrite earlier ones). Positions without a label are
# stored under numbered "ERROR_<k>" keys with the value "ERROR".
interval_d = []
for i1, song in enumerate(bgcorpus):
    bgdict = {}
    n = 0
    for i2, bg in enumerate(song):
        try:
            bgdict[bg] = complete_intervals[i1][i2]
        except (IndexError, KeyError, TypeError):
            # Missing label or unusable key: record the failure explicitly
            # without hiding unrelated exceptions.
            n += 1
            bgdict[f"ERROR_{n}"] = "ERROR"
    interval_d.append(bgdict)

In [168]:
# Write one table per song with columns Source, Weight, Target, Interval.
# Weight is the bigram's count in bgcounts divided by the total number of
# bigrams in the same song. Entries that are not real bigrams (the "ERROR_<k>"
# placeholders) become rows filled with "ERROR".
# NOTE(review): the file prefix here is 1-based (n is incremented before use);
# kept as is so existing output names do not change.
os.makedirs("penman_tables2", exist_ok=True)  # to_csv does not create missing directories

n = 0

for index, song in enumerate(interval_d):
    counts = bgcounts[index]
    total = sum(counts.values())

    source = []
    target = []
    weight = []
    interval = []
    for bg, label in song.items():
        if isinstance(bg, tuple) and bg in counts:
            source.append(bg[0])
            target.append(bg[1])
            # Look the count up for this bigram; this cell never assigns `w`,
            # so using it would reuse a stale value from an earlier cell.
            weight.append(counts[bg] / total)
            interval.append(label)
        else:
            source.append("ERROR")
            target.append("ERROR")
            weight.append("ERROR")
            interval.append("ERROR")

    # Build the frame once per song instead of once per row.
    edges = pd.DataFrame(
        {"Source": source, "Weight": weight, "Target": target, "Interval": interval}
    )

    n += 1
    # Resolve the name outside the f-string: nesting the same quote type
    # inside an f-string only parses on Python 3.12+.
    song_name = data["song"][index]
    edges.to_csv(f"penman_tables2/{n}_{song_name}_penman.csv", index=False)

In [172]:
# Consecutive chord pairs of the first song, without Start/End markers.
transitions = list(zip(songs[0], songs[0][1:]))

In [173]:
# Show the chord pairs of the first song computed in the previous cell.
transitions

[('D9', 'F-6'),
 ('F-6', 'D9'),
 ('D9', 'F-6'),
 ('F-6', 'C'),
 ('C', 'C7'),
 ('C7', 'B7'),
 ('B7', 'Bb7'),
 ('Bb7', 'A7'),
 ('A7', 'D9'),
 ('D9', 'G7'),
 ('G7', 'Ab7'),
 ('Ab7', 'G7'),
 ('G7', 'D9'),
 ('D9', 'F-6'),
 ('F-6', 'D9'),
 ('D9', 'F-6'),
 ('F-6', 'C'),
 ('C', 'C7'),
 ('C7', 'B7'),
 ('B7', 'Bb7'),
 ('Bb7', 'A7'),
 ('A7', 'D9'),
 ('D9', 'G7'),
 ('G7', 'C6'),
 ('C6', 'C7'),
 ('C7', 'C7'),
 ('C7', 'F6'),
 ('F6', 'F6'),
 ('F6', 'D7'),
 ('D7', 'D7'),
 ('D7', 'G7'),
 ('G7', 'G7'),
 ('G7', 'D9'),
 ('D9', 'F-6'),
 ('F-6', 'D9'),
 ('D9', 'F-6'),
 ('F-6', 'C'),
 ('C', 'C7'),
 ('C7', 'B7'),
 ('B7', 'Bb7'),
 ('Bb7', 'A7'),
 ('A7', 'D9'),
 ('D9', 'G7'),
 ('G7', 'C6')]