# Distance based Transition Matrix

In [1]:
import random

import numpy as np
import pandas as pd

In [2]:
# Input
data_file = "POPmusic.csv"

# Delimiter
data_file_delimiter = ','

# The max column count a line in the file could have
largest_column_count = 0

# Loop the data lines
with open(data_file, 'r') as temp_f:
    # Read the lines
    lines = temp_f.readlines()

    for l in lines:
        # Count the column count for the current line
        column_count = len(l.split(data_file_delimiter)) + 1

        # Set the new most column count
        largest_column_count = column_count if largest_column_count < column_count else largest_column_count

# Close file
temp_f.close()

# Generate column names (will be 0, 1, 2, ..., largest_column_count - 1)
column_names = [i for i in range(0, largest_column_count)]

# Read csv
df = pd.read_csv(data_file, header=None, delimiter=data_file_delimiter, names=column_names)
print(df)

      0    1     2     3    4      5       6      7      8      9    ...  468  \
0      E5  7.0  7.11  11.4  4.7    0.5     0.5    7.0     E5    7.0  ...  NaN   
1       4  7.0     7     2  2.7      7       4      2    4.9      0  ...  NaN   
2     7.0   E4    C4   2.7   B3     C4     0.5     E4     C4    2.7  ...  NaN   
3      C5  7.0    G4    D5   G4     E5     7.0     G4     D5     G4  ...  NaN   
4     7.0   C5    E5    C5   D5    2.7      D5     E5    4.9     A4  ...  NaN   
5      A4  0.5    A4    A4   A4     G4     7.0     C4    4.9     G3  ...  NaN   
6     4.9   E5   4.9    E5   D5      4     4.7     E5     D5      4  ...  NaN   
7      C5  0.5    C5    C5  9.2     A4      E5     D5     C5     B4  ...  NaN   
8     4.7  7.0    G3    E4   E4     E4      E4     D4     F4    9.2  ...  NaN   
9   0.4.7  7.0    G4    A4  7.0     E4  7.11.2    2.7     G4  0.2.7  ...  NaN   
10    7.0  4.9   9.0   0.5  7.0    0.4      C3     E3    7.0    4.9  ...  NaN   
11    9.2   G5    E5    G5  

In [3]:
# Function to generate a list of unique notes and chords from the dataframe
def unique(df):
    main_list = []
    for i in range(len(df)):
        tlist = df.iloc[i].to_list()  # a song is picked from dataframe
        cleaned_list = [x for x in tlist if str(x) != 'nan']  # removing nan values
        main_list += cleaned_list  # all songs joined into a single list
    main_list = [str(x) for x in main_list]  # converting some integers and floats to string
    return list(set(main_list))  # only unique chords and notes are returned

In [4]:
# Function returns a clean list
def clean(tlist):
    tlist = [str(x) for x in tlist]
    return [x for x in tlist if str(x) != 'nan']

In [5]:
unique_list = unique(df)
print(len(unique_list))

65


In [6]:
# Function to generate transition matrix
def trans_matrix_gen(tlist, df):
    trans_matrix = np.zeros((len(tlist), len(tlist)))
    # the average distance between all pssible combinations of unique notes and chords is found
    for l1 in tlist:
        for l2 in tlist:
            c = 0  # acts as a counter in how many songs a combination exists
            avg_dist_total = 0  # the average distace between two unique notes or chords over all songs where they are found
            for i in range(len(df)):
                pos2 = 0
                avg_dist = 0  # the average distace between two unique notes or chords over a song
                k = 0  # counter for number of times a distance is calculated
                song = clean(df.iloc[i].to_list())  # cleaning the song
                if l1 in song and l2 in song:
                    if l1 != l2:  # case for different notes or chords
                        c += 1  # since both are found we increase the presence counter
                        pos1 = np.where(np.array(song) == l1)[0]  # the array stores the indices where l1 is found
                        start = np.min(pos1) + 1  # we search for l2 after the minimum index of l1
                        while pos2 < len(
                                song):  # we break from the loop if the position for l2 reaches end of list or we are unable to find any more
                            try:
                                pos2 = song.index(l2, start)
                                x = pos2 - pos1
                                dist = np.min(np.where(x >= 0, x, np.inf))  # minimum positive distance is stored
                                avg_dist += dist  # for now we just add the distances
                                k += 1  # we increase the measurement couter
                            except ValueError:
                                break
                            # start position for next search is moved to just next to where we found l2
                            start = pos2
                            start += 1
                        if k != 0:
                            avg_dist = avg_dist / k  # average distance in a song
                        avg_dist_total += avg_dist  # for now we add the distance

                    else:  # case for same notes or chords
                        c += 1
                        pos1 = song.index(l1)
                        start = pos1 + 1
                        while pos1 < len(song) - 1 and pos2 < len(song):
                            try:
                                pos2 = song.index(l2, start)
                                dist = pos2 - pos1
                                avg_dist += dist
                                k += 1
                                pos1 = pos2  # if there are 3 times the same chord or note we want the distance between 1st and 2nd, and 2nd and 3rd
                            except ValueError:
                                break
                            start = pos2
                            start += 1
                        if k != 0:
                            avg_dist = avg_dist / k
                        avg_dist_total += avg_dist

            if c != 0:
                avg_dist_total = avg_dist_total / c  # average distance of a combination over all songs where they are bound
            if avg_dist_total != 0:
                trans_matrix[tlist.index(l1)][tlist.index(
                    l2)] = 1 / avg_dist_total  # 1/d is taken so higher distance gives lower transition probabilty

    for i in range(len(tlist)):
        trans_matrix[i] = trans_matrix[i] / np.sum(trans_matrix[i])  # dividing by sum to convert to probability
    return trans_matrix

In [7]:
transition_matrix = trans_matrix_gen(unique_list, df)

In [8]:
print(transition_matrix)

[[0.05393531 0.00415686 0.         ... 0.00500637 0.00583715 0.06741914]
 [0.01634921 0.01315953 0.12261908 ... 0.01034499 0.01246193 0.01362434]
 [0.         0.02261703 0.01475616 ... 0.03262837 0.02297518 0.        ]
 ...
 [0.00620709 0.00808172 0.00610364 ... 0.01133966 0.00869674 0.00590675]
 [0.00540139 0.00745072 0.03330857 ... 0.00824465 0.0058287  0.00499629]
 [0.47776861 0.00725265 0.         ... 0.00870781 0.01012222 0.        ]]


In [9]:
print(np.max(transition_matrix), np.min(transition_matrix))
print(np.sum(transition_matrix[12]))
print(np.max(transition_matrix[2]), np.min(transition_matrix[2]))

0.6567742523038305 0.0
1.0
0.1780576751139864 0.0


In [10]:
# Function to make music
def music_gen(tlist, trans_matrix, length):
    music = [random.choice(tlist)]  # first note or chord is randomly chosen from the unique_list
    c = 0
    while c < length:
        i = tlist.index(music[c])  # we find the index of current note or chord in unique_list
        music.append(random.choices(tlist, weights=list(trans_matrix[i]), k=1)[
                         0])  # the row in transition matrix corresponding to the index we found contains all the transition probabilities
        c += 1
    return music

In [11]:
print(music_gen(unique_list, transition_matrix, 1000))

['5.11', 'C4', 'G3', 'B3', 'F2', '4.9', 'A5', '9.0', '9.0', '11.2', '2.4.9', 'F5', 'C3', '7.11', 'C4', '9.2', 'B5', 'E3', 'D6', '7.0', '9', '11.2', '2.4.9', '5.9.0', 'E5', 'F2', 'B3', 'B3', '5.7', '11.2', '2.4.9', '5.9.0', '4.9', '2.4.9', 'G4', '0.4', '2.4.9', '9.0.2', '0.4.7', '9', '2.5', 'G3', 'B3', '0.4', 'A2', '7.11', 'G4', '4.7', 'C2', '4.7', '9.0', '0.4.7', '7.11', '7', '11.4', '4', '5.9', 'B3', 'C2', 'B3', 'C4', '5.11', '7.0', 'F5', 'G3', 'A3', 'E3', '9.2', '9.0.4', 'E5', '0.2', '7.11.2', 'A4', 'A5', 'A2', '7.0', '2.4', '7.0', '5.7.0', 'C4', 'A3', 'D5', '9.0.4', '9.2', 'B5', 'D6', 'C6', 'E6', 'D5', '9.0.2', 'D5', '5', '4.7', 'B-3', 'B3', '7.0', '7.0', '5.7.0', '0.5', '5.11', '0.5', '5.11', 'F4', '11.2', '2.4.9', '11.2', '2.4.9', '5.9.0', '2.5.9', '2.4.9', '5.9.0', '0.4.7', 'B4', 'F5', 'D6', '0.2', '5.9', 'F2', 'B-3', 'B3', 'C2', 'F2', 'B3', 'F2', 'B3', 'C3', 'E4', '5.7', '5.11', 'G3', 'F3', 'F3', 'E3', '5.9', '9.2', 'F5', 'D6', 'C6', 'B4', '11', 'A4', '7.11', 'E2', 'G4', '7', '1

In [12]:
num_songs = 5
songs = []

# A bunch of songs are created and put into a dataframe
for _ in range(num_songs):
    songs.append(music_gen(unique_list, transition_matrix, 200))
df2 = pd.DataFrame(songs)
print(df2)
df2.to_csv("dist_method_songs.csv")

     0      1      2     3      4    5      6      7      8     9    ...  \
0    5.7     E4   5.11    G3     E3   G5    4.7     E2     F2   B-3  ...   
1  5.9.0     C4     G3  11.2  2.4.9  7.0  5.7.0  9.0.4    9.0    C6  ...   
2    7.0   5.11     F4   4.9  9.0.4  9.2    5.9      7     E5    C6  ...   
3  9.0.4   11.2  5.7.0    G4      9  2.5    2.4     E4    5.7  5.11  ...   
4  0.4.7  5.7.0  2.5.9   7.0  9.0.4  4.9     E6    2.7  5.7.0   4.9  ...   

    191     192    193    194 195    196    197  198    199     200  
0    D6     9.0     E4  5.7.0  G4  0.2.7  2.5.9  0.5    0.2  7.11.2  
1   9.2      B3     A4     E4  D4    2.5     A3  2.7  2.5.9     4.7  
2    C2      C3     F2     C2  B3     F4   5.11   B3    4.9     9.0  
3  11.4     5.9      4     F5   9     10      5  2.5      5     2.5  
4   2.7  7.11.2  2.5.9  0.2.7  C5    0.4   7.11    5    2.5       5  

[5 rows x 201 columns]
