In [1]:
import os

# Build the input directory with os.path.join so the lookup works on any OS;
# a hard-coded "\\songs" suffix only resolves on Windows.
folder_name = "songs"
files = os.listdir(os.path.join(os.getcwd(), folder_name))
# Sort so the processing order does not depend on the filesystem's listing order.
input_songs = sorted(file for file in files if file.endswith('.txt'))
print(input_songs)  # Original songs

['keyboards_ray-charles_a-song-for-you.txt', 'keyboards_ray-charles_aint-misbehavin.txt', 'keyboards_ray-charles_aint-that-love.txt', 'keyboards_ray-charles_almost-like-being-in-love.txt', 'keyboards_ray-charles_alone-together.txt', 'keyboards_ray-charles_am-i-blue.txt', 'keyboards_ray-charles_america-the-beautiful.txt', 'keyboards_ray-charles_basin-street-blues.txt', 'keyboards_ray-charles_bewitched-bothered-and-bewildered.txt', 'keyboards_ray-charles_black-coffee.txt', 'keyboards_ray-charles_born-to-be-blue.txt', 'keyboards_ray-charles_born-to-lose.txt', 'keyboards_ray-charles_bulldog-bite-hunker-down-hairy-dawg.txt', 'keyboards_ray-charles_busted.txt', 'keyboards_ray-charles_california-here-i-come.txt', 'keyboards_ray-charles_candy.txt', 'keyboards_ray-charles_carry-me-back-to-old-virginny.txt', 'keyboards_ray-charles_come-rain-or-come-shine.txt', 'keyboards_ray-charles_cry-me-a-river.txt', 'keyboards_ray-charles_crying-time.txt', 'keyboards_ray-charles_dont-cry-for-me-argentina.txt

In [2]:
import re
import  numpy as np

# Compiled once at definition time. Every fragment is a raw string so that the
# regex escapes (\/, \+, \b, \w) are not treated as (invalid) Python string
# escapes. No re.VERBOSE flag is used, so '#' inside the pattern is a literal.
_CHORD_RE = re.compile(
    r"\b"
    r"[CDEFGAB]"                        # note letter
    r"(?:#|##|b|bb)?"                   # accidentals
    r"(?:maj|min|m|sus|aug|dim|add)?"   # chord keyword
    r"(?:\/[CDEFGAB])?"                 # '/' followed by a note letter
    r"(?:#|##|b|bb)?"                   # accidentals (second occurrence)
    r"(?:[0-9]+(?:\/[0-9]+)?-?\+?)?"    # digits, optional '/digits', '-', '+'
    r"(?:\/[0-9]?-?[CDEFGAB]?)?"        # '/' with optional digit, '-', note letter
    r"(?:[0-9]+[CDEFGABM])?"            # digits followed by a note letter or 'M'
    r"(?![A-Za-z'])"                    # reject when a letter or apostrophe follows
    r"(?!\w)"                           # reject when any word character follows
)


def fetch_chords(line):
    """
    Run the chord regular expression over one line of a song txt file.

    Parameters
    ----------
    line : str
        A single line of text.

    Returns
    -------
    list of str
        Every non-overlapping match in left-to-right order; an empty list
        when nothing in the line matches.
    """
    return _CHORD_RE.findall(line)

def flat_list(in_list):
    """
    Convert a nested list into a single flat list.

    Exactly one level of nesting is removed and the items are returned
    unchanged (no numpy conversion), in their original order.

    Parameters
    ----------
    in_list : iterable of iterables
        For example a list of per-line chord lists.

    Returns
    -------
    list
        All inner items in order. An empty ``in_list`` yields ``[]``
        (``np.concatenate`` would raise ``ValueError`` for that input).
    """
    return [item for inner in in_list for item in inner]

def write_output_file(songs_name, chords):
    """
    Write the chords of one song to ``songs/final/<songs_name>.txt``.

    The target directory is resolved relative to the current working
    directory and created when missing. One chord is written per line.

    Parameters
    ----------
    songs_name : str
        Used as the output file name (``.txt`` is appended).
    chords : iterable of str
        Chords to write, in order.

    Notes
    -----
    Errors raised while opening or writing the file are printed, not
    re-raised.
    """
    # os.path.join keeps the path valid on every OS; hard-coded "\\" separators
    # would create a single directory literally named "songs\final" on POSIX.
    directory = os.path.join(os.getcwd(), "songs", "final")
    os.makedirs(directory, exist_ok=True)
    try:
        out_file_name = os.path.join(directory, f"{songs_name}.txt")
        with open(out_file_name, 'w') as file:
            for chord in chords:
                file.write(chord + '\n')
    except Exception as e:
        print(f"An error occurred: {str(e)}")

def write_to_csv(name, chords):
    """
    Write a source/target chord DataFrame to ``songs/final/csv/<name>.csv``.

    The directory is resolved relative to the current working directory and
    created when missing. The file is written without the index and with
    ``;`` as the field separator.

    Parameters
    ----------
    name : str
        Output file name (``.csv`` is appended).
    chords : pandas.DataFrame
        Frame to serialise; only its ``to_csv`` method is used.
    """
    # os.path.join instead of "\\" / "\{" so the path is valid on every OS and
    # the f-string no longer contains an invalid escape sequence.
    directory = os.path.join(os.getcwd(), "songs", "final", "csv")
    os.makedirs(directory, exist_ok=True)
    chords.to_csv(os.path.join(directory, f"{name}.csv"), index=False, sep=';')

def generate_unique_chord_ids(unique_chords):
    """
    Map every chord to a numeric ID.

    IDs start at 1 and follow the iteration order of ``unique_chords``.
    """
    return {chord: position for position, chord in enumerate(unique_chords, start=1)}

In [3]:
def extract_song_details(path):
    """
    Read a song txt file and extract the song name and its chords.

    The first line of the file is taken as the song name. Every remaining
    line is passed to ``fetch_chords`` and the per-line matches are merged
    into one flat list in file order.

    Parameters
    ----------
    path : str
        Path of the txt file to read.

    Returns
    -------
    tuple or None
        ``(song_name, chords)`` on success; ``chords`` is an empty list when
        no line produced a match. ``None`` when the file could not be read or
        parsed -- the error is printed, not raised.
    """
    try:
        # The context manager closes the file even if reading fails.
        with open(path, 'r') as file:
            lines = file.readlines()

        song_name = lines.pop(0).replace('\n', '')  # first line holds the song name

        per_line = map(fetch_chords, lines)
        # Lines without any match (e.g. lyrics) yield empty lists; drop them.
        chords_ = [found for found in per_line if found]
        # Skip flattening when nothing matched so a chord-less song is
        # returned as an empty list instead of failing.
        flat_vec_chords = flat_list(chords_) if chords_ else []
        return song_name, flat_vec_chords
    except FileNotFoundError:
        print("File not found")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    return None

In [8]:
unique_chords = set()

for song in input_songs:
    # os.path.join instead of f"songs\{song}": portable, and no invalid "\{" escape.
    details = extract_song_details(os.path.join("songs", song))
    if details is None:
        # extract_song_details prints the error and returns None on failure;
        # skip the song instead of crashing on tuple unpacking.
        continue
    name, chords = details
    unique_chords.update(chords)
    write_output_file(name, chords)

# Sorting first makes the chord -> ID assignment deterministic.
chord_ids = generate_unique_chord_ids(sorted(unique_chords))

{'A': 1,
 'A#': 2,
 'A#/A': 3,
 'A#6': 4,
 'A#6/C': 5,
 'A#7': 6,
 'A#9': 7,
 'A#add9': 8,
 'A#dim': 9,
 'A#m': 10,
 'A#m6': 11,
 'A#m7': 12,
 'A#m7/5-': 13,
 'A#maj7': 14,
 'A4/7': 15,
 'A5+': 16,
 'A7': 17,
 'A7/13-': 18,
 'A7/9': 19,
 'A7/9-': 20,
 'A7M': 21,
 'A9': 22,
 'Adim': 23,
 'Am': 24,
 'Am/C': 25,
 'Am/D': 26,
 'Am/G': 27,
 'Am5+': 28,
 'Am5-/7': 29,
 'Am6': 30,
 'Am7': 31,
 'Am7+': 32,
 'Am7/D': 33,
 'Am7/F': 34,
 'Am7/G': 35,
 'Am9': 36,
 'B': 37,
 'B7': 38,
 'Bb': 39,
 'Bb7': 40,
 'Bb7/11+': 41,
 'Bb7M': 42,
 'Bb9': 43,
 'Bdim': 44,
 'Bdim7': 45,
 'Bm': 46,
 'Bm4/7': 47,
 'Bm5-/7': 48,
 'Bm7': 49,
 'C': 50,
 'C#': 51,
 'C#7': 52,
 'C#9': 53,
 'C#m': 54,
 'C#m7': 55,
 'C/A': 56,
 'C/B': 57,
 'C/E': 58,
 'C/F': 59,
 'C4': 60,
 'C4/7': 61,
 'C5+': 62,
 'C5-/7': 63,
 'C6': 64,
 'C6/9': 65,
 'C7': 66,
 'C7+': 67,
 'C7/13-': 68,
 'C7/5+': 69,
 'C7/9': 70,
 'C7M': 71,
 'C9': 72,
 'Cdim': 73,
 'Cdim7': 74,
 'Cm': 75,
 'Cm6': 76,
 'Cm7': 77,
 'Cmaj7': 78,
 'Csus4': 79,
 'D': 80,


In [5]:
# Build the directory with os.path.join so the lookup works on any OS;
# a hard-coded "\\songs\\final" suffix only resolves on Windows.
folder_name = os.path.join("songs", "final")
final_files = os.listdir(os.path.join(os.getcwd(), folder_name))
# Sort so the processing order does not depend on the filesystem's listing order.
final_songs = sorted(file for file in final_files if file.endswith('.txt'))
print(final_songs)

['Ray Charles - A Song For You (Keyboard chords).txt', "Ray Charles - Ain't Misbehavin' (Keyboard chords).txt", 'Ray Charles - Aint That Love (Keyboard chords).txt', 'Ray Charles - Almost Like Being In Love (Keyboard chords).txt', 'Ray Charles - Alone Together (Keyboard chords).txt', 'Ray Charles - Am I Blue (Keyboard chords).txt', 'Ray Charles - America the beautiful (Keyboard chords).txt', 'Ray Charles - Basin Street Blues (Keyboard chords).txt', 'Ray Charles - Bewitched, Bothered And Bewildered (Keyboard chords).txt', 'Ray Charles - Black Coffee (Keyboard chords).txt', 'Ray Charles - Born To Be Blue (Keyboard chords).txt', 'Ray Charles - Born to lose (Keyboard chords).txt', 'Ray Charles - Bulldog Bite Hunker Down Hairy Dawg (Keyboard chords).txt', 'Ray Charles - Busted (Keyboard chords).txt', 'Ray Charles - California Here I Come (Keyboard chords).txt', 'Ray Charles - Candy (Keyboard chords).txt', 'Ray Charles - Carry Me Back To Old Virginny (Keyboard chords).txt', 'Ray Charles - Co

In [6]:
import pandas as pd

def replace_chords_with_ids(chords, chord_dic):
    """
    Translate chord names into their IDs.

    Each entry of ``chords`` is looked up in ``chord_dic``; the IDs are
    returned in the same order. A chord missing from ``chord_dic`` raises
    ``KeyError``.
    """
    ids = []
    for name in chords:
        ids.append(chord_dic[name])
    return ids

def create_src_dst(chords):
    """
    Create a DataFrame of unique (source chord, target chord) transitions.

    Each element of ``chords`` becomes a ``Target`` whose ``Source`` is the
    element immediately before it. The first element has no predecessor and
    is dropped; duplicate (Source, Target) rows are removed.

    Parameters
    ----------
    chords : list
        Chord IDs in playing order. ``Source`` is cast with ``astype(int)``,
        so the entries must be integer-valued.

    Returns
    -------
    pandas.DataFrame
        Columns ``['Source', 'Target']``, keeping the original row labels.
    """
    transitions = pd.DataFrame(chords, columns=['Target'])
    # A scalar period works on every pandas version and always returns a
    # Series; a list-valued ``periods`` is only accepted by newer releases.
    transitions['Source'] = transitions['Target'].shift(1)
    # Copy the slice so the assignment below targets an independent frame
    # (avoids SettingWithCopyWarning).
    transitions = transitions.iloc[1:].copy()
    # shift() introduced NaN and thereby floats; restore integer IDs.
    transitions['Source'] = transitions['Source'].astype(int)
    return transitions[['Source', 'Target']].drop_duplicates()

def convert_to_gephi_format(path):
    """
    Read a chord txt file and convert it into the Gephi CSV format.

    Every line of the file is treated as one chord name. The names are
    replaced by their IDs from the module-level ``chord_ids`` mapping, turned
    into a source/target DataFrame with ``create_src_dst`` and written out
    through ``write_to_csv`` under the input file's base name (extension
    included).

    Parameters
    ----------
    path : str
        Path of the txt file holding one chord per line.

    Returns
    -------
    pandas.DataFrame or None
        The source/target frame on success; ``None`` when reading or
        converting failed -- the error is printed, not raised.
    """
    try:
        # The context manager closes the file even if a later step fails.
        with open(path, 'r') as file:
            chords = [line.replace('\n', '') for line in file]
        trans_chord = replace_chords_with_ids(chords, chord_ids)
        df = create_src_dst(trans_chord)
        write_to_csv(os.path.basename(path), df)
        return df
    except FileNotFoundError:
        print("File not found")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    return None

In [7]:
df_list = []
for song in final_songs:
    # os.path.join instead of f"songs\\final\{song}": portable, and no invalid "\{" escape.
    df = convert_to_gephi_format(os.path.join("songs", "final", song))
    if df is None:
        # convert_to_gephi_format prints the error and returns None on failure;
        # keep only real frames in the list.
        continue
    df_list.append(df)
concat_df = pd.concat(df_list)
write_to_csv('all_songs', concat_df)