In [1]:
import MySQLdb
import pandas as pd
import numpy as np
import re

ModuleNotFoundError: No module named 'MySQLdb'

In [2]:
def connect_to_database(db="UltimateGuitarTabs", host="localhost", user="root", passwd=""):
    """
    Connects to a MySQL database and returns the connection.

    The connection settings used to be hard-coded in the body; they are now
    keyword arguments whose defaults reproduce the previous behaviour, so
    existing calls (`connect_to_database()` / `connect_to_database("name")`)
    are unaffected.

    args:
        db     - name of the database to open
        host   - MySQL server host
        user   - account used to log in
        passwd - password for that account

    returns:
        the open MySQLdb connection; the caller is responsible for closing it
    """
    # Keep the database *name* (`db`) and the connection object in separate
    # variables instead of rebinding the parameter.
    connection = MySQLdb.connect(host=host,
                                 user=user,
                                 passwd=passwd,
                                 db=db)
    return connection

def print_table(tableName):
    """
    Prints every row of a database table, one row (tuple) per line.

    args:
        tableName - name of the table to dump. It is concatenated into the
                    SQL text, so it must come from trusted code, never from
                    user input.
    """
    db = connect_to_database()
    try:
        cur = db.cursor()
        try:
            # Select data from table using SQL query.
            cur.execute("SELECT * FROM " + tableName)
            # Print each full row
            for row in cur.fetchall():
                print(row)
        finally:
            cur.close()
    finally:
        # Previously the connection was left open on every call.
        db.close()
        
def get_table(tableName):
    """
    Fetches every row of a database table.

    args:
        tableName - name of the table to read. It is concatenated into the
                    SQL text, so it must come from trusted code, never from
                    user input.

    returns:
        whatever `cursor.fetchall()` yields for `SELECT * FROM <tableName>`
        (a sequence of row tuples)
    """
    db = connect_to_database()
    try:
        cur = db.cursor()
        try:
            # Select data from table using SQL query.
            cur.execute("SELECT * FROM " + tableName)
            # Materialise the rows before the cursor/connection are closed.
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Previously the connection was left open on every call.
        db.close()

    return(rows)


In [3]:
# Load the raw rows of the 'Chords' table and label the five columns.
chord_columns = ['Song', 'Artist', 'Key', 'Capo', 'Chords']
chord_rows = list(get_table('Chords'))
Chords = pd.DataFrame(chord_rows, columns=chord_columns)
Chords

Unnamed: 0,Song,Artist,Key,Capo,Chords
0,Hallelujah,Jeff Buckley,Db,1,"C,Am,C,Am,C,Am,C,Am,F,G,C,G,C,F,G,Am,F,G,E7,Am..."
1,Im Yours,Jason Mraz,G,4,"G,D,Em,C,G,D,Em,C,G,D,Em,C,G,D,Em,C,G,D,Em,C,G..."
2,Wonderwall,Oasis,F#m,2,"Em7,G,Dsus4,A7sus4,Em7,G,Dsus4,A7sus4,Em7,G,Ds..."
3,Wish You Were Here,Pink Floyd,Em,0,"Em7,G,Em7,G,Em7,A7sus4,Em7,A7sus4,G,C,D/F#,G,D..."
4,Hey Soul Sister,Train,C,4,"C,G,Am,F,C,G,Am,F,C,G,Am,F,G,C,G,Am,F,C,G,Am,F..."
5,Wonderwall,Oasis,D,0,"F#m,A,Esus4,B,F#m,A,Esus4,B,F#m,A,Esus4,B,F#m,..."
6,The Only Exception,Paramore,B,4,"G,Dm,Cmaj7,G,Dm,Cmaj7,G,Dm,Cmaj7,G,Dm,Cmaj7,G,..."
7,Love Story,Taylor Swift,C,2,"C,G,Am,F,C,F,Am,F,C,F,Am,G,F,G,Am,C,F,G,Am,F,G..."
8,Creep,Radiohead,G,0,"G,B,C,Cm,G,B,C,Cm,G,B,C,Cm,G,B,C,Cm,G,B,C,Cm,G..."
9,Iris,Goo Goo Dolls,Bm,0,"Bm,Bsus2,G,D,Em,G,Bm,A,G,D,Em,G,Bm,A,G,D,Em,G,..."


In [22]:
def fix_accidental(note, accidental):
    """
    Rewrites a note so that it uses one of the twelve sharp-based names
    (A, A#, B, C, C#, D, D#, E, F, F#, G, G#).

    Flats are converted to the enharmonic sharp of the letter below
    (Db -> C#, Ab -> G#). The spellings that have no sharp/flat equivalent
    are mapped to the natural note they sound as: Cb -> B, Fb -> E,
    B# -> C, E# -> F. Any other accidental value (e.g. '') is passed
    through unchanged.

    args:
        note       - single letter 'A'..'G'
        accidental - '#', 'b', or anything else for "no accidental"

    returns:
        (note, accidental) tuple of strings

    raises:
        ValueError - if `note` is not one of the letters A-G
    """
    naturals = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
    if note not in naturals:
        raise ValueError("note must be one of A-G, got %r" % (note,))

    note_idx = naturals.index(note)
    if accidental == 'b':
        # Index -1 wraps around, so Ab correctly becomes G#.
        lower = naturals[note_idx - 1]
        if note in ('C', 'F'):
            # B-C and E-F are only a semitone apart: Cb is B, Fb is E.
            return(lower, '')
        return(lower, '#')
    if accidental == '#' and note in ('B', 'E'):
        # Same semitone gaps seen from below: B# is C, E# is F.
        return(naturals[(note_idx + 1) % len(naturals)], '')
    return(note, accidental)
            
        
def clean_chords(chords):
    """
    This function takes in a comma-separated string of
    chords and cleans it by removing any bass note variations, or
    other chord embellishments. Minor ('m') and diminished ('dim') labels
    are kept as these are used in the chord progression table. The purpose
    of this is to clean the chords to match the labels within the chord
    progression table.

    Tokens are stripped of surrounding whitespace first. Tokens that are
    blank or do not start with a note letter A-G (for example the empty
    string left by a trailing comma, or "N.C.") are skipped instead of
    raising IndexError, so the result can be shorter than the input.

    args:
        chords - comma-separated string of chord names

    returns:
        new_chords - list of newly cleaned chords to be tabulated
                        by the chord progression table
    """
    # Raw strings: `\/` and `\d` are regex escapes, not Python string escapes.
    #
    # Pattern grouping: 1=(chord pitch) 2=(bass note) 3=(chord type) 4=(bass note)
    # Group 3 is a plain character class, not an alternation: it greedily
    # consumes any run of the characters below. This is exactly the set of
    # characters the original bracket expression contained (including the
    # space and the literal "(", ")", "?", "|").
    prog = re.compile(
        r"^([A-G]+)"
        r"(\/[A-G]*[b#])*"
        r"([()?| #\dabdgijmsu]*)"
        r"(\/[A-G]*[b#])*"
    )

    # Applied after digits are removed: root, optional accidental, then 'm'
    # or 'dim' only when that suffix ends the string.
    prog2 = re.compile(r"^([A-G])([b#])?(m$|m\d$)?(dim$|dim\d$)?")

    new_chords = []
    for token in chords.split(','):
        curr_chord = token.strip()
        parsed = prog.match(curr_chord)
        if parsed is None:
            # Blank token or something that is not a chord name.
            continue

        # Drop the bass note (groups 2 and 4), then any digits.
        no_base = parsed.group(1) + parsed.group(3)
        no_num = re.sub(r"\d", "", no_base)

        # no_num always starts with A-G here, so prog2 is guaranteed to match.
        root, acc, minor, dim = prog2.match(no_num).groups(default="")
        note, accidental = fix_accidental(root, acc)
        new_chords.append(note + accidental + minor + dim)

    return(new_chords)


def get_key_tbls(path='key_table.csv'):
    """
    Helper function that reads in a chord progression table
    and creates a dictionary that maps chord names to their
    indices on the chord progression table. This dictionary
    will be used to tabulate a 'key table' to determine the
    key of a song.

    The CSV must have a `key` column; columns 1 through 7 (0-based) are
    used as the chord columns of the progression table.

    args:
        path - location of the progression table CSV. Defaults to
               'key_table.csv' in the working directory, which is the
               file this function always read before.

    returns:
        Key_dict - Dictionary mapping chord names to indices, in the
                    (row_indices, col_indices) form returned by np.where
        Keys - list of keys that correspond to the order
                of the progression table
    """
    Key_tbl = pd.read_csv(path)
    Keys = list(Key_tbl['key'])
    # Plain 2-D object array (np.matrix is no longer recommended by NumPy).
    Tbl = Key_tbl.iloc[:, 1:8].to_numpy(dtype=object)

    # Storing all possible chords; non-string cells (blank cells are read
    # as NaN) are ignored rather than turned into a chord label.
    all_chords = sorted({cell for cell in Tbl.ravel() if isinstance(cell, str)})

    # Creating dict(key='chord', val='indices in progression tbl')
    # np.where is called once per distinct chord here so that tallying a
    # song later is a plain dictionary lookup.
    Key_dict = {}
    for chord in all_chords:
        Key_dict[chord] = np.where(Tbl == chord)

    return(Key_dict, Keys)

def compute_key(Key_dict, Keys, chords):
    """
    Estimates the key of a song from its chords.

    Every cleaned chord adds one count to each cell of the progression
    table where that chord appears; the key whose row collects the most
    counts is returned. Chords that do not appear anywhere in the table
    are ignored (previously they raised KeyError).

    args:
        Key_dict - dictionary mapping chord names to their
                    (row_indices, col_indices) in the progression table
        Keys - list of key names, one per row of the progression table
        chords - comma-separated string of chord names for one song

    returns:
        computed_key - the entry of Keys with the highest chord count
                        (the first such entry on ties)
    """
    chords = clean_chords(chords)
    # Matrix of zeros to tabulate chord occurrences: one row per key in
    # Keys (instead of a fixed 12), seven chord columns per key.
    count_mat = np.zeros((len(Keys), 7))

    # Tabulating chords
    for chord in chords:
        positions = Key_dict.get(chord)
        if positions is None:
            # Chord is not part of any key in the table; it carries no vote.
            continue
        count_mat[positions] += 1

    computed_key = Keys[np.argmax(np.sum(count_mat, axis = 1))]
    return(computed_key)
    
def is_rel_min(comp_key, act_key):
    """
    Checks whether act_key is the relative minor of comp_key, i.e. the
    minor key whose root lies three semitones below comp_key's root
    (C -> Am, A -> F#m).

    Fixes over the previous version:
      * minor keys with an accidental ('F#m', 'Ebm') are recognised; they
        used to be rejected because only two-character keys were accepted
      * a flat act_key is normalised to its sharp spelling before comparing
      * the note list is indexed with a plain int instead of a NumPy array
      * a non-match returns False instead of the internal index array

    args:
        comp_key - computed (major) key, e.g. 'C', 'C#' or 'Db'
        act_key  - actual key of the song, e.g. 'Am' or 'F#m'

    returns:
        'Not relative minor' - act_key is not a minor key name
        'Keys are the same'  - act_key is minor and equal to comp_key
        True / False         - whether act_key is comp_key's relative minor
    """
    notes = ['A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#']

    # A minor key is a note letter, an optional accidental, and a final 'm'.
    if re.fullmatch(r'[A-G][b#]?m', act_key) is None:
        return('Not relative minor')
    if comp_key == act_key:
        return('Keys are the same')

    # Bring both roots to the sharp-based spelling used in `notes`.
    act_root = act_key[:-1]
    if len(act_root) == 2:
        note, accidental = fix_accidental(act_root[0], act_root[1])
        act_root = note + accidental
    if len(comp_key) == 2:
        note, accidental = fix_accidental(comp_key[0], comp_key[1])
        comp_key = note + accidental

    if comp_key not in notes:
        # Not a recognisable major key, so it has no relative minor here.
        return(False)

    # Negative indices wrap around the list, so A (index 0) maps to F#.
    rel_min_root = notes[notes.index(comp_key) - 3]
    return(rel_min_root == act_root)
# Quick check: 'D' does not end in 'm', so it is not treated as a minor key
# and the call returns 'Not relative minor'.
is_rel_min('C#', 'D')

'Not relative minor'

In [5]:
Key_dict, Keys = get_key_tbls()

# Print the stored key next to the key inferred from the chords, one
# blank-line-separated record per song. Songs whose stored key is the
# empty string are left out.
song_fields = zip(Chords['Song'], Chords['Capo'], Chords['Key'], Chords['Chords'])
for song, capo, actual_key, chord_string in song_fields:
    if actual_key != '':
        print('song: ' + song)
        print('capo: ' + str(capo))
        print('actual key: ' + actual_key)
        print('computed key: ' + compute_key(Key_dict, Keys, chord_string))
        print('')

song: Hallelujah
capo: 1
actual key: Db
computed key: C

song: Im Yours
capo: 4
actual key: G
computed key: G

song: Wonderwall
capo: 2
actual key: F#m
computed key: G

song: Wish You Were Here
capo: 0
actual key: Em
computed key: G

song: Hey Soul Sister
capo: 4
actual key: C
computed key: C

song: Wonderwall
capo: 0
actual key: D
computed key: A

song: The Only Exception
capo: 4
actual key: B
computed key: C

song: Love Story
capo: 2
actual key: C
computed key: C

song: Creep
capo: 0
actual key: G
computed key: C

song: Iris
capo: 0
actual key: Bm
computed key: D

song: Let It Be
capo: 0
actual key: C
computed key: C

song: Cant Help Falling In Love
capo: 2
actual key: C
computed key: C

song: Viva La Vida
capo: 1
actual key: D
computed key: G

song: Dont Look Back In Anger
capo: 0
actual key: C
computed key: C

song: Im Yours
capo: 0
actual key: C
computed key: C

song: Radioactive
capo: 2
actual key: Am
computed key: G

song: Hey Jude
capo: 0
actual key: F
computed key: F

song: Co

computed key: G

song: Wonderful Tonight
capo: 0
actual key: G
computed key: G

song: Champagne Supernova
capo: 0
actual key: A
computed key: D

song: If I Fell
capo: 0
actual key: Ebm
computed key: D

song: Wake Me Up When September Ends
capo: 0
actual key: G
computed key: G

song: Still Into You
capo: 5
actual key: F
computed key: C

song: She Moves In Her Own Way
capo: 0
actual key: G
computed key: G

song: Something
capo: 0
actual key: C
computed key: G

song: Sign Of The Times
capo: 0
actual key: F
computed key: C

song: Snow Hey Oh
capo: 4
actual key: Abm
computed key: G

song: Starman
capo: 0
actual key: Gm
computed key: F

song: Lyin Eyes
capo: 0
actual key: G
computed key: G

song: The Times They Are A-Changin
capo: 0
actual key: G
computed key: G

song: Blackbird
capo: 0
actual key: G
computed key: G

song: Remembering Sunday
capo: 2
actual key: A
computed key: G

song: Run
capo: 0
actual key: Am
computed key: C

song: History
capo: 1
actual key: Gb
computed key: F

song: New

In [701]:
# NOTE(review): fix_accidental as defined in this notebook returns a
# (note, accidental) tuple. The bare 'C#' string saved as this cell's output
# does not match that and presumably came from an earlier definition.
fix_accidental('D', 'b')

'C#'

In [656]:
# NOTE(review): in the cells visible here `count_mat` only exists as a local
# inside compute_key, so this scratch cell depends on a variable left over in
# the kernel and will presumably fail after Restart & Run All.
# Adds one count at every table position where the chord 'C' appears.
count_mat[Key_dict['C']] += 1


In [657]:
# Display the tally matrix (scratch inspection of kernel state).
count_mat

array([[1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0.]])

In [550]:
# Same selection compute_key performs: sum each row of count_mat and look up
# the key name at the position of the largest row total.
Keys[np.argmax(np.sum(count_mat, axis = 1))]

'C'

Pseudocode:
- Read in each string of chords and clean them
- Tabulate # of times each chord comes up in a given song
 1. Take note of each unique chord
 2. Call np.where for each and store the corresponding indices
     - This will save time so we don't call np.where for every single chord of every song
- Sum each row and treat the row with the largest total as the key of the song