In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.dummy import DummyClassifier
import scipy.stats as stats
import itertools
from sklearn import metrics
from time import time
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm

## Spotify Calls

In [505]:
import requests
import json
import config

# retrieve access tokens

client_id = config.client_id
client_secret = config.client_secret

grant_type = 'client_credentials'

body_params = {'grant_type' : grant_type}

url='https://accounts.spotify.com/api/token'

response=requests.post(url, data=body_params, auth = (client_id, client_secret)) 
response.text
token = eval(response.text)
print('Full Token Data')
print(token)
print('\n')
print('token needed:', token.get('access_token'))

Full Token Data
{'access_token': 'BQCDNer7j0ca0-xwQRtgXjS06jLgiXVUJ87FjBriqYvaHBy4IIiG1d5l5q1sPStZpi4gppSuMBoN_8fP9xg', 'token_type': 'Bearer', 'expires_in': 3600, 'scope': ''}


token needed: BQCDNer7j0ca0-xwQRtgXjS06jLgiXVUJ87FjBriqYvaHBy4IIiG1d5l5q1sPStZpi4gppSuMBoN_8fP9xg


In [506]:
access_token = token.get('access_token')
headers = {'Authorization': 'Bearer '+ access_token}

## Genres Available in Spotify's API

Before choosing what genre's will be classifiers, I make a call to check which genre's are available.

In [4]:
# retreiving spotify's list of genres
avail_genres = requests.get('https://api.spotify.com/v1/recommendations/available-genre-seeds', headers=headers)
genres = avail_genres.json()
genres

{'genres': ['acoustic',
  'afrobeat',
  'alt-rock',
  'alternative',
  'ambient',
  'anime',
  'black-metal',
  'bluegrass',
  'blues',
  'bossanova',
  'brazil',
  'breakbeat',
  'british',
  'cantopop',
  'chicago-house',
  'children',
  'chill',
  'classical',
  'club',
  'comedy',
  'country',
  'dance',
  'dancehall',
  'death-metal',
  'deep-house',
  'detroit-techno',
  'disco',
  'disney',
  'drum-and-bass',
  'dub',
  'dubstep',
  'edm',
  'electro',
  'electronic',
  'emo',
  'folk',
  'forro',
  'french',
  'funk',
  'garage',
  'german',
  'gospel',
  'goth',
  'grindcore',
  'groove',
  'grunge',
  'guitar',
  'happy',
  'hard-rock',
  'hardcore',
  'hardstyle',
  'heavy-metal',
  'hip-hop',
  'holidays',
  'honky-tonk',
  'house',
  'idm',
  'indian',
  'indie',
  'indie-pop',
  'industrial',
  'iranian',
  'j-dance',
  'j-idol',
  'j-pop',
  'j-rock',
  'jazz',
  'k-pop',
  'kids',
  'latin',
  'latino',
  'malay',
  'mandopop',
  'metal',
  'metal-misc',
  'metalcore',


## Genre Playlist IDs

After choosing 15 genres, use the playlist ID to retrieve the songs available in the the genre.

In [5]:
# make a list of genre playlist IDS and names. Then convert into a dictionary

list_of_genre_ids = ['7Mr3wEdKgaiAP4Cm2a6vda', 
                     '4MRGjKqlWuJZJ8XHOGcqkR', 
                     '37i9dQZF1DX4dyzvuaRJ0n',
                     '1o1HVRGIxwCcdSnNnZ69IC',
                     '37i9dQZF1DWWEJlAGA9gs0',
                     '0TCtFMz5lY6jTfusk66ZFj',
                     '3kTtdRE1CtRyRKdicfOGAR',
                     '30BUPgw52SWNm2ZWZZc86A',
                     '6Ph1K0QWCcEwYRr0VhVt6C',
                     '5khoF3ksobwfVwOazDqpqI',
                     '6mRRGF4klfgUzbD2ZKOCq0',
                     '37i9dQZF1DWZd79rJ6a7lp',
                     '37i9dQZF1DX0xLQsW8b5Zx',
                     '2SwjQPegrTTYaOsWQrwhMe'
                    ]

list_of_genre_playlists = ['hip hop playlist',
                           'post rock playlist',
                           'electronic playlist',
                           'detroit_techo playlist',
                           'classical playlist',
                           'disco playlist',
                           'electro indie pop playlist',
                           'industrial pop playlist',
                           'french playlist',
                           'spanish playlist',
                           'ska playlist',
                           'sleep playlist',
                           'rockabilly playlist',
                           '50s Hits playlist'
                           ]

genre_dict = {list_of_genre_playlists[i]: list_of_genre_ids[i] for i in range(len(list_of_genre_playlists))}
genre_dict

{'hip hop playlist': '7Mr3wEdKgaiAP4Cm2a6vda',
 'post rock playlist': '4MRGjKqlWuJZJ8XHOGcqkR',
 'electronic playlist': '37i9dQZF1DX4dyzvuaRJ0n',
 'detroit_techo playlist': '1o1HVRGIxwCcdSnNnZ69IC',
 'classical playlist': '37i9dQZF1DWWEJlAGA9gs0',
 'disco playlist': '0TCtFMz5lY6jTfusk66ZFj',
 'electro indie pop playlist': '3kTtdRE1CtRyRKdicfOGAR',
 'industrial pop playlist': '30BUPgw52SWNm2ZWZZc86A',
 'french playlist': '6Ph1K0QWCcEwYRr0VhVt6C',
 'spanish playlist': '5khoF3ksobwfVwOazDqpqI',
 'ska playlist': '6mRRGF4klfgUzbD2ZKOCq0',
 'sleep playlist': '37i9dQZF1DWZd79rJ6a7lp',
 'rockabilly playlist': '37i9dQZF1DX0xLQsW8b5Zx',
 '50s Hits playlist': '2SwjQPegrTTYaOsWQrwhMe'}

In [6]:
# call playlist ID by calling the genre name of the playlist 
genre_dict.get('french playlist')

'6Ph1K0QWCcEwYRr0VhVt6C'

## Call Functions 1.1

In [7]:
# call spotify recieve playlist of songs as dictionary
def call_spotify(genre_ID):
    i = genre_ID
    base = 'https://api.spotify.com/v1/playlists/'
    end = '/tracks'
    r = requests.get(base + i + end, headers=headers)
    songs = r.json()
    return songs

In [8]:
# example of calling spotify to retrieve all songs in a genre playlist
call_spotify(genre_dict.get('french playlist'))

{'href': 'https://api.spotify.com/v1/playlists/6Ph1K0QWCcEwYRr0VhVt6C/tracks?offset=0&limit=100',
 'items': [{'added_at': '2016-10-25T19:26:01Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/lssmithmfl'},
    'href': 'https://api.spotify.com/v1/users/lssmithmfl',
    'id': 'lssmithmfl',
    'type': 'user',
    'uri': 'spotify:user:lssmithmfl'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4a16JEaHldo5vZuRojtGTZ'},
       'href': 'https://api.spotify.com/v1/artists/4a16JEaHldo5vZuRojtGTZ',
       'id': '4a16JEaHldo5vZuRojtGTZ',
       'name': 'Charles Trenet',
       'type': 'artist',
       'uri': 'spotify:artist:4a16JEaHldo5vZuRojtGTZ'}],
     'available_markets': [],
     'external_urls': {'spotify': 'https://open.spotify.com/album/4WDKGsDi7616S6Egs0quLq'},
     'href': 'https://api.spotify.com/v1/albums/4WDKGsDi7616S6Egs0quLq

## Call Functions 1.2

In [9]:
# fetch id function
def fetch_ids(songs):
    ids = []
    for song in songs['items']:
        ids.append(song['track']['id'])
    return ids

# fetch audio feature links function
def audio_features_(data):
    base =  'https://api.spotify.com/v1/audio-features/'
    list_of_ids = []
    for i in data:
        list_of_ids.append(base + str(i))
    return list_of_ids

# retrieve dictionary of track feature data
def fetch_features(links):
    feature_data = []
    for i in links:
        x = requests.get(i, headers=headers)
        j = x.content
        feature_data.append(json.loads(j))
    return feature_data

In [10]:
# combing the three functions into one

# all functions together to pull song list in and return dataframe
def song_return_feature_df(songs):
    ids = fetch_ids(songs)
    links = audio_features_(ids)
    feature_list = fetch_features(links)
    df = pd.DataFrame(feature_list)
    return df

## Call Functions 1.3

In [11]:
# call spotify playlist, return features as dataframe
def call_spotify_return_feat_df(genre_id):
    songs = call_spotify(genre_id)
    df = song_return_feature_df(songs)
    return df

In [507]:
# example of full function. Calling for song features in post rock playlist
french_df = call_spotify_return_feat_df(genre_dict.get('french playlist'))
french_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.219,0.168,0,-16.267,1,0.0505,0.965,0.106,0.373,0.548,69.091,audio_features,0IfKxVyiSQxRajCwEE4dqJ,spotify:track:0IfKxVyiSQxRajCwEE4dqJ,https://api.spotify.com/v1/tracks/0IfKxVyiSQxR...,https://api.spotify.com/v1/audio-analysis/0IfK...,201840,4
1,0.31,0.589,6,-11.097,0,0.273,0.728,0.0,0.963,0.605,61.011,audio_features,6b1FdyzHhNxM5mC9xoMvUw,spotify:track:6b1FdyzHhNxM5mC9xoMvUw,https://api.spotify.com/v1/tracks/6b1FdyzHhNxM...,https://api.spotify.com/v1/audio-analysis/6b1F...,209173,4
2,0.534,0.56,4,-7.565,1,0.0236,0.451,9.6e-05,0.11,0.599,89.885,audio_features,4sWGG5BEy2zYE81rFoHMBo,spotify:track:4sWGG5BEy2zYE81rFoHMBo,https://api.spotify.com/v1/tracks/4sWGG5BEy2zY...,https://api.spotify.com/v1/audio-analysis/4sWG...,275040,4
3,0.784,0.319,2,-11.813,1,0.081,0.85,0.0,0.129,0.777,115.591,audio_features,6DadMrUGPSV3gxQIHJ56C8,spotify:track:6DadMrUGPSV3gxQIHJ56C8,https://api.spotify.com/v1/tracks/6DadMrUGPSV3...,https://api.spotify.com/v1/audio-analysis/6Dad...,178440,3
4,0.79,0.102,11,-16.727,0,0.122,0.941,0.0,0.116,0.714,101.641,audio_features,6axtrF0lqCidyJAlxzX6F8,spotify:track:6axtrF0lqCidyJAlxzX6F8,https://api.spotify.com/v1/tracks/6axtrF0lqCid...,https://api.spotify.com/v1/audio-analysis/6axt...,182360,4


In [14]:
for i in french_df['key']:
    print(i)

0
6
4
2
11
1
2
8
8
9
9
0
7
11
1
8
1
1
5
9
9
4
4
0
1
5
7
11
4
0
2
1
0
4
2
11
6
3
11
11
9
4
11
0
7
4
7
4
9
11
11
10
2


## Call Functions 1.4

In [15]:
# retrieving track titles from playlist
def fetch_titles(songs):
    titles = []
    for song in songs['items']:
        titles.append(song['track']['name'])
    return titles

In [16]:
# calls playlist, returns titles of songs in playlist as dataframe
def fetch_track_names(genre_id):
    songs = call_spotify(genre_id)
    titles = fetch_titles(songs)
    df = pd.DataFrame(titles)
    df = df.rename(columns={0: 'Song Titles'})
    return df

In [509]:
# example of retrieving all track names
french_title_df = fetch_track_names(genre_dict.get('french playlist'))
french_title_df.head()

Unnamed: 0,Song Titles
0,La mer
1,Amsterdam - Live Olympia 1964
2,Le sud
3,A Paris
4,Chanson pour l'Auvergnat


## Call Functions 1.5

In [18]:
# retrieving track tempos 
def get_tempo(song_id):
    base = 'https://api.spotify.com/v1/audio-analysis/'
    link = base + song_id 
    r = requests.get(link, headers=headers)
    r_j = r.json()
    track_tempo = r_j['track']['tempo']
    return track_tempo

In [19]:
# retrieving all track tempos
def get_all_tempos(genre_id):
    songs = call_spotify(genre_id)
    ids = fetch_ids(songs)
    tempos = []
    for i in tqdm(ids):
        tempos.append(get_tempo(i))
    return tempos

In [20]:
# retrieving track tempo confidence
def get_tempo_consistency(song_id):
    base = 'https://api.spotify.com/v1/audio-analysis/'
    link = base + song_id 
    r = requests.get(link, headers=headers)
    r_j = r.json()
    tempo_confidence = r_j['track']['tempo_confidence']
    return tempo_confidence

In [21]:
# convert key number to pitch name
def convert_key_to_pitchname(track_key):
    if track_key == -1:
        return 'Key Not Defined'
    if track_key == 0:
        return 'C'
    if track_key == 1:
        return 'C#/Db'
    if track_key == 2:
        return 'D'
    if track_key == 3:
        return 'D#/Eb'
    if track_key == 4:
        return 'E'
    if track_key == 5:
        return 'F'
    if track_key == 6:
        return 'F#/Gb'
    if track_key == 7:
        return 'G'
    if track_key == 8:
        return 'G#/Ab'
    if track_key == 9:
        return 'A'
    if track_key == 10:
        return 'A#/Bb'
    if track_key == 11:
        return 'B'

In [22]:
# retrieving track key
def get_key(song_id):
    base = 'https://api.spotify.com/v1/audio-analysis/'
    link = base + song_id 
    r = requests.get(link, headers=headers)
    r_j = r.json()
    track_key = r_j['track']['key']
    converted_key = convert_key_to_pitchname(track_key)
    return converted_key

In [23]:
# retrieving all track keys from playlist
def get_all_keys(genre_id):
    songs = call_spotify(genre_id)
    ids = fetch_ids(songs)
    keys = []
    count = 0
    for i in ids:
        keys.append(get_key(i))
        print('--- key data {} retrieved ---'.format(count))
        count += 1
    return keys
# french_pl_key_df = pd.DataFrame(get_all_keys(genre_dict.get('french playlist')))

In [24]:
# retrieving track key confidence
def get_key_consistency(song_id):
    base = 'https://api.spotify.com/v1/audio-analysis/'
    link = base + song_id 
    r = requests.get(link, headers=headers)
    r_j = r.json()
    key_confidence = r_j['track']['key_confidence']
    return key_confidence

In [25]:
# convert mode to diatonic name
def convert_mode_to_diatonic_name(track_mode):
    if track_mode == -1:
        return 'Mode Not Defined'
    if track_mode == 0:
        return 'Minor'
    if track_mode == 1:
        return 'Major'

In [26]:
# retrieving track mode of key (maj or min)
def get_key_mode(song_id):
    base = 'https://api.spotify.com/v1/audio-analysis/'
    link = base + song_id
    r = requests.get(link, headers=headers)
    r_j = r.json()
    track_mode = r_j['track']['mode']
    converted_mode = convert_mode_to_diatonic_name(track_mode)
    return converted_mode

In [27]:
# retrieving all track key modes from playlist
def get_all_key_modes(genre_id):
    songs = call_spotify(genre_id)
    ids = fetch_ids(songs)
    modes = []
    count = 0
    for i in ids:
        modes.append(get_key_mode(i))
        print('--- mode data {} retrieved ---'.format(count))
        count += 1
    return modes
# french_pl_km_df = pd.DataFrame(get_all_key_modes(genre_dict.get('french playlist')))

In [28]:
# retrieveing track key mode confidence
def get_mode_consistency(song_id):
    base = 'https://api.spotify.com/v1/audio-analysis/'
    link = base + song_id 
    r = requests.get(link, headers=headers)
    r_j = r.json()
    mode_confidence = r_j['track']['mode_confidence']
    return mode_confidence

## Feature Engineering
In order to further describe how each genre might differ other than through the high level dimensions offered by Spotify's API, below are engineered features to help better describe the form of a track within a genre. The three elements of form explored are: 
    1. Harmonic Progression
    2. Modal Progression
    3. Tempo Progression
    
 These features are engineered using the audio analysis end points. Diving into the segment anaylsis and stringing each segment together to recognize the element's progression through out the track. 

In [29]:
# get audio analysis links
def audio_analysis_links(song_ids):
    base =  'https://api.spotify.com/v1/audio-analysis/'
    list_of_audio_analysis_links = []
    for i in tqdm(song_ids):
        list_of_audio_analysis_links.append(base + str(i))
    return list_of_audio_analysis_links

In [88]:
# example of retriving all audio anaylsis links for each song in playlist
# audio_analysis_links(fetch_ids(call_spotify(genre_dict.get('hip hop playlist'))))

['https://api.spotify.com/v1/audio-analysis/696DnlkuDOXcMAnKlTgXXK',
 'https://api.spotify.com/v1/audio-analysis/3ClBKQkKoaUQ6UOhe2xlJK',
 'https://api.spotify.com/v1/audio-analysis/3X6KN4WFgPYBETFMMX675P',
 'https://api.spotify.com/v1/audio-analysis/6u7jPi22kF8CTQ3rb9DHE7',
 'https://api.spotify.com/v1/audio-analysis/51Fjme0JiitpyXKuyQiCDo',
 'https://api.spotify.com/v1/audio-analysis/6fTt0CH2t0mdeB2N9XFG5r',
 'https://api.spotify.com/v1/audio-analysis/4VginDwYTP2eaHJzO0QMjG',
 'https://api.spotify.com/v1/audio-analysis/1iSqfoUFnQwV0QW1EfUit8',
 'https://api.spotify.com/v1/audio-analysis/56sxR3i2zr7AtFbgGu12ZD',
 'https://api.spotify.com/v1/audio-analysis/3VyjsVV24RmBIbWJAeUJNu',
 'https://api.spotify.com/v1/audio-analysis/5E1jEFXElio2eOxjJ3WLnk',
 'https://api.spotify.com/v1/audio-analysis/6MWtB6iiXyIwun0YzU6DFP',
 'https://api.spotify.com/v1/audio-analysis/7aiClxsDWFRQ0Kzk5KI5ku',
 'https://api.spotify.com/v1/audio-analysis/1CxYDvfq3KVvsPSZ9NCdM9',
 'https://api.spotify.com/v1/audio

In [30]:
# retrieve dictionary of track analysis data
def fetch_analysis(links):
    analysis_data = []
    count = 0
    for i in links:
        r = requests.get(i, headers=headers)
        r_j = r.content
        analysis_data.append(json.loads(r_j))
        print('--- data on link {} retrieved--'.format(count))
        count += 1
    return analysis_data


In [94]:
# fetch_analysis(audio_analysis_links(fetch_ids(call_spotify(genre_dict.get('hip hop playlist')))))

In [31]:
# keys of section (chord progression) in track
def get_harmonic_progression(song):
    section_keys = []
    for section in song['sections']:
        k = section['key']
        if k == -1:
            section_keys.append('Key not defined')
        if k == 0:
            section_keys.append('C')
        if k == 1:
            section_keys.append('C#/Db')
        if k == 2:
            section_keys.append('D')
        if k == 3:
            section_keys.append('D#/Eb')
        if k == 4: 
            section_keys.append('E')
        if k == 5: 
            section_keys.append('F')
        if k == 6:
            section_keys.append('F#/Gb')
        if k == 7:
            section_keys.append('G')
        if k == 8:
            section_keys.append('G#/Ab')
        if k == 9:
            section_keys.append('A')
        if k == 10:
            section_keys.append('A#/Bb')
        if k == 11: 
            section_keys.append('B')
    return section_keys

In [32]:
# mode of key in section (tonal progression)    
def get_harmonic_mode_progression(song):   
    section_key_modes = []
    for section in song['sections']:
        m = section['mode']
        if m == -1:
            section_key_modes.append('Mode not Defined')
        if m == 0:
            section_key_modes.append('Minor')
        if m == 1:
            section_key_modes.append('Major')
    return section_key_modes

In [33]:
# tempo changes through out sections
def get_tempo_progression(song):    
    section_tempo = []
    for section in song['sections']:
        section_tempo.append(section['tempo'])
    return section_tempo

In [34]:
# returns section info on track passed through
def get_section_info(songs):
    section_list = []
    for song in tqdm(songs):
        h_prog = get_harmonic_progression(song)
        m_prog = get_harmonic_mode_progression(song)
        t_prog = get_tempo_progression(song)
        section_prog = [h_prog, m_prog, t_prog]
        section_list.append(section_prog)
    return section_list

In [36]:
# retrieve all converted track key and key mode from playlist
def get_track_global_key_modes(genre_id):
    print('Getting Track Keys')
    track_keys = get_all_keys(genre_id)
    print('Getting Track Modes')
    track_modes = get_all_key_modes(genre_id)
    print('Compiling Global Data')
    print('\n')
    harmonic_info = ([track_modes, track_keys])
    return harmonic_info

## Function to Output all Engineered Features

In [37]:
# calls spotify playlist, returns section info on each track in playlist
def fetch_sections_info(genre_id):
    print('Getting Section Data')
    print('\n')
    data = call_spotify(genre_id)
    ids = fetch_ids(data)
    links = audio_analysis_links(ids)
    songs = fetch_analysis(links)
    section_info = get_section_info(songs)
    return section_info

In [38]:
# combine global and section info of each track from playlist 
# NOTE: for this function to work -- it needs a pivot table - or make function the cell below
def get_global_and_section_info(genre_id):
    print('Getting Global Data')
    print('\n')
    global_section_info = []
    global_info = get_track_global_key_modes(genre_id)
    print('Global Data Retrieved')
    print('\n')
    print('Getting Section Data')
    section_info = fetch_sections_info(genre_id)
    print('Section Data Retrieved')
    global_section_info = [global_info, section_info]
    print('Global and Section Data Compiled')
    return global_section_info
    

## Example of Engineered Features

In [39]:
french_pl_keys_df = pd.DataFrame(get_all_keys(genre_dict.get('french playlist')))
french_pl_keys_df = french_pl_keys_df.rename(columns={0: 'Track Keys'})
french_pl_key_modes_df = pd.DataFrame(get_all_key_modes(genre_dict.get('french playlist')))
french_pl_key_modes_df = french_pl_key_modes_df.rename(columns={0: 'Track Modes'})
french_global = pd.concat([french_pl_key_modes_df, french_pl_keys_df], axis = 1)

french_sections_df = pd.DataFrame(fetch_sections_info(genre_dict.get('french playlist')))
french_sections_df = french_sections_df.rename(columns={0: 'Harmonic Progression', 
                                                        1: 'Modal Progression',
                                                        2: 'Tempo Progression'})

french_global_and_section_info_df = pd.concat([french_global, french_sections_df], axis = 1)

--- key data 0 retrieved ---
--- key data 1 retrieved ---
--- key data 2 retrieved ---
--- key data 3 retrieved ---
--- key data 4 retrieved ---
--- key data 5 retrieved ---
--- key data 6 retrieved ---
--- key data 7 retrieved ---
--- key data 8 retrieved ---
--- key data 9 retrieved ---
--- key data 10 retrieved ---
--- key data 11 retrieved ---
--- key data 12 retrieved ---
--- key data 13 retrieved ---
--- key data 14 retrieved ---
--- key data 15 retrieved ---
--- key data 16 retrieved ---
--- key data 17 retrieved ---
--- key data 18 retrieved ---
--- key data 19 retrieved ---
--- key data 20 retrieved ---
--- key data 21 retrieved ---
--- key data 22 retrieved ---
--- key data 23 retrieved ---
--- key data 24 retrieved ---
--- key data 25 retrieved ---
--- key data 26 retrieved ---
--- key data 27 retrieved ---
--- key data 28 retrieved ---
--- key data 29 retrieved ---
--- key data 30 retrieved ---
--- key data 31 retrieved ---
--- key data 32 retrieved ---
--- key data 33 retr

100%|██████████| 53/53 [00:00<00:00, 167141.44it/s]


--- data on link 0 retrieved--
--- data on link 1 retrieved--
--- data on link 2 retrieved--
--- data on link 3 retrieved--
--- data on link 4 retrieved--
--- data on link 5 retrieved--
--- data on link 6 retrieved--
--- data on link 7 retrieved--
--- data on link 8 retrieved--
--- data on link 9 retrieved--
--- data on link 10 retrieved--
--- data on link 11 retrieved--
--- data on link 12 retrieved--
--- data on link 13 retrieved--
--- data on link 14 retrieved--
--- data on link 15 retrieved--
--- data on link 16 retrieved--
--- data on link 17 retrieved--
--- data on link 18 retrieved--
--- data on link 19 retrieved--
--- data on link 20 retrieved--
--- data on link 21 retrieved--
--- data on link 22 retrieved--
--- data on link 23 retrieved--
--- data on link 24 retrieved--
--- data on link 25 retrieved--
--- data on link 26 retrieved--
--- data on link 27 retrieved--
--- data on link 28 retrieved--
--- data on link 29 retrieved--
--- data on link 30 retrieved--
--- data on link 3

100%|██████████| 53/53 [00:00<00:00, 40491.46it/s]

--- data on link 52 retrieved--





In [508]:
french_global_and_section_info_df.head()

Unnamed: 0,Track Modes,Track Keys,Harmonic Progression,Modal Progression,Tempo Progression,Analysis,Track Title
0,Major,C,"[F, C, C, F#/Gb, C, C, B, A, D]","Major,Major,Major,Minor,Major,Major,Minor,Mino...","95.392,112.027,122.547,83.14,72.227,69.553,67....","[IV, I, I, #iv, I, I, vii/o, vi, ii]",La mer
1,Minor,F#/Gb,"[D, B, F#/Gb, C#/Db, E, D, C#/Db, F#/Gb, B]","Major,Minor,Minor,Major,Major,Major,Major,Mino...","138.424,136.428,131.483,133.906,124.14,129.318...","[bVI, iv, i, v, bvii, bVI, v, i, iv]",Amsterdam - Live Olympia 1964
2,Major,E,"[G#/Ab, G#/Ab, G#/Ab, E, B, E, F#/Gb, E, E, G#...","Minor,Minor,Minor,Major,Major,Major,Major,Majo...","88.913,89.599,90.038,90.039,89.875,89.987,92.3...","[iii, iii, iii, I, V, I, ii, I, I, iii, iii]",Le sud
3,Major,D,"[D, D, A, D#/Eb, D, D#/Eb, B, D]","Major,Major,Major,Major,Major,Major,Minor,Majo","115.29,116.341,115.261,115.403,115.378,115.338...","[I, I, V, bii, I, bii, vi, I]",A Paris
4,Minor,B,"[C, F#/Gb, D, F#/Gb, F#/Gb, D, B, F#/Gb, B]","Major,Minor,Major,Minor,Minor,Major,Minor,Majo...","101.869,101.684,102.357,101.909,100.553,102.24...","[bii, V, bIII, V, V, bIII, i, V, i]",Chanson pour l'Auvergnat


## Feature Engineering Function 1.2
Now that the dataframe is generated. There are a few more steps necessary before this data can be used.
    1. Clean dataframe cells so each cell has strings and not lists
    2. Transform the Harmonic Progression into Roman Numeral Analysis

In [41]:
# function to convert list of words into string
def l_to_s(_list):
    s = ''
    for i in _list:
        s = s + i + ','
    return s[:-2] 

In [42]:
# function to convert list of integers into a string
def l_to_int(_list):
    s = ''
    for i in _list:
        s = s + str(i) + ','
    return s[:-2] 

In [43]:
# cleaning
french_global_and_section_info_df['Harmonic Progression'] = french_global_and_section_info_df['Harmonic Progression'].apply(lambda x: list(x))
french_global_and_section_info_df['Modal Progression'] = french_global_and_section_info_df['Modal Progression'].apply(lambda x: l_to_s(x))
french_global_and_section_info_df['Tempo Progression'] = french_global_and_section_info_df['Tempo Progression'].apply(lambda x: l_to_int(x))

In [44]:
french_global_and_section_info_df.head()

Unnamed: 0,Track Modes,Track Keys,Harmonic Progression,Modal Progression,Tempo Progression
0,Major,C,"[F, C, C, F#/Gb, C, C, B, A, D]","Major,Major,Major,Minor,Major,Major,Minor,Mino...","95.392,112.027,122.547,83.14,72.227,69.553,67...."
1,Minor,F#/Gb,"[D, B, F#/Gb, C#/Db, E, D, C#/Db, F#/Gb, B]","Major,Minor,Minor,Major,Major,Major,Major,Mino...","138.424,136.428,131.483,133.906,124.14,129.318..."
2,Major,E,"[G#/Ab, G#/Ab, G#/Ab, E, B, E, F#/Gb, E, E, G#...","Minor,Minor,Minor,Major,Major,Major,Major,Majo...","88.913,89.599,90.038,90.039,89.875,89.987,92.3..."
3,Major,D,"[D, D, A, D#/Eb, D, D#/Eb, B, D]","Major,Major,Major,Major,Major,Major,Minor,Majo","115.29,116.341,115.261,115.403,115.378,115.338..."
4,Minor,B,"[C, F#/Gb, D, F#/Gb, F#/Gb, D, B, F#/Gb, B]","Major,Minor,Major,Minor,Minor,Major,Minor,Majo...","101.869,101.684,102.357,101.909,100.553,102.24..."


In [45]:
# read harmonic_function.py to see how Harmonic Progression is sorted
from harmonic_function import *

In [46]:
prog = get_progression_m(french_global_and_section_info_df['Track Modes'][0], 
                         french_global_and_section_info_df['Track Keys'][0], 
                         french_global_and_section_info_df['Harmonic Progression'][0])
prog

['IV', 'I', 'I', '#iv', 'I', 'I', 'vii/o', 'vi', 'ii']

## Make a DataFrame that has All Roman Numeral Analysis

In [47]:
def get_all_global_modes(dataframe):
    global_modes = []
    for i in dataframe['Track Modes']:
        global_modes.append(i)
    return global_modes

def get_all_global_keys(dataframe):
    global_keys = []
    for i in dataframe['Track Keys']:
        global_keys.append(i)
    return global_keys
        
def get_all_harmonic_prog(dataframe):
    harmonic_prog = []
    for i in dataframe['Harmonic Progression']:
        harmonic_prog.append(i)
    return harmonic_prog
        

In [48]:
def retrieve_roman_numeral_analysis(dataframe):
    modes = get_all_global_modes(dataframe)
    keys = get_all_global_keys(dataframe)
    harmonic_prog = get_all_harmonic_prog(dataframe)
    analysis = []
    for i in list(range(len(dataframe))):
        analysis.append(get_progression_m(modes[i], keys[i], harmonic_prog[i]))
    return analysis

In [49]:
french_global_and_section_info_df['Analysis'] = retrieve_roman_numeral_analysis(french_global_and_section_info_df)

In [50]:
french_global_and_section_info_df['Track Title'] = fetch_track_names(genre_dict.get('french playlist'))

In [504]:
french_global_and_section_info_df.head()

Unnamed: 0,Track Modes,Track Keys,Harmonic Progression,Modal Progression,Tempo Progression,Analysis,Track Title
0,Major,C,"[F, C, C, F#/Gb, C, C, B, A, D]","Major,Major,Major,Minor,Major,Major,Minor,Mino...","95.392,112.027,122.547,83.14,72.227,69.553,67....","[IV, I, I, #iv, I, I, vii/o, vi, ii]",La mer
1,Minor,F#/Gb,"[D, B, F#/Gb, C#/Db, E, D, C#/Db, F#/Gb, B]","Major,Minor,Minor,Major,Major,Major,Major,Mino...","138.424,136.428,131.483,133.906,124.14,129.318...","[bVI, iv, i, v, bvii, bVI, v, i, iv]",Amsterdam - Live Olympia 1964
2,Major,E,"[G#/Ab, G#/Ab, G#/Ab, E, B, E, F#/Gb, E, E, G#...","Minor,Minor,Minor,Major,Major,Major,Major,Majo...","88.913,89.599,90.038,90.039,89.875,89.987,92.3...","[iii, iii, iii, I, V, I, ii, I, I, iii, iii]",Le sud
3,Major,D,"[D, D, A, D#/Eb, D, D#/Eb, B, D]","Major,Major,Major,Major,Major,Major,Minor,Majo","115.29,116.341,115.261,115.403,115.378,115.338...","[I, I, V, bii, I, bii, vi, I]",A Paris
4,Minor,B,"[C, F#/Gb, D, F#/Gb, F#/Gb, D, B, F#/Gb, B]","Major,Minor,Major,Minor,Minor,Major,Minor,Majo...","101.869,101.684,102.357,101.909,100.553,102.24...","[bii, V, bIII, V, V, bIII, i, V, i]",Chanson pour l'Auvergnat


In [52]:
# function from harmonic_fucntion import
counting_unique_analysis = get_reduced_abstraction(retrieve_roman_numeral_analysis(french_global_and_section_info_df))

In [53]:
counting_unique_analysis

Counter({('#iv', 'I', 'IV', 'ii', 'vi', 'vii/o'): 1,
         ('bVI', 'bvii', 'i', 'iv', 'v'): 1,
         ('I', 'V', 'ii', 'iii'): 1,
         ('I', 'V', 'bii', 'vi'): 1,
         ('V', 'bIII', 'bii', 'i'): 1,
         ('I', 'IV', 'iii', 'vi'): 1,
         ('I', 'bvii', 'ii', 'iii', 'vi'): 1,
         ('I', 'IV', 'V', 'biii', 'bvi', 'bvii', 'iii'): 1,
         ('I', 'V', 'biii'): 1,
         ('bIII', 'i', 'iv'): 2,
         ('I', 'IV', 'V', 'vi'): 1,
         ('bIII', 'bVI', 'bVII', 'i', 'iv', 'v', 'vi'): 1,
         ('I', 'IV', 'V'): 1,
         ('bIII', 'bVII', 'bii', 'i', 'iv'): 1,
         ('I', 'IV', 'V', 'ii', 'vi'): 1,
         ('#iv', 'I', 'IV', 'V', 'vi'): 1,
         ('I', 'ii', 'vii/o'): 1,
         ('I', 'V', 'vii/o'): 1,
         ('I', 'V', 'iii', 'vi'): 1,
         ('bIII', 'bVI', 'i', 'iv'): 2,
         ('bIII', 'bVI', 'i'): 2,
         ('I', 'V', 'iii'): 3,
         ('bIII', 'bVII', 'bii', 'i', 'ii/o', 'v', 'vi'): 1,
         ('I', 'IV', 'ii', 'vi'): 2,
         ('bIII

In [229]:
# reducing the analysis to a basic outline to determine similarity and reoccurance of analysis
trial_list = []
for i in retrieve_roman_numeral_analysis(french_global_and_section_info_df):
    trial_list.append(set(i))
practice_count = get_reduced_abstraction(trial_list)
practice_count

In [474]:
analysis = get_reduced_abstraction(trial_list)
def check_for_duplicates(arg):
    duplicate_list = []
    unique_list = []
    for i in arg:
        if arg.count(i) > 1:  
            duplicate_list.append(i)
        if arg.count(i) == 1:
            unique_list.append(i)
    print('Sonorus Chords')
    print(set(duplicate_list))
    print('Coloring Chords')
    print(unique_list)
    print('\n')
    Harmonic_signature = [set(duplicate_list), unique_list]
    print('Harmonic Signature')
    return Harmonic_signature
    

In [59]:
list_of_analysis = retrieve_roman_numeral_analysis(french_global_and_section_info_df)
list_of_analysis

[['IV', 'I', 'I', '#iv', 'I', 'I', 'vii/o', 'vi', 'ii'],
 ['bVI', 'iv', 'i', 'v', 'bvii', 'bVI', 'v', 'i', 'iv'],
 ['iii', 'iii', 'iii', 'I', 'V', 'I', 'ii', 'I', 'I', 'iii', 'iii'],
 ['I', 'I', 'V', 'bii', 'I', 'bii', 'vi', 'I'],
 ['bii', 'V', 'bIII', 'V', 'V', 'bIII', 'i', 'V', 'i'],
 ['I', 'iii', 'I', 'I', 'IV', 'iii', 'vi', 'iii', 'I'],
 ['bvii', 'bvii', 'I', 'ii', 'I', 'iii', 'vi', 'I', 'bvii', 'I', 'iii', 'ii'],
 ['iii', 'biii', 'IV', 'bvi', 'V', 'V', 'I', 'I', 'bvii', 'I', 'IV'],
 ['I', 'I', 'biii', 'I', 'I', 'I', 'V'],
 ['bIII', 'i', 'bIII', 'i', 'i', 'i', 'bIII', 'i', 'iv'],
 ['I', 'vi', 'I', 'IV', 'vi', 'vi', 'V', 'I', 'IV', 'I'],
 ['vi', 'v', 'iv', 'vi', 'bVII', 'bVII', 'bIII', 'i', 'v', 'bVI'],
 ['I', 'I', 'I', 'I', 'I', 'IV', 'V', 'I', 'V'],
 ['bii', 'i', 'i', 'iv', 'bVII', 'i', 'bii', 'bIII', 'bIII'],
 ['I', 'I', 'I', 'V', 'V', 'vi', 'IV', 'ii', 'V'],
 ['IV', 'I', 'IV', 'IV', 'IV', 'IV', 'vi', 'V', '#iv'],
 ['vii/o', 'I', 'ii', 'I', 'ii', 'I'],
 ['vii/o', 'vii/o', 'I', 'I

In [298]:
for i in list_of_analysis:
    x = check_for_duplicates(i)
    print(x)
        

Sonorus Chords
['I', 'I', 'I', 'I']
Coloring Chords
['IV', '#iv', 'vii/o', 'vi', 'ii']
Progression Seperated
None
Sonorus Chords
['bVI', 'iv', 'i', 'v', 'bVI', 'v', 'i', 'iv']
Coloring Chords
['bvii']
Progression Seperated
None
Sonorus Chords
['iii', 'iii', 'iii', 'I', 'I', 'I', 'I', 'iii', 'iii']
Coloring Chords
['V', 'ii']
Progression Seperated
None
Sonorus Chords
['I', 'I', 'bii', 'I', 'bii', 'I']
Coloring Chords
['V', 'vi']
Progression Seperated
None
Sonorus Chords
['V', 'bIII', 'V', 'V', 'bIII', 'i', 'V', 'i']
Coloring Chords
['bii']
Progression Seperated
None
Sonorus Chords
['I', 'iii', 'I', 'I', 'iii', 'iii', 'I']
Coloring Chords
['IV', 'vi']
Progression Seperated
None
Sonorus Chords
['bvii', 'bvii', 'I', 'ii', 'I', 'iii', 'I', 'bvii', 'I', 'iii', 'ii']
Coloring Chords
['vi']
Progression Seperated
None
Sonorus Chords
['IV', 'V', 'V', 'I', 'I', 'I', 'IV']
Coloring Chords
['iii', 'biii', 'bvi', 'bvii']
Progression Seperated
None
Sonorus Chords
['I', 'I', 'I', 'I', 'I']
Coloring Ch

In [228]:
import re

In [438]:
# draft of function that takes in roman numeral analysis and abstracts the harmony into a numerical conversion
# the numerical conversion is used to generate a harmonic signature for each track

major_chord_analysis_finder = re.compile('^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$')
minor_i_chord_analysis_finder = re.compile('^i$')
minor_v_chord_analysis_finder = re.compile('^v$')
minor_ii_iv_chord_analysis_finder = re.compile('^i.$')
minor_iii_chord_analysis_finder = re.compile('^iii$')
minor_vi_chord_analysis_finder = re.compile('^vi$')
minor_vii_chord_analysis_finder = re.compile('^vii$')
minor_dim_vii_chord_analysis_finder = re.compile('^vii..$')
sharp_chord_analysis_finder = re.compile('^#..$')
sharp_chord_analysis_finder_ = re.compile('^#...$')
flat_chord_analysis_finder = re.compile('^b..$')
flat_chord_analysis_finder_ = re.compile('^b...$')
new_list_of_analysis = [[] for l in range(len(list_of_analysis))]
numerical_conversion = [[] for l in range(len(list_of_analysis))]
count = 0
for i in list_of_analysis:
    for a in i:
        print(count)
        major_result = re.findall(major_chord_analysis_finder, a)
        minor_i_result = re.findall(minor_i_chord_analysis_finder, a)
        minor_v_result = re.findall(minor_v_chord_analysis_finder, a)
        minor_ii_iv_result = re.findall(minor_ii_iv_chord_analysis_finder, a)
        minor_iii_result = re.findall(minor_iii_chord_analysis_finder, a)
        minor_vi_result = re.findall(minor_vi_chord_analysis_finder, a)
        minor_vii_result = re.findall(minor_vii_chord_analysis_finder, a)
        minor_dim_vii_result = re.findall(minor_dim_vii_chord_analysis_finder, a)
        sharp_result = re.findall(sharp_chord_analysis_finder, a)
        sharp_result_ = re.findall(sharp_chord_analysis_finder_, a)
        flat_result = re.findall(flat_chord_analysis_finder, a)
        flat_result_ = re.findall(flat_chord_analysis_finder_, a)
        if major_result:
            print("Major Chords found.")
            print(major_result)
            new_list_of_analysis[count].append(major_result)
            numerical_conversion[count].append('1')
        if minor_i_result:
            print("Minor Chords found.")	
            print(minor_i_result)
            new_list_of_analysis[count].append(minor_i_result)
            numerical_conversion[count].append('0')
        if minor_v_result:
            print(minor_v_result)
            new_list_of_analysis[count].append(minor_v_result)
            numerical_conversion[count].append('0')
        if minor_ii_iv_result:
            print(minor_ii_iv_result)
            new_list_of_analysis[count].append(minor_ii_iv_result)
            numerical_conversion[count].append('0')
        if minor_iii_result:
            print(minor_iii_result)
            new_list_of_analysis[count].append(minor_iii_result)
            numerical_conversion[count].append('0')
        if minor_vi_result:
            print(minor_vi_result)
            new_list_of_analysis[count].append(minor_vi_result)
            numerical_conversion[count].append('0')
        if minor_vii_result:
            print(minor_vii_result)
            new_list_of_analysis[count].append(minor_vii_result)
            numerical_conversion[count].append('0')
        if minor_dim_vii_result:
            print(minor_dim_vii_result)
            new_list_of_analysis[count].append(minor_dim_vii_result)
            numerical_conversion[count].append('0')
        if sharp_result:
            print("Sharp Chords Found")
            print(sharp_result)
            new_list_of_analysis[count].append(sharp_result)
            numerical_conversion[count].append('#')
        if sharp_result_:
            print(sharp_result_)
            new_list_of_analysis[count].append(sharp_result_)
            numerical_conversion[count].append('#')
        if flat_result:
            print('Flat Chords Found')
            print(flat_result)
            new_list_of_analysis[count].append(flat_result)
            numerical_conversion[count].append('b')
        if flat_result_:
            print(flat_result_)
            new_list_of_analysis[count].append(flat_result_)
            numerical_conversion[count].append('b')
        else:
            pass
    count += 1


0
Major Chords found.
[('', '', 'IV')]
0
Major Chords found.
[('', '', 'I')]
0
Major Chords found.
[('', '', 'I')]
0
Sharp Chords Found
['#iv']
0
Major Chords found.
[('', '', 'I')]
0
Major Chords found.
[('', '', 'I')]
0
['vii/o']
0
['vi']
0
['ii']
1
Flat Chords Found
['bVI']
1
['iv']
1
Minor Chords found.
['i']
1
['v']
1
['bvii']
1
Flat Chords Found
['bVI']
1
['v']
1
Minor Chords found.
['i']
1
['iv']
2
['iii']
2
['iii']
2
['iii']
2
Major Chords found.
[('', '', 'I')]
2
Major Chords found.
[('', '', 'V')]
2
Major Chords found.
[('', '', 'I')]
2
['ii']
2
Major Chords found.
[('', '', 'I')]
2
Major Chords found.
[('', '', 'I')]
2
['iii']
2
['iii']
3
Major Chords found.
[('', '', 'I')]
3
Major Chords found.
[('', '', 'I')]
3
Major Chords found.
[('', '', 'V')]
3
Flat Chords Found
['bii']
3
Major Chords found.
[('', '', 'I')]
3
Flat Chords Found
['bii']
3
['vi']
3
Major Chords found.
[('', '', 'I')]
4
Flat Chords Found
['bii']
4
Major Chords found.
[('', '', 'V')]
4
['bIII']
4
Major Chor

In [397]:
# drafting how to further clean out put of regex function
x = [(tuple(int(x) if x.isdigit() else x for x in _ if x)) for _ in new_list_of_analysis[0][0]]
x = ''.join(x[0])
x

'IV'

In [422]:
cleaned_list = [[] for l in range(len(new_list_of_analysis))]
count = 0
for x in new_list_of_analysis:
    for i in x:
        for a in i:
            if type(a) == tuple:
                x = [(tuple(int(x) if x.isdigit() else x for x in _ if x)) for _ in i]
                x = ''.join(x[0])
                cleaned_list[count].append(x)
            if type(a) == str:
                cleaned_list[count].append(a)
    count += 1
cleaned_list

            

[['IV', 'I', 'I', '#iv', 'I', 'I', 'vii/o', 'vi', 'ii'],
 ['bVI', 'iv', 'i', 'v', 'bvii', 'bVI', 'v', 'i', 'iv'],
 ['iii', 'iii', 'iii', 'I', 'V', 'I', 'ii', 'I', 'I', 'iii', 'iii'],
 ['I', 'I', 'V', 'bii', 'I', 'bii', 'vi', 'I'],
 ['bii', 'V', 'bIII', 'V', 'V', 'bIII', 'i', 'V', 'i'],
 ['I', 'iii', 'I', 'I', 'IV', 'iii', 'vi', 'iii', 'I'],
 ['bvii', 'bvii', 'I', 'ii', 'I', 'iii', 'vi', 'I', 'bvii', 'I', 'iii', 'ii'],
 ['iii', 'biii', 'IV', 'bvi', 'V', 'V', 'I', 'I', 'bvii', 'I', 'IV'],
 ['I', 'I', 'biii', 'I', 'I', 'I', 'V'],
 ['bIII', 'i', 'bIII', 'i', 'i', 'i', 'bIII', 'i', 'iv'],
 ['I', 'vi', 'I', 'IV', 'vi', 'vi', 'V', 'I', 'IV', 'I'],
 ['vi', 'v', 'iv', 'vi', 'bVII', 'bVII', 'bIII', 'i', 'v', 'bVI'],
 ['I', 'I', 'I', 'I', 'I', 'IV', 'V', 'I', 'V'],
 ['bii', 'i', 'i', 'iv', 'bVII', 'i', 'bii', 'bIII', 'bIII'],
 ['I', 'I', 'I', 'V', 'V', 'vi', 'IV', 'ii', 'V'],
 ['IV', 'I', 'IV', 'IV', 'IV', 'IV', 'vi', 'V', '#iv'],
 ['vii/o', 'I', 'ii', 'I', 'ii', 'I'],
 ['vii/o', 'vii/o', 'I', 'I

In [441]:
list_of_analysis[52]

['iv', 'i', 'i', 'bIII', 'i', 'i', 'i', 'bIII', 'i']

In [424]:
new_list_of_analysis[52]

[['iv'], ['i'], ['i'], ['bIII'], ['i'], ['i'], ['i'], ['bIII'], ['i']]

In [426]:
cleaned_list[52]

['iv', 'i', 'i', 'bIII', 'i', 'i', 'i', 'bIII', 'i']

In [470]:
# the intention with the check for duplicates function is reduce each track to a harmonic sig
check_for_duplicates(cleaned_list[0])

Sonorus Chords
{'I'}
Coloring Chords
['IV', '#iv', 'vii/o', 'vi', 'ii']


Harmonic Signature


["{'I'}", ['IV', '#iv', 'vii/o', 'vi', 'ii']]

In [475]:
# 0 = minor chords
# 1 = major chords
# b = flat chords
# # = sharp chords

harmonic_sig = check_for_duplicates(numerical_conversion[0])
harmonic_sig

# for this example, it explains that in a fundamental sense, the peice goes from minor to major and is colored with sharp chords

Sonorus Chords
{'0', '1'}
Coloring Chords
['#']


Harmonic Signature


[{'0', '1'}, ['#']]

In [494]:
harmonic_signatures = []
for i in numerical_conversion: 
    x = check_for_duplicates(i)
    harmonic_signatures.append(x)
    harmonic_sig_df = pd.DataFrame(harmonic_signatures)
    harmonic_sig_df = harmonic_sig_df.rename(columns={0: 'Shape', 1: 'Color'})
harmonic_sig_df['Shape'] = harmonic_sig_df['Shape'].apply(lambda x: str(x))
harmonic_sig_df['Color'] = harmonic_sig_df['Color'].apply(lambda x: str(x))

Sonorus Chords
{'0', '1'}
Coloring Chords
['#']


Harmonic Signature
Sonorus Chords
{'0', 'b'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'1', '0'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'1', 'b'}
Coloring Chords
['0']


Harmonic Signature
Sonorus Chords
{'0', '1', 'b'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'0', '1'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'0', '1', 'b'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'1', 'b'}
Coloring Chords
['0']


Harmonic Signature
Sonorus Chords
{'1'}
Coloring Chords
['b']


Harmonic Signature
Sonorus Chords
{'0', 'b'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'0', '1'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'0', 'b'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'1'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'0', 'b'}
Coloring Chords
[]


Harmonic Signature
Sonorus Chords
{'0', '1'}
Coloring Chords
[]


Harmonic Signatur

In [501]:
harmonic_sig_df.head()

Unnamed: 0,Shape,Color
0,"{'0', '1'}",['#']
1,"{'0', 'b'}",[]
2,"{'1', '0'}",[]
3,"{'1', 'b'}",['0']
4,"{'0', '1', 'b'}",[]


In [497]:
encoded_shape = pd.get_dummies(harmonic_sig_df['Shape'])
encoded_color = pd.get_dummies(harmonic_sig_df['Color'])


In [502]:
encoded_shape.head()

Unnamed: 0,"{'0', '1', 'b'}","{'0', '1'}","{'0', 'b'}",{'0'},"{'1', '0', 'b'}","{'1', '0'}","{'1', 'b'}",{'1'}
0,0,1,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0
2,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,1,0
4,1,0,0,0,0,0,0,0


In [503]:
encoded_color.head()

Unnamed: 0,['#'],"['0', '#']","['0', 'b']",['0'],['1'],['b'],[]
0,1,0,0,0,0,0,0
1,0,0,0,0,0,0,1
2,0,0,0,0,0,0,1
3,0,0,0,1,0,0,0
4,0,0,0,0,0,0,1
