## Using Data_Gathering_Function Module
- This notebook generates a modeling-ready dataframe for one playlist (genre) at a time.
- To use it, set the `name` variable in the playlist-selection cell to a genre from `genre_dict`, then run all the cells.

In [None]:
# import the data_gathering_function module to have access to all cleaning and gathering materials
from data_gathering_function import *

In [None]:
# Spotify's API access token expires every hour.
# Uncomment and run the call below to refresh the access token.
# get_tokens()

In [None]:
# dictionary of genre and respective playlist ID
# (not defined in this notebook; presumably provided by the star import of
# data_gathering_function -- verify. Keys are looked up later as '<genre> playlist'.)
genre_dict

In [None]:
# Pick the genre to process. It must match a '<genre> playlist' key in genre_dict.
name = 'hip hop'

# Look up the playlist ID for that genre; .get() yields None when the key is absent,
# so check the printed value before running the rest of the notebook.
play_list = genre_dict.get(f'{name} playlist')
print(play_list)

In [None]:
# how to retrieve genre high level analysis dataframe
# (call_spotify_return_feat_df is not defined in this notebook; presumably it comes
# from the star import of data_gathering_function -- verify)
df = call_spotify_return_feat_df(play_list)
# preview the first rows only
df.head()

## Engineered Features

In [None]:
# how to retrieve engineered feature dataframe part 1

# Track-level ("global") features: each helper result is wrapped in a frame and
# its positional column 0 is given a readable name.
pl_keys_df = pd.DataFrame(get_all_keys(play_list))
pl_keys_df = pl_keys_df.rename(columns={0: 'Track Keys'})
pl_key_modes_df = pd.DataFrame(get_all_key_modes(play_list))
pl_key_modes_df = pl_key_modes_df.rename(columns={0: 'Track Modes'})
global_df = pd.concat([pl_key_modes_df, pl_keys_df], axis = 1)

# Section-level features: positional columns 0, 1 and 2 of the fetched frame are
# renamed to the three progression columns used by the later cells.
sections_df = pd.DataFrame(fetch_sections_info(play_list))
# Rename the frame built on the line above. The previous code renamed
# `classical_sections_df`, a name that is never defined in this notebook, which
# raises NameError on a fresh kernel and ignores the selected playlist otherwise.
sections_df = sections_df.rename(columns={0: 'Harmonic Progression',
                                          1: 'Modal Progression',
                                          2: 'Tempo Progression'})

# Side-by-side join of track-level and section-level features (aligned on the row index).
global_and_section_info_df = pd.concat([global_df, sections_df], axis = 1)
global_and_section_info_df.head()

In [None]:
# cleaning: run each progression column through its own element-wise converter
progression_cleaners = {
    'Harmonic Progression': list,
    'Modal Progression': l_to_s,
    'Tempo Progression': l_to_int,
}
for prog_column, prog_cleaner in progression_cleaners.items():
    global_and_section_info_df[prog_column] = (
        global_and_section_info_df[prog_column].apply(prog_cleaner)
    )

In [None]:
# read harmonic_function.py to see how Harmonic Progression is sorted
from harmonic_function import *

In [None]:
# Worked example: call get_progression_m with the mode, key and harmonic
# progression of the track at row label 0.
prog = get_progression_m(
    global_and_section_info_df.loc[0, 'Track Modes'],
    global_and_section_info_df.loc[0, 'Track Keys'],
    global_and_section_info_df.loc[0, 'Harmonic Progression'],
)
prog

In [None]:
# Add two columns to the feature frame in place:
# 'Analysis' holds whatever retrieve_roman_numeral_analysis returns for the frame
# (presumably one roman-numeral analysis per track -- verify in harmonic_function.py).
global_and_section_info_df['Analysis'] = retrieve_roman_numeral_analysis(global_and_section_info_df)
# 'Track Title' holds the track names fetched for the same playlist
# (assumes they come back in the same order as the rows -- TODO confirm).
global_and_section_info_df['Track Title'] = fetch_track_names(play_list)

In [None]:
# preview the feature frame now that 'Analysis' and 'Track Title' have been added
global_and_section_info_df.head()

In [None]:
# making a list of all roman numeral analysis
# (same helper call that filled the 'Analysis' column, kept here as a standalone object)
list_of_analysis = retrieve_roman_numeral_analysis(global_and_section_info_df)
# NOTE: this displays the whole object, not just a preview
list_of_analysis

In [None]:
# example of a converted list of analyses
converted = get_numeric_conversion(list_of_analysis)
# the first element is the cleaned roman numeral analysis
converted[0]

In [None]:
# the second element is the numeric conversion of the roman numeral analysis
converted[1]

In [None]:
# example of how each converted list can be used with get_harmonic_signature
# (here: the first entry of the cleaned roman numeral list)
get_harmonic_signature(converted[0][0])

In [None]:
# same example, using the first entry of the numeric conversion list
get_harmonic_signature(converted[1][0])

# legend for the symbols in the signature:
# 0 = minor chords
# 1 = major chords
# b = flat chords
# # = sharp chords

# in this example, the signature shows that, at a fundamental level,
# the piece stays in a major functional harmony

In [None]:
# example of the full shape-and-color encoding function
# (takes the whole `converted` object, i.e. both lists returned by get_numeric_conversion)
shape_and_color_df = encode_shape_and_color(converted)

In [None]:
# preview the encoded shape-and-color features
shape_and_color_df.head()

In [None]:
# Join every engineered feature side by side: key/mode/section info on the left,
# shape-and-color encoding on the right (rows are aligned on the index).
en_feat_df = pd.concat(
    [global_and_section_info_df, shape_and_color_df],
    axis='columns',
)

In [None]:
# Append Spotify's high level analysis (`df`) to the right of the engineered
# features; rows are aligned on the index.
full_df = pd.concat(
    [en_feat_df, df],
    axis='columns',
)

In [None]:
# Keep only the columns needed for modeling: remove the listed identifier/URL
# fields, the raw progression columns, and the other fields named below.
cleaned_df = full_df.drop(
    columns=[
        'analysis_url',
        'track_href',
        'type',
        'uri',
        'id',
        'Tempo Progression',
        'Modal Progression',
        'Track Modes',
        'tempo',
        'duration_ms',
        'Harmonic Progression',
        'Analysis',
        'Track Title',
    ]
)

In [None]:
# One-hot encode the track key: build the dummy columns, then swap them in for
# the original 'Track Keys' column (dummies are appended on the right).
encoded_keys = pd.get_dummies(cleaned_df['Track Keys'])
cleaned_df = pd.concat(
    [cleaned_df.drop(columns=['Track Keys']), encoded_keys],
    axis='columns',
)

In [None]:
# One-hot encode the time signature: build the dummy columns, then swap them in
# for the original 'time_signature' column (dummies are appended on the right).
# NOTE(review): dummy columns are named after the raw values, so if the track keys
# are also numeric these names may collide with the key dummies -- confirm.
encoded_time = pd.get_dummies(cleaned_df['time_signature'])
cleaned_df = pd.concat(
    [cleaned_df.drop(columns=['time_signature']), encoded_time],
    axis='columns',
)

In [None]:
# label every row with the selected genre name (the same constant value for the whole playlist)
cleaned_df['Genre'] = name

In [None]:
# columns of the final dataframe: all engineered features plus Spotify's high level analysis
cleaned_df.columns

In [None]:
# Persist the modeling frame as ./genre_csv/<name>_CSV (no row index written).
# The ./genre_csv/ directory must already exist.
cleaned_df.to_csv(f'./genre_csv/{name}_CSV', index=False)

In [None]:
# Read the file back from ./genre_csv/<name>_CSV to check what was written.
pd.read_csv(f'./genre_csv/{name}_CSV')