In [34]:
import warnings
                      
import pandas  ## This is the module for creating and manipulating DataFrame

from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)


##  Config Variables

In [35]:
# Path of the CSV file used to build the main DataFrame (relative to the notebook).
# NOTE(review): get_csv_data is defined in a later cell of this notebook, so that
# cell has to be executed before this one on a fresh kernel.
relative_data_path = 'data.csv'
Data_DF = get_csv_data(relative_data_path)
# Column names offered to the user as playlist features; the list order is the
# order in which they are shown in the selection menu.
features_lst = ['valence', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'popularity', 'speechiness', 'tempo']
# Number of rows whose 'explicit' value is non-zero.
num_explicit = len(Data_DF[Data_DF['explicit'] !=0])
print(num_explicit)

14433


## Functions

**get_csv_data** – Takes the location of a csv file and returns a dataframe of the csv file.

In [36]:
def get_csv_data(location):
    """Read the CSV at ``location`` and return its contents as a DataFrame.

    Parameters
    ----------
    location : str or file-like
        Anything ``pandas.read_csv`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``read_csv`` defaults.
    """
    return pandas.read_csv(location)

**validate_user_input** – Takes a string (feature), asks the user for a whole number between 0 and 10 for that feature, and returns the answer divided by 10 as a float.

In [37]:
def validate_user_input(feature):
    """Ask the user how much of ``feature`` they want in the playlist.

    The user is prompted repeatedly until they enter a whole number between
    0 and 10. Leading and trailing whitespace around the answer is ignored.

    Parameters
    ----------
    feature : str
        Name of the feature shown in the prompt.

    Returns
    -------
    float
        The chosen level divided by 10, i.e. a weight between 0.0 and 1.0.
    """
    valid_levels = [str(level) for level in range(11)]
    prompt = "Please enter a whole number between 0 and 10 to indicate the level of {} in your playlist.\n \n0 = Minimum {} \n10 = Maximum {} \n \nPlease enter a whole number between 0 and 10: ".format(feature, feature, feature)

    # input() never raises ValueError, so the answer is validated by membership
    # alone instead of wrapping the prompt in a try/except.
    while True:
        raw_response = input(prompt)
        print()

        response = raw_response.strip()
        if response in valid_levels:
            break
        print("{} is not a whole number between 0 and 10! \nPlease input a whole number between 0 and 10!\n".format(raw_response))

    print("You've selected {} out of 10, for {}.\n".format(response, feature))
    return int(response) / 10
#validate_user_input('energy')

In [19]:
def _wants_another_feature():
    """Ask a yes/no question and return True when the user answers yes."""
    while True:
        again = input("Would you like to select another feature? \nPlease enter yes or no: ")
        print()

        answer = again.strip().lower()
        if answer in ['yes', 'no']:
            return answer == 'yes'
        print("{} is not an option! \nPlease answer with yes or no.\n".format(again))


def validate_selected_features(features):
    """Let the user pick one or more features from a numbered menu.

    The menu is built from ``features``, so a list of any length works.
    After each valid choice the user is asked whether they want to pick
    another feature.

    Parameters
    ----------
    features : list of str
        Feature names to offer; option ``1`` is ``features[0]`` and so on.

    Returns
    -------
    list of str
        The chosen features in the order they were first selected, with
        repeated selections removed.

    Raises
    ------
    ValueError
        If ``features`` is empty, because no answer could ever be valid.
    """
    if not features:
        raise ValueError("features must contain at least one feature name")

    # Map the menu number typed by the user to the feature it stands for.
    options = {str(number): name for number, name in enumerate(features, start=1)}
    menu = "".join("\n{} = {} ".format(number, name) for number, name in options.items())
    prompt = (
        "Please select which feature you'd like to base your playlist suggestion on.\n"
        "If you would like to select multiple features, please select the most important one first. Thank you.\n"
        + menu
        + "\n\nPlease enter one of the numbers shown above: "
    )

    selected_features = []
    while True:
        response = input(prompt)
        print()

        choice = response.strip()
        if choice not in options:
            print("{} is not an option! \nPlease input a whole number between 1 and {}!".format(response, len(options)))
            continue

        selected_features.append(options[choice])
        print("You've selected {}.".format(options[choice]))
        print()

        if not _wants_another_feature():
            break

    print("Thank you. \n\nYou've selected:\n")
    print()

    # dict.fromkeys keeps the first occurrence of each name and preserves order.
    selected_features_lst = list(dict.fromkeys(selected_features))
    for name in selected_features_lst:
        print(name)

    print()
    return selected_features_lst
#validate_selected_features(features_lst)

###### Dataframe sorting and filtering functions

**sort_by_feature** – Takes a dataframe (df), a string column name (feature) and a boolean (order). The dataframe is sorted by feature, ascending when order is True and descending when it is False. The sorted dataframe is returned.

In [38]:
def sort_by_feature(df, feature, order):
    """Return the rows of ``df`` ordered by the ``feature`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to sort; it is not modified.
    feature : str
        Name of the column to sort on.
    order : bool
        ``True`` sorts ascending, ``False`` sorts descending.

    Returns
    -------
    pandas.DataFrame
        A new, sorted DataFrame.
    """
    sort_columns = [feature]
    return df.sort_values(by=sort_columns, ascending=order)

#sort_by_feature(df, "energy", True)

**data_by_feature_range** – Takes a dataframe (df), a string column name (feature) and two floats (lower_lim, upper_lim). The dataframe is filtered to the rows whose feature value lies between the lower and upper limits, inclusive. Returns the filtered dataframe.

In [39]:
def data_by_feature_range(df, feature, lower_lim, upper_lim):
    """Keep only the rows whose ``feature`` value lies in a closed interval.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter; it is not modified.
    feature : str
        Name of the column to test.
    lower_lim, upper_lim : float
        Inclusive lower and upper bounds.

    Returns
    -------
    pandas.DataFrame
        The rows with ``lower_lim <= df[feature] <= upper_lim``.
    """
    column = df[feature]
    at_or_above_lower = column >= lower_lim
    at_or_below_upper = column <= upper_lim
    return df[at_or_above_lower & at_or_below_upper]

#data_by_feature_range(Data_DF, "energy", 0.1, 0.3)

**get_selected_feature_value** – Takes dataframe (df), string columnname (feature), float (percent) value between 0 and 1 representing percent. Returns the selected percentage value of the feature e.g., 40% of 30000 where 40% is represented as 0.4.

In [40]:
def get_selected_feature_value(df, feature, percent):
    """Map a fraction onto the observed range of a feature column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the feature column.
    feature : str
        Name of the column whose minimum and maximum define the range.
    percent : float
        Fraction of the range to select, e.g. ``0.4`` for 40%.

    Returns
    -------
    float
        ``min + (max - min) * percent`` for the column.
    """
    column = df[[feature]]
    lowest = column.min().item()
    highest = column.max().item()

    # Scale the fraction to the width of the range, then shift it so that
    # 0 maps to the column minimum and 1 maps to the column maximum.
    offset = (highest - lowest) * percent
    return offset + lowest

#get_selected_feature_value(Data_DF, 'danceability', 0.4)

In [41]:
def find_value_difference(df, feature, value):
    """Order the rows of ``df`` by how close ``feature`` is to ``value``.

    The distance is computed for the whole column at once and attached to a
    temporary copy, so the DataFrame passed in is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to rank; it is not modified.
    feature : str
        Name of the numeric column to compare against ``value``.
    value : float
        Target value for the feature.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` sorted by ``abs(df[feature] - value)``, closest
        first, without the helper ``difference`` column.
    """
    distance = (df[feature] - value).abs()
    ranked = df.assign(difference=distance).sort_values(by=['difference'], ascending=True)
    return ranked.drop(columns=['difference'])

#find_value_difference(Data_DF, "energy", 0.4)

In [42]:
def return_songs_artist(df):
    """Print each song in ``df`` followed by its artists.

    Despite its name the function returns nothing; it only prints. Song
    titles are printed unchanged, so apostrophes and brackets in a title
    are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``name`` column and an ``artists`` column. Each
        ``artists`` value is first read as the text form of a Python list,
        such as ``"['A', 'B']"``. If it cannot be read that way, the
        characters ``[``, ``]`` and ``'`` are removed and the rest is
        split on ``", "``.

    Returns
    -------
    None
    """
    import ast  # local import: needed only to read the list-like artists text

    separator = '–––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––'
    remove_char_lst = ["[", "]", "'"]

    name_list = df['name'].values.tolist()
    artist_list = df['artists'].values.tolist()

    print(separator)
    for song, raw_artists in zip(name_list, artist_list):
        song_str = str(song)
        artist_str = str(raw_artists)

        # Reading the list text keeps apostrophes and commas inside artist names.
        try:
            parsed = ast.literal_eval(artist_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            parsed = None

        if isinstance(parsed, (list, tuple)):
            artists = [str(artist) for artist in parsed]
        else:
            # Fall back to character stripping for text that is not a list literal.
            for char in remove_char_lst:
                artist_str = artist_str.replace(char, "")
            artists = artist_str.split(", ")

        print(song_str, end='\n')
        print()
        print('By:')

        for artist in artists:
            print(artist)
        print()
        print(separator)
    

#return_songs_artist(Data_DF)

The following function allows the user to say how many songs they would like suggested

In [43]:
def validate_num_songs(max_songs=170654):
    """Ask the user how many songs they want and return the number.

    The user is prompted repeatedly until they enter a whole number between
    1 and ``max_songs`` inclusive.

    Parameters
    ----------
    max_songs : int, optional
        Largest number of songs that may be requested. Defaults to 170654,
        the number of songs the prompts refer to as the database size.

    Returns
    -------
    int
        The requested number of songs.
    """
    while True:
        value = input("Please enter the number of songs you would like with no spaces, periods or commas.\n\nPlease enter a number: ")
        print()

        # int() is the only step that can fail, so it is the only one guarded.
        try:
            num_songs = int(value)
        except ValueError:
            print("{} is not a number! \nPlease input a whole number!\n".format(value))
            continue

        if num_songs < 0:
            print("{} is a negative number! \nPlease enter a positive number!\n".format(value))
        elif num_songs == 0:
            print("{} is not an option! \nPlease enter a number greater than 0!\n".format(value))
        elif num_songs > max_songs:
            print("{} is too big! There are a maximum of {:,} songs in our database.".format(value, max_songs))
        else:
            break

    if num_songs == 1:
        print("You've requested {} song.\n".format(value))
    else:
        print("You've requested {} songs.\n".format(value))
    return num_songs

#validate_num_songs()

The function below allows the user to pick whether to include explicit songs

In [44]:
def validate_explicit():
    """Ask whether explicit songs should be excluded.

    The user is prompted repeatedly until they answer yes or no; the answer
    is matched without regard to letter case.

    Returns
    -------
    bool
        ``True`` when the user wants explicit songs excluded, else ``False``.
    """
    while True:
        ans = input("Would you like to exclude explicit songs? Please be aware that ansering yes may result in less song suggestions than requested.\n Please enter yes or no: ")
        print()

        if ans.lower() in ['yes', 'no']:
            break
        # Report the answer that was just typed; the original referenced an
        # undefined name here and crashed on any invalid answer.
        print("{} is not an option! \nPlease answer with yes or no.\n".format(ans))

    return ans.lower() == 'yes'

Example of whole thing, when the user asks for a curated playlist:

In [45]:


# Interactive driver: collect the user's choices, rank the songs and print them.

# Size of the song database as stated in the user-facing messages.
MAX_SONGS = 170654

metrics = validate_selected_features(features_lst)
num_songs = validate_num_songs()
not_explicit = validate_explicit()

if num_songs == MAX_SONGS and not_explicit:
    print("You've selected the maximum number of songs in our database. By removing explicit songs you will receive less than 170,654 song suggestions.")

if len(metrics) == 1:
    # Single feature: rank every song by distance to the requested level.
    feature_weight = validate_user_input(metrics[0])
    feature_range = get_selected_feature_value(Data_DF, metrics[0], feature_weight)
    print("Please wait a moment, it may take a few minutes to curate your song selection.\n")
    sorted_df = find_value_difference(Data_DF, metrics[0], feature_range)

    if num_songs == MAX_SONGS and not_explicit:
        none_explicit_df = sorted_df[sorted_df['explicit'] !=1]
        filtered_df = none_explicit_df[:]
    elif not_explicit:
        none_explicit_df = sorted_df[sorted_df['explicit'] !=1]
        filtered_df = none_explicit_df[:num_songs]
    else:
        filtered_df = sorted_df[:num_songs]

else:
    # Several features: start from a candidate pool larger than the request
    # and narrow it once per feature, most important feature first.
    if num_songs == MAX_SONGS:
        temp_num_songs = num_songs
    elif num_songs*len(metrics) > MAX_SONGS:
        temp_num_songs = MAX_SONGS
    elif not_explicit and num_songs < num_explicit:
        # Extra room in the pool, presumably so that enough songs are left
        # after explicit ones are filtered out below.
        temp_num_songs = (num_songs*len(metrics))+num_explicit
    else:
        temp_num_songs = num_songs*len(metrics)

    multiple_feat_df = Data_DF
    for i in metrics:
        feature_weight = validate_user_input(i)
        feature_range = get_selected_feature_value(multiple_feat_df, i, feature_weight)
        print("Please wait, this may take a while.\n")
        print()
        sorted_df = find_value_difference(multiple_feat_df, i, feature_range)
        multiple_feat_df = sorted_df[:temp_num_songs]
        # Shrink the pool for the next feature, but never below the number of
        # songs requested: an unclamped pool reaches zero (or goes negative)
        # when fewer songs than features are requested, which empties the result.
        temp_num_songs = max(temp_num_songs - len(metrics), num_songs)

    if not_explicit:
        none_explicit_df = multiple_feat_df[multiple_feat_df['explicit'] !=1]
        filtered_df = none_explicit_df[:num_songs]
    else:
        filtered_df = multiple_feat_df[:num_songs]

print("Your recommended songs are:\n")
return_songs_artist(filtered_df)
print("\nThank you for using our Spotify playlist curator!")
        

Please select which feature you'd like to base your playlist suggestion on.
If you would like to select multiple features, please select the most important one first. Thank you.
                             
1 = valence 
2 = acousticness 
3 = danceability 
4 = energy 
5 = instrumentalness 
6 = liveness 
7 = loudness 
8 = popularity 
9 = speechiness 
10 = tempo 

Please enter one of the numbers shown above: 3

You've selected danceability.

Would you like to select another feature? 
Please enter yes or no: yes

Please select which feature you'd like to base your playlist suggestion on.
If you would like to select multiple features, please select the most important one first. Thank you.
                             
1 = valence 
2 = acousticness 
3 = danceability 
4 = energy 
5 = instrumentalness 
6 = liveness 
7 = loudness 
8 = popularity 
9 = speechiness 
10 = tempo 

Please enter one of the numbers shown above: 10

You've selected tempo.

Would you like to select another feature? 
Ple

In [None]:
# Keep the rows of New_DF whose index labels are the first len(output) labels.
# NOTE(review): neither New_DF nor output is defined in the cells visible here —
# confirm where they come from before running the notebook top to bottom.
small_DF = New_DF.loc[New_DF.index[0:len(output)]]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Grouped bar chart comparing energy, danceability and liveness for each song
# in small_DF.
labels = small_DF['name']  # song titles used as tick labels
energy_levels = small_DF['energy']
dance_levels = small_DF['danceability']
liveness_levels = small_DF['liveness']

x = np.arange(len(labels))  # one tick position per song
bar_w = 0.2  # width of each bar

f, ax = plt.subplots(figsize=(10, 7))  # custom figure size
# Offsets of -1, 0 and +1 bar widths centre the three-bar group on its tick,
# so every song title sits under the middle of its own bars.
bar1 = ax.bar(x - bar_w, energy_levels, bar_w, label='Energy')
bar2 = ax.bar(x, dance_levels, bar_w, label='Danceability')
bar3 = ax.bar(x + bar_w, liveness_levels, bar_w, label='Liveness')

# Axis text, tick labels and legend
ax.set_ylabel('Level')
ax.set_xlabel('Song Title')
ax.set_title('Song Metrics')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
f.autofmt_xdate()  # rotate the tick labels to make room for long titles

f.tight_layout()
plt.show()  # display the graph