## Importing the necessary libraries

In [None]:
import numpy as np
import pandas as pd
import math
import datetime
import requests
from config import *
from bs4 import BeautifulSoup
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
from time import sleep
from random import randint

In [None]:
!pip install bs4

In [None]:
!pip install spotipy

In [None]:
# Notebook-wide Spotify client using the client-credentials flow.
# client_id / client_secret are presumably supplied by `from config import *`.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

## Creating search_song()

Create a function, search_song(), that searches for a given song in the Spotify API. Keep in mind that Spotify's API will sometimes return several matches for the same song title (different artists, a different album by the same artist, another version of the song, etc.). In that case it is helpful to display the list of matches to the user and let them select the right one. Once the desired song is located, the function should return the href/id/uri of the song to the code (not to the user).

In [None]:
# Load the chart songs. search_song_ids reads this frame positionally:
# column 0 is used as the title and column 1 as the artist.
billboard100 = pd.read_csv('billboard100.csv')

In [None]:
# Preview the first rows to check the column layout.
billboard100.head()

In [None]:
def search_song_ids(df, stepsize=20, pause_seconds=20, client=None):
    """Look up the Spotify track id for every row of ``df``.

    Rows are searched in chunks of ``stepsize``; the function pauses between
    chunks so that requests are spread out over time.

    Parameters
    ----------
    df : pandas.DataFrame
        Song table, read positionally: column 0 is the title and column 1
        is the artist.
    stepsize : int, default 20
        Number of rows searched between two pauses.
    pause_seconds : float, default 20
        Seconds to sleep between two chunks. There is no pause after the
        last chunk.
    client : object, optional
        Object exposing ``search(q=..., limit=...)``. Defaults to the
        notebook-level ``sp`` client.

    Returns
    -------
    list
        One entry per row, in row order: the id of the first match, or
        ``np.nan`` when the song has no match or the lookup failed.
    """
    if client is None:
        client = sp

    total = len(df)
    song_ids = []
    for number in range(0, total, stepsize):
        print("Getting the song ids for the chunk:", number)
        print()
        # Iterate over the positions of this chunk directly instead of
        # materialising the full index list for every chunk.
        for index in range(number, min(number + stepsize, total)):
            artist = df.iloc[index, 1]
            title = df.iloc[index, 0]

            # A missing title or artist cannot be searched for.
            if pd.isna(title) or pd.isna(artist):
                print("The song: {} of artist: {} is not on Spotify".format(title, artist))
                song_ids.append(np.nan)
                continue

            try:
                results = client.search(
                    q="track:{} artist:{}".format(title, artist), limit=1
                )
                items = results['tracks']['items']
            except Exception as error:
                # Stay best-effort, but do not report an API/network failure
                # as "not on Spotify", and let KeyboardInterrupt propagate.
                print("The lookup for song: {} of artist: {} failed: {!r}".format(title, artist, error))
                song_ids.append(np.nan)
                continue

            if items:
                song_ids.append(items[0]['id'])
            else:
                print("The song: {} of artist: {} is not on Spotify".format(title, artist))
                song_ids.append(np.nan)

        # Only wait when another chunk follows.
        if pause_seconds > 0 and number + stepsize < total:
            sleep(pause_seconds)
        print()
        print()
    return song_ids

### Applying search_song_ids to the billboard dataset and dropping the missing values.

In [None]:
# Query Spotify for every chart row; entries are np.nan where no id was found.
songs_ids_100 = search_song_ids(billboard100)

In [None]:
# Shows the complete list of ids, one entry per chart row.
display (songs_ids_100)

In [None]:
# Shape before the id column is added and rows are dropped.
billboard100.shape

In [None]:
# song_id has to be the first column: get_audio_features reads the id from column 0.
# NOTE(review): DataFrame.insert refuses to add a column that already exists, so
# re-running this cell without reloading the CSV is expected to fail.
billboard100.insert(loc=0, column='song_id', value=songs_ids_100)

In [None]:
# Tag every row with the constant 'H' (presumably "hot") in a new 'dataset' column.
billboard100.insert(loc=3 , column= 'dataset', value='H' )

In [None]:
billboard100.head()

In [None]:
# Drops, in place, every row that has a missing value in any column; this
# includes the songs whose song_id is NaN.
billboard100.dropna(inplace=True)

In [None]:
# Shape after the drop; compare with the shape shown earlier.
billboard100.shape

### Applying search_song_ids to the not_hot dataset and dropping missing values

In [None]:
# Load the second song list. search_song_ids reads it positionally:
# column 0 is used as the title and column 1 as the artist.
nothotsongs = pd.read_csv('not_hot_songs.csv')

In [None]:
# Query Spotify for every row; entries are np.nan where no id was found.
songs_ids_nothot = search_song_ids(nothotsongs)

In [None]:
# song_id has to be the first column: get_audio_features reads the id from column 0.
# NOTE(review): DataFrame.insert refuses to add a column that already exists, so
# re-running this cell without reloading the CSV is expected to fail.
nothotsongs.insert(loc=0, column='song_id', value=songs_ids_nothot)

In [None]:
# Tag every row with the constant 'NH' (presumably "not hot") in a new 'dataset' column.
nothotsongs.insert(loc=3 , column= 'dataset', value='NH' )

In [None]:
# Drops, in place, every row that has a missing value in any column; this
# includes the songs whose song_id is NaN.
nothotsongs.dropna(inplace=True)

## Creating "get_audio_features(list_of_songs)"

Create a function "get_audio_features(list_of_songs)" to obtain the audio features of a given list of songs (the content of list_of_songs can be the href/id/uri). Then, use this function to create a Pandas DataFrame with the audio features of the list of songs. Hint: create a dictionary with the song's audio features as keys and an empty list as values. Then fill in the lists with the corresponding audio features of each song. Finally, create your data frame from the dictionary. Bear in mind the following: this API has a restriction on the number of requests it accepts in a short period of time, so request the songs in chunks and pause between chunks.

In [None]:
def get_audio_features(df, stepsize=20, pause_seconds=20, client=None):
    """Fetch the Spotify audio features for every row of ``df``.

    Rows are requested in chunks of ``stepsize``; the function pauses between
    chunks so that requests are spread out over time.

    Parameters
    ----------
    df : pandas.DataFrame
        Song table whose first column (position 0) holds the Spotify id.
    stepsize : int, default 20
        Number of rows requested between two pauses.
    pause_seconds : float, default 20
        Seconds to sleep between two chunks. There is no pause after the
        last chunk.
    client : object, optional
        Object exposing ``audio_features(song_id)`` that returns a list whose
        first element is the feature dict. Defaults to the notebook-level
        ``sp`` client.

    Returns
    -------
    pandas.DataFrame
        One row per input row, in the same order. Songs without features
        (missing id, failed request, or empty answer) give a row of missing
        values, so the result lines up positionally with ``df``.
    """
    if client is None:
        client = sp

    total = len(df)
    song_features = []
    for number in range(0, total, stepsize):
        print("Getting the features for the chunk:", number)
        print()
        # Iterate over the positions of this chunk directly instead of
        # materialising the full index list for every chunk.
        for index in range(number, min(number + stepsize, total)):
            song_id = df.iloc[index, 0]

            # A missing id cannot be requested.
            if pd.isna(song_id):
                print("The features for {} are not in spotify".format(song_id))
                song_features.append({})
                continue

            try:
                response = client.audio_features(song_id)
            except Exception as error:
                # Stay best-effort, but report the real failure and let
                # KeyboardInterrupt propagate.
                print("The request for the features of {} failed: {!r}".format(song_id, error))
                song_features.append({})
                continue

            # The answer may be empty or hold None for an unknown id; store an
            # empty record so the row count still matches ``df``.
            features = response[0] if response else None
            if not features:
                print("The features for {} are not in spotify".format(song_id))
                features = {}
            song_features.append(features)

        # Only wait when another chunk follows.
        if pause_seconds > 0 and number + stepsize < total:
            sleep(pause_seconds)
        print()
        print()
    return pd.DataFrame(song_features)

### Applying get_audio_features to billboard100

In [None]:
# Request the audio features for every remaining row; the result is a
# DataFrame with one row per row of billboard100.
billboard100features = get_audio_features(billboard100)

In [None]:
# reset_index() renumbers the rows from 0 so they line up with the feature
# frame during the column-wise concat; it also keeps the old labels in an
# 'index' column, which is removed again on the next line together with 'id'
# (presumably Spotify's own id field, duplicating song_id -- confirm).
billboard100_final = pd.concat([billboard100.reset_index(), billboard100features], axis=1)
billboard100_final.drop(columns=['index', 'id'], inplace=True)
billboard100_final.head()

In [None]:
# Save the extended frame without writing the row index.
billboard100_final.to_csv('billboard100_final.csv', index=False)

### Applying get_audio_features to nothotsongs

In [None]:
# Request the audio features for every remaining row; the result is a
# DataFrame with one row per row of nothotsongs.
nothotsongsfeatures = get_audio_features(nothotsongs)

In [None]:
# reset_index() renumbers the rows from 0 so they line up with the feature
# frame during the column-wise concat; it also keeps the old labels in an
# 'index' column, which is removed again on the next line together with 'id'
# (presumably Spotify's own id field, duplicating song_id -- confirm).
nothotsongs_final = pd.concat([nothotsongs.reset_index(), nothotsongsfeatures], axis=1)
nothotsongs_final.drop(columns=['index', 'id'], inplace=True)
nothotsongs_final.head()

In [None]:
# Save the extended frame without writing the row index.
nothotsongs_final.to_csv('nothotsongs_final.csv', index=False)

## Creating "add_audio_features(df, audio_features_df)"

Once the previous function has been created, create another function "add_audio_features(df, audio_features_df)" to concat a given data frame with the data frame containing the audio features alongside any other desired info, and return the extended data frame.

Replace the old internal files of songs (hot and not hot) with the extended data frames with the audio features and save them into separate files on the disk.

def add_audio_features(df, audio_features_df):
    """Return ``df`` extended column-wise with its audio features.

    The two frames are matched by row position, not by index label: both
    indexes are reset before concatenating, so a frame whose index has gaps
    (for example after ``dropna``) still lines up with its features.

    Parameters
    ----------
    df : pandas.DataFrame
        Song table.
    audio_features_df : pandas.DataFrame
        Audio features, one row per row of ``df`` and in the same order.

    Returns
    -------
    pandas.DataFrame
        New frame holding the columns of ``df`` followed by the columns of
        ``audio_features_df``. Neither input is modified.

    Raises
    ------
    ValueError
        If the two frames do not have the same number of rows.
    """
    if len(df) != len(audio_features_df):
        raise ValueError(
            "df has {} rows but audio_features_df has {}".format(
                len(df), len(audio_features_df)
            )
        )
    return pd.concat(
        [df.reset_index(drop=True), audio_features_df.reset_index(drop=True)],
        axis=1,
    )