## Dependencies

In [None]:
#Common Dependencies
import pandas as pd
import random as rnd
import seaborn as sns
import matplotlib.pyplot as plt
#Raw Data Extractor
from modules.authenticator import authenticator
from modules.get_info import get_info
from modules.extract_features import extract_features
#Data Editor
from modules.create_sample import create_sample
from modules.populate import populate
# Spotify user credentials: authenticate once and reuse the returned object
# (`sp`) in the get_info / extract_features calls further down.
sp = authenticator()

# Keep rendered DataFrames compact in the notebook output.
pd.options.display.max_rows = 10

## Raw Data Extractor

### Provide Spotify links

In [None]:
# Spotify links to process (currently a single album link); one entry per line
links = [
    'https://open.spotify.com/album/7MUY0WxCmHcgEEeQNjoe8a?si=qKqQX0G2QTyvix4uSsraZA',
]

### Extract raw data 

In [None]:
# Step 1: resolve the links into the info dict and URL types that
# extract_features() expects
print('Extracting URIs from links...')
info_dict, url_types = get_info(sp, links)
print('............................')
print('URIs have been extracted\n')

# Step 2: build the raw dataframe from the resolved info
print('Extracting data...')
dataframe = extract_features(sp, info_dict, url_types)
print('............................')
print('Data has been extracted\n')

# Step 3: report fully duplicated rows and drop them if there are any
duplicate_count = dataframe.duplicated().sum()
print('Number of duplicates:', duplicate_count)
if duplicate_count > 0:
    print('Removing duplicates...')
    dataframe = dataframe.drop_duplicates()
    print('Duplicates have been removed')
    # Re-count on the cleaned frame to confirm the removal
    print('Number of duplicates:', dataframe.duplicated().sum())

# Display the resulting dataframe as the cell output
print('\nDataframe:')
dataframe

## Data Editor

### (I) Populating Median Neighbourhood

In [None]:
#Populate with additional data
print('Shape before populating:', dataframe.shape)
print('............................')
print('Populating...')
# NOTE(review): the meaning of the second argument (100) is defined by
# modules.populate and is not visible here - confirm before changing it.
dataframe = populate(dataframe, 100)
# Report completion only after populate() has actually returned, so the log
# does not claim success when the call raises.
print('Populating has been done')
print('............................')
print('Shape after populating:', dataframe.shape)

#Show a dataframe
print('\nDataframe:')
dataframe

In [None]:
# First quartile (25th percentile) of danceability.
# describe() returns a Series labelled count/mean/std/min/25%/50%/75%/max, so
# the element previously read by position 4 is selected by its label instead:
# integer-position lookups on a label-indexed Series are deprecated in pandas.
# NOTE(review): if the median was intended here, the label would be '50%' - confirm.
dataframe['danceability'].describe()['25%']

### (II) Feature Extracting

In [None]:
# Derive two extra feature columns that combine danceability with energy and
# with valence.
# NOTE(review): the columns are named "*_ratio" but hold element-wise products
# (a * b), not quotients - confirm the naming/operation is intended.
danceability = dataframe['danceability']
dataframe['danceability_energy_ratio'] = danceability.mul(dataframe['energy'])
dataframe['danceability_valence_ratio'] = danceability.mul(dataframe['valence'])

# Display the dataframe with the new columns as the cell output
print('\nDataframe:')
dataframe

### (III) KDE Trimming

In [None]:
# Danceability window kept by the trim (both bounds are exclusive)
DANCEABILITY_MIN, DANCEABILITY_MAX = 0.4, 0.5

# Left panel: KDE of danceability before trimming
plt.subplot(1, 2, 1)
sns.kdeplot(data=dataframe, x='danceability', bw_adjust=0.5)
plt.xlabel('Danceability')
plt.ylabel('Density')
plt.title('KDE Plot Before Trimming')

# Trim: keep only rows whose danceability lies strictly inside the window.
# .copy() makes the result an independent DataFrame, so adding columns to it
# later does not raise pandas' SettingWithCopyWarning.
in_window = (
    (dataframe['danceability'] > DANCEABILITY_MIN)
    & (dataframe['danceability'] < DANCEABILITY_MAX)
)
dataframe = dataframe[in_window].copy()

# Right panel: KDE of danceability after trimming
plt.subplot(1, 2, 2)
sns.kdeplot(data=dataframe, x='danceability', bw_adjust=0.5)
plt.xlabel('Danceability')
plt.ylabel('Density')
plt.title('KDE Plot After Trimming')

plt.tight_layout()
plt.show()

### Set the tracklist genre for the training data set

In [None]:
# Label every track in this tracklist with its genre for the training data set.
# assign() returns a new DataFrame instead of writing a column into the
# filtered frame produced by the trimming step, which avoids pandas'
# SettingWithCopyWarning.
dataframe = dataframe.assign(genres='hiphop')
dataframe.head()

## Save data to csv

In [None]:
from pathlib import Path

# Destination of the processed tracklist
output_path = Path('Albums/m_ToryLanez.csv')
# to_csv() does not create missing directories, so make sure the target
# folder exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
# The index is written as the first column (pandas default), as before.
dataframe.to_csv(output_path)