In [54]:
import io
import json
import os
import time

import numpy as np
import pandas as pd
import requests
import tqdm


def week_dates(date, weekday=0):
    """Return the ``(week_start, week_end)`` pair surrounding ``date``.

    ``week_start`` is ``date`` minus a one-week ``pd.DateOffset`` anchored on
    ``weekday``; ``week_end`` is ``date`` plus a zero-week ``pd.DateOffset``
    anchored on the same ``weekday`` (0 = Monday ... 6 = Sunday).
    """
    back_one_week = pd.DateOffset(weekday=weekday, weeks=1)
    within_week = pd.DateOffset(weekday=weekday, weeks=0)
    return date - back_one_week, date + within_week


def get_chart(date, region='en', freq='daily', chart='top200'):
    """Download a single Spotify chart and return it as a DataFrame.

    Parameters
    ----------
    date : anything accepted by ``pd.to_datetime``
        Day of the chart. For ``freq='weekly'`` the requested range is derived
        from it with ``week_dates(date, weekday=4)``.
    region : str
        Region segment of the spotifycharts.com URL.
    freq : str
        ``'weekly'`` requests a ``start--end`` range; any other value requests
        the single day. The value is also used verbatim in the URL.
    chart : str
        ``'top200'`` selects the ``regional`` chart; any other value selects
        the ``viral`` chart.

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV, or ``None`` when the server answers with an error
        status or the payload cannot be parsed as CSV.

    Raises
    ------
    ValueError
        If ``date`` lies before 2017.
    """
    chart = 'regional' if chart == 'top200' else 'viral'
    date = pd.to_datetime(date)
    if date.year < 2017:
        raise ValueError('No chart data available from before 2017')
    if freq == 'weekly':
        start, end = week_dates(date, weekday=4)
        date = f'{start.date()}--{end.date()}'
    else:
        date = f'{date.date()}'
    url = f'https://spotifycharts.com/{chart}/{region}/{freq}/{date}/download'

    # A timeout keeps one stalled request from hanging a multi-day download.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Do not hand an HTML error page to the CSV parser: it may fail, or
        # worse, "succeed" and return garbage rows.
        print(f'{url} returned HTTP {response.status_code}')
        return None

    try:
        # The first line of the download is a note from Spotify, not the header.
        return pd.read_csv(io.StringIO(response.text), skiprows=1)
    except (pd.errors.ParserError, pd.errors.EmptyDataError):
        # Show the start of the payload itself (not the StringIO repr) so the
        # failure can be diagnosed.
        print(f'Could not parse {url}: {response.text[:200]!r}')
        return None


def get_charts(start, end=None, region='en', freq='daily', chart='top200', sleep=1):
    """Download the charts for a range of dates and stack them into one frame.

    Parameters
    ----------
    start, end : date-like
        Bounds passed to ``pd.date_range``. When ``end`` is ``None`` only
        ``start`` is used.
    region, freq, chart : str
        Forwarded unchanged to ``get_chart``. ``freq='daily'`` iterates day by
        day; any other value iterates with pandas' ``'W'`` frequency.
    sleep : float
        Seconds to pause after each successfully downloaded chart.

    Returns
    -------
    pandas.DataFrame
        The first 50 rows of every downloaded chart, each tagged with a
        ``Date`` column, concatenated with a fresh integer index.

    Raises
    ------
    ValueError
        If no chart could be downloaded for the requested range (for example
        every request failed, or the range contains no date at all).
    """
    sample = 'D' if freq == 'daily' else 'W'
    end_date = start if end is None else end
    dfs = []
    for date in tqdm.tqdm(pd.date_range(start=start, end=end_date, freq=sample)):
        df = get_chart(date, region=region, freq=freq, chart=chart)
        if df is not None:
            # Keep the top 50 positions and record which day they belong to.
            dfs.append(df.head(50).assign(Date=date))
            time.sleep(sleep)
    if not dfs:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f'No chart data could be downloaded between {start} and {end_date}'
        )
    return pd.concat(dfs, ignore_index=True)

In [75]:
# Download the daily charts of the 'mx' region for every day of August 2019;
# get_charts keeps the first 50 rows of each day's chart.
chart = get_charts('2019-08-01','2019-08-31', freq='daily', region='mx')

100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [00:52<00:00,  1.69s/it]


In [76]:
# Extract the Spotify track id from each chart URL: splitting
# 'https://open.spotify.com/track/<id>' on "/" leaves the id in column 4.
chart['Track Id'] = chart['URL'].str.split("/",expand=True)[4]

In [77]:
# Columns of the audio-feature table, in the order the values are stored for
# each track ('Track Id' first, then the attributes read from the API reply).
spotify_attr = ['Track Id', 'acousticness', 'danceability',
                'duration_ms', 'energy', 'instrumentalness',
                'key', 'liveness', 'loudness', 'mode',
                'speechiness', 'tempo', 'time_signature',
                'valence']

# Never commit credentials to a notebook: read the bearer token from the
# environment instead of embedding it in the source.
token = os.environ.get("SPOTIFY_TOKEN")
if not token:
    raise RuntimeError(
        "Set the SPOTIFY_TOKEN environment variable to a Spotify Web API bearer token"
    )

# Request headers are identical for every track, so build them once.
request_headers = {
    "Accept": "application/json",
    "Authorization": "Bearer " + token,
    "Content-Type": "application/json"
}

# Each track appears on many days; query the API only once per track.
chart_unique = chart.drop_duplicates('Track Id')

features = []
for track_id in tqdm.tqdm(chart_unique['Track Id'], total=chart_unique.shape[0]):
    response = requests.get(
        "https://api.spotify.com/v1/audio-features/" + track_id,
        headers=request_headers,
        timeout=30,
    )
    # Fail with the HTTP error (e.g. 401 for an expired token) instead of a
    # confusing KeyError on the first missing attribute.
    response.raise_for_status()
    payload = response.json()
    features.append([track_id] + [payload[attr] for attr in spotify_attr[1:]])

# Build the table once, after the loop, and label its columns right away so it
# is well-formed even when there are no tracks.
dataset = pd.DataFrame(features, columns=spotify_attr)

100%|██████████████████████████████████████████████████████████████████████████████████| 71/71 [00:30<00:00,  2.34it/s]


In [78]:
# Name the feature columns in the same order the values were collected
# ('Track Id' first, then the audio attributes).
dataset.columns = spotify_attr

In [79]:
# Attach the audio features to every chart row by track id; the left join
# keeps every chart row even if no features were collected for its track.
test = chart.merge(dataset, on='Track Id', how='left')

In [80]:
# Display the merged chart + audio-feature table.
test

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Date,Track Id,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,1,China,Anuel AA,798322,https://open.spotify.com/track/2ksOAxtIxY8yElE...,2019-08-01,2ksOAxtIxY8yElEWw8RhgK,0.0846,0.786,301714,0.808,0.000289,7,0.0822,-3.702,1,0.0882,105.027,4,0.609
1,2,Callaita,Bad Bunny,678038,https://open.spotify.com/track/2TH65lNHgvLxCKX...,2019-08-01,2TH65lNHgvLxCKXM3apjxI,0.6000,0.610,250534,0.624,0.000002,2,0.2430,-4.773,1,0.3090,176.169,4,0.244
2,3,LA CANCIÓN,J Balvin,596805,https://open.spotify.com/track/0fea68AdmYNygeT...,2019-08-01,0fea68AdmYNygeTGI4RC18,0.1520,0.754,242573,0.646,0.000018,7,0.1080,-5.795,1,0.3170,176.089,4,0.429
3,4,11 PM,Maluma,521059,https://open.spotify.com/track/7KbF6AdprOXEEHl...,2019-08-01,7KbF6AdprOXEEHlsq11Z6d,0.2170,0.777,175733,0.712,0.000000,10,0.0910,-4.840,1,0.2770,95.692,4,0.680
4,5,Otro Trago,Sech,458509,https://open.spotify.com/track/1Ej96GIBCTvgH7t...,2019-08-01,1Ej96GIBCTvgH7tNX1r3qr,0.1360,0.747,225933,0.700,0.000167,0,0.1100,-4.669,1,0.3410,176.036,4,0.620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1545,46,Adan y Eva,Paulo Londra,170214,https://open.spotify.com/track/132ALUzVLmqYB4U...,2019-08-31,132ALUzVLmqYB4UsBj5qD6,0.3230,0.767,256972,0.709,0.000000,1,0.0745,-4.470,1,0.3360,171.993,4,0.720
1546,47,Dancin (feat. Luvli) - Krono Remix,Aaron Smith,169687,https://open.spotify.com/track/6WkJ2OK163XXS2o...,2019-08-31,6WkJ2OK163XXS2oARUC9JM,0.1010,0.720,198053,0.741,0.000014,10,0.2890,-6.529,0,0.0287,119.896,4,0.357
1547,48,Si Se Da - Remix,Myke Towers,164654,https://open.spotify.com/track/6K5BsR04ijf3FHN...,2019-08-31,6K5BsR04ijf3FHNzjbaagD,0.4410,0.817,332240,0.672,0.000000,11,0.1300,-5.612,0,0.2990,93.999,4,0.774
1548,49,No Te Contaron Mal,Christian Nodal,163366,https://open.spotify.com/track/7hbk4BaF66O09R1...,2019-08-31,7hbk4BaF66O09R1gq0bKwj,0.3690,0.644,156520,0.568,0.000000,9,0.1510,-3.077,0,0.0380,81.409,4,0.654


In [81]:
# Number of distinct chart dates in the merged table (missing values, if any,
# count as one distinct value).
test['Date'].nunique(dropna=False)

31

In [82]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
test.describe()

Unnamed: 0,Position,Streams,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
count,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0,1550.0
mean,25.5,264939.030968,0.237194,0.750025,222300.632258,0.713768,0.012168,6.321935,0.15796,-4.614657,0.569032,0.139357,127.033558,4.013548,0.67996
std,14.435527,141141.124887,0.205517,0.099544,44938.568201,0.133998,0.067223,3.868259,0.125984,2.256909,0.495371,0.10507,36.522605,0.225426,0.17332
min,1.0,127256.0,0.00554,0.4,143292.0,0.159,0.0,0.0,0.0302,-14.461,0.0,0.0287,74.93,3.0,0.189
25%,13.0,169774.75,0.0711,0.701,190800.0,0.661,0.0,2.0,0.0801,-5.041,0.0,0.0542,94.052,4.0,0.609
50%,25.5,207848.5,0.205,0.759,207400.0,0.736,0.0,7.0,0.108,-4.218,1.0,0.102,110.567,4.0,0.706
75%,38.0,307150.25,0.323,0.826,256972.0,0.791,2e-05,10.0,0.188,-3.33,1.0,0.204,170.163,4.0,0.797
max,50.0,866718.0,0.946,0.885,360960.0,0.972,0.465,11.0,0.671,-1.681,1.0,0.461,205.789,5.0,0.96


In [83]:
# Show the de-duplicated chart rows up to and including index label 9
# (.loc slices by label, so the end point is inclusive).
chart_unique.loc[:9,:]

Unnamed: 0,Position,Track Name,Artist,Streams,URL,Date,Track Id
0,1,China,Anuel AA,798322,https://open.spotify.com/track/2ksOAxtIxY8yElE...,2019-08-01,2ksOAxtIxY8yElEWw8RhgK
1,2,Callaita,Bad Bunny,678038,https://open.spotify.com/track/2TH65lNHgvLxCKX...,2019-08-01,2TH65lNHgvLxCKXM3apjxI
2,3,LA CANCIÓN,J Balvin,596805,https://open.spotify.com/track/0fea68AdmYNygeT...,2019-08-01,0fea68AdmYNygeTGI4RC18
3,4,11 PM,Maluma,521059,https://open.spotify.com/track/7KbF6AdprOXEEHl...,2019-08-01,7KbF6AdprOXEEHlsq11Z6d
4,5,Otro Trago,Sech,458509,https://open.spotify.com/track/1Ej96GIBCTvgH7t...,2019-08-01,1Ej96GIBCTvgH7tNX1r3qr
5,6,Soltera - Remix,Lunay,457582,https://open.spotify.com/track/1c0hsvHLELX6y8q...,2019-08-01,1c0hsvHLELX6y8qymnpLKL
6,7,No Me Conoce - Remix,Jhay Cortez,448450,https://open.spotify.com/track/4R8BJggjosTswLx...,2019-08-01,4R8BJggjosTswLxtkw8V7P
7,8,Otro Trago - Remix,Sech,443936,https://open.spotify.com/track/4bTZeO72FwMa6wK...,2019-08-01,4bTZeO72FwMa6wKOiqoynL
8,9,Señorita,Shawn Mendes,438091,https://open.spotify.com/track/6v3KW9xbzN5yKLt...,2019-08-01,6v3KW9xbzN5yKLt9YKDYA2
9,10,Con Altura,ROSALÍA,388738,https://open.spotify.com/track/2qG5sZ7Si6sdK74...,2019-08-01,2qG5sZ7Si6sdK74qLxedYM


In [84]:
# http://yaoyao.codes/pandas/2018/01/23/pandas-split-a-dataframe-into-chunks
def index_marks(nrows, chunk_size):
    """Return the row positions at which ``nrows`` rows are cut into chunks.

    The marks are ``chunk_size, 2 * chunk_size, ...`` and are always strictly
    smaller than ``nrows``, so cutting at them never produces an empty
    trailing chunk (the previous formula did when ``nrows`` was an exact
    multiple of ``chunk_size``).

    Raises
    ------
    ValueError
        If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')
    return range(chunk_size, nrows, chunk_size)

def split(dfm, chunk_size):
    """Split ``dfm`` row-wise into consecutive chunks of ``chunk_size`` rows.

    The cut positions come from ``index_marks``. Chunks are taken with
    positional ``iloc`` slices rather than ``np.split``, which on a DataFrame
    depends on the deprecated ``DataFrame.swapaxes``.

    Returns
    -------
    list of pandas.DataFrame
        The chunks in row order; each keeps the original index labels.
    """
    nrows = dfm.shape[0]
    marks = list(index_marks(nrows, chunk_size))
    # Chunk boundaries: start of the frame, every mark, end of the frame.
    bounds = [0, *marks, nrows]
    return [dfm.iloc[lo:hi] for lo, hi in zip(bounds[:-1], bounds[1:])]

# Cut the unique tracks into chunks of at most 100 rows and report how many
# chunks that gives. NOTE(review): `chunks` is not used by any cell visible
# here — presumably intended for batching API requests; confirm or remove.
chunks = split(chart_unique, 100)
print(len(chunks))

1


In [71]:
# Export the merged table to CSV.
# NOTE(review): the same file is written again further down, after the index
# has been given a name; at this point the index is still unnamed. The path is
# absolute and machine-specific — consider a configurable data directory.
test.to_csv('C:/Users/manol/Desktop/GitHub/Lyricfy/dataset-Agos.csv', encoding='utf-8-sig')

In [86]:
# Name the index so the CSV export gets an 'index' header that can be used as
# index_col when the file is read back.
test.index.name = 'index'

In [89]:
# Write the merged table (now with a named index) to CSV; 'utf-8-sig' prefixes
# the file with a UTF-8 byte-order mark.
# NOTE(review): absolute, machine-specific path — consider a configurable
# data directory.
test.to_csv('C:/Users/manol/Desktop/GitHub/Lyricfy/dataset-Agos.csv', encoding='utf-8-sig')

In [92]:
# Read the exported CSV back, using its 'index' column as the DataFrame index.
# NOTE(review): absolute, machine-specific path — must match the export cell.
df = pd.read_csv('C:/Users/manol/Desktop/GitHub/Lyricfy/dataset-Agos.csv', index_col='index')

In [93]:
# Display the table as read back from the CSV file.
df

Unnamed: 0_level_0,Position,Track Name,Artist,Streams,URL,Date,Track Id,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,1,China,Anuel AA,798322,https://open.spotify.com/track/2ksOAxtIxY8yElE...,2019-08-01,2ksOAxtIxY8yElEWw8RhgK,0.0846,0.786,301714,0.808,0.000289,7,0.0822,-3.702,1,0.0882,105.027,4,0.609
1,2,Callaita,Bad Bunny,678038,https://open.spotify.com/track/2TH65lNHgvLxCKX...,2019-08-01,2TH65lNHgvLxCKXM3apjxI,0.6000,0.610,250534,0.624,0.000002,2,0.2430,-4.773,1,0.3090,176.169,4,0.244
2,3,LA CANCIÓN,J Balvin,596805,https://open.spotify.com/track/0fea68AdmYNygeT...,2019-08-01,0fea68AdmYNygeTGI4RC18,0.1520,0.754,242573,0.646,0.000018,7,0.1080,-5.795,1,0.3170,176.089,4,0.429
3,4,11 PM,Maluma,521059,https://open.spotify.com/track/7KbF6AdprOXEEHl...,2019-08-01,7KbF6AdprOXEEHlsq11Z6d,0.2170,0.777,175733,0.712,0.000000,10,0.0910,-4.840,1,0.2770,95.692,4,0.680
4,5,Otro Trago,Sech,458509,https://open.spotify.com/track/1Ej96GIBCTvgH7t...,2019-08-01,1Ej96GIBCTvgH7tNX1r3qr,0.1360,0.747,225933,0.700,0.000167,0,0.1100,-4.669,1,0.3410,176.036,4,0.620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1545,46,Adan y Eva,Paulo Londra,170214,https://open.spotify.com/track/132ALUzVLmqYB4U...,2019-08-31,132ALUzVLmqYB4UsBj5qD6,0.3230,0.767,256972,0.709,0.000000,1,0.0745,-4.470,1,0.3360,171.993,4,0.720
1546,47,Dancin (feat. Luvli) - Krono Remix,Aaron Smith,169687,https://open.spotify.com/track/6WkJ2OK163XXS2o...,2019-08-31,6WkJ2OK163XXS2oARUC9JM,0.1010,0.720,198053,0.741,0.000014,10,0.2890,-6.529,0,0.0287,119.896,4,0.357
1547,48,Si Se Da - Remix,Myke Towers,164654,https://open.spotify.com/track/6K5BsR04ijf3FHN...,2019-08-31,6K5BsR04ijf3FHNzjbaagD,0.4410,0.817,332240,0.672,0.000000,11,0.1300,-5.612,0,0.2990,93.999,4,0.774
1548,49,No Te Contaron Mal,Christian Nodal,163366,https://open.spotify.com/track/7hbk4BaF66O09R1...,2019-08-31,7hbk4BaF66O09R1gq0bKwj,0.3690,0.644,156520,0.568,0.000000,9,0.1510,-3.077,0,0.0380,81.409,4,0.654
