# Notebook that retrieves API data related to tracks

## Imports

In [None]:
import base64
from time import sleep
from typing import Any, Iterator, List, Optional

import pandas as pd
import requests

## Methods

In [None]:
# Split a list into chunks of specified size
def chunk_list(data: List[Any], chunk_size: int = 0, max_chunk: Optional[int] = None) -> Iterator[List[Any]]:
    """Yield consecutive slices of ``data`` with at most ``size`` items each.

    The effective slice size is resolved as follows:

    * ``chunk_size > 0``: use ``chunk_size``, capped by ``max_chunk`` when a
      positive cap is given.
    * otherwise, a positive ``max_chunk`` is used as the size.
    * otherwise, the size falls back to 100.

    Args:
        data: Any sliceable sequence supporting ``len()`` (list, tuple,
            NumPy array, ...).
        chunk_size: Requested number of items per chunk; values <= 0 mean
            "not specified".
        max_chunk: Optional upper bound on the chunk size; ``None`` or values
            <= 0 mean "no cap".

    Yields:
        Slices of ``data`` in order; the last one may be shorter.
    """
    # A non-positive cap is treated as "no cap". Feeding it to min() would
    # produce a zero or negative step, making range() raise or yield nothing.
    has_cap = max_chunk is not None and max_chunk > 0

    if chunk_size > 0:
        size = min(chunk_size, max_chunk) if has_cap else chunk_size
    elif has_cap:
        size = max_chunk
    else:
        size = 100  # default chunk size

    for start in range(0, len(data), size):
        yield data[start:start + size]

## Reading dataset

In [None]:
# Load the previously prepared dataset from the notebook's working directory.
df_treated = pd.read_csv(filepath_or_buffer="final_df.csv")

## ReccoBeats APIs - [Get multiple track](https://reccobeats.com/docs/apis/get-tracks) and [Get multiple audio features](https://reccobeats.com/docs/apis/get-audio-features)

In [None]:
# Drop missing IDs up front: NaN is a float and would make ",".join() fail.
unique_track_id_list = df_treated["track_id"].dropna().unique()

headers = {
    'Accept': 'application/json'
}

# Endpoint prefixes are loop-invariant; the comma-separated IDs are appended per chunk.
base_url_audio_features = "https://api.reccobeats.com/v1/audio-features?ids="
base_url_multiple_track = "https://api.reccobeats.com/v1/track?ids="

# Seconds to wait for each HTTP call; without a timeout a stalled connection
# would block the notebook indefinitely.
request_timeout = 30

audio_features_dataframes = []
multiple_track_dataframes = []

# 40 IDs per request -- presumably the ReccoBeats per-request limit; confirm against the API docs.
for chunk in chunk_list(unique_track_id_list, 40):
    print(f"Len: {len(chunk)}")

    track_ids_str = ",".join(map(str, chunk))

    url_audio_features = base_url_audio_features + track_ids_str
    url_multiple_track = base_url_multiple_track + track_ids_str

    response_audio_features = requests.get(url=url_audio_features, headers=headers, timeout=request_timeout)
    response_multiple_track = requests.get(url=url_multiple_track, headers=headers, timeout=request_timeout)

    # Fail loudly on HTTP errors instead of hitting a confusing KeyError on
    # "content" when the body is an error payload.
    response_audio_features.raise_for_status()
    response_multiple_track.raise_for_status()

    # Each response body is expected to carry its records under the "content" key.
    audio_features_dataframes.append(pd.json_normalize(response_audio_features.json(), record_path="content"))
    multiple_track_dataframes.append(pd.json_normalize(response_multiple_track.json(), record_path="content"))

df_audio_features = pd.concat(audio_features_dataframes, ignore_index=True)
df_multiple_track = pd.concat(multiple_track_dataframes, ignore_index=True)

# The join key is the last path segment of each record's "href".
df_audio_features["track_id"] = df_audio_features["href"].str.rpartition("/")[2]
df_multiple_track["track_id"] = df_multiple_track["href"].str.rpartition("/")[2]

## Checking results

In [None]:
# Preview the first five rows of each API result, audio features first.
display(
    df_audio_features.head(5),
    df_multiple_track.head(5),
)

## Merging dataframes

In [None]:
# Enrich the base dataset with the audio features, keeping every original row.
df_final = df_treated.merge(
    df_audio_features[["track_id", "acousticness", "danceability", "energy", "instrumentalness", "key", "liveness", "loudness", "mode", "speechiness", "tempo", "valence"]], # fields to bring in from df_audio_features
    on="track_id", # track_id must be included in the column list above
    how="left"
)

# Continue from df_final (not df_treated) so the audio-feature columns added
# above are preserved when popularity is attached.
df_final = df_final.merge(
    df_multiple_track[["track_id", "popularity"]], # fields to bring in from df_multiple_track
    on="track_id", # track_id must be included in the column list above
    how="left"
)