In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os 
import dotenv
import requests
import pandas as pd
import time


## Caminho da pasta

In [None]:
# Folder that is scanned for the "Streaming_History_Audio*.json" export files.
# Relative path: it is resolved against the notebook's working directory.
base_path = "my_spotify_songs"

In [3]:
# Load variables from a local .env file into the process environment.
dotenv.load_dotenv()

# Spotify application credentials used for the client-credentials flow.
client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')

# Key and endpoint of the tag service queried by buscar_tags.
# HTTPS is used so the api_key query parameter is not sent in clear text.
API_KEY = os.getenv('API_KEY')
BASE_URL = "https://ws.audioscrobbler.com/2.0/"

# Spotify Web API client shared by the lookup helpers.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

## Funções

In [4]:
# Single HTTP session shared by every buscar_tags call.
session = requests.Session()

In [5]:
def buscar_tags(params, timeout=10):
    """Send a GET request to BASE_URL and return the decoded JSON body.

    Args:
        params: Mapping of query-string parameters forwarded to the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the caller
            forever, which matters because this helper is called once per
            artist in a long loop.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    response = session.get(BASE_URL, params=params, timeout=timeout)
    return response.json()

In [6]:
def get_genre_by_name(artista):
    """Return the first top tag reported for an artist, or None.

    Queries the tag service through buscar_tags using the
    "artist.getTopTags" method and reads the name of the first tag.

    Args:
        artista: Artist name sent as the "artist" query parameter.

    Returns:
        The "name" of the first entry under toptags -> tag, or None when the
        payload has no such entry (unknown artist, error payload, empty tag
        list, unexpected shape).

    Raises:
        Whatever buscar_tags raises; request errors are not swallowed here.
    """
    params_artist = {
        "method": "artist.getTopTags",
        "artist": artista,
        "api_key": API_KEY,
        "format": "json"
    }

    data_artist = buscar_tags(params_artist)

    try:
        tags = data_artist["toptags"]["tag"]
        # Defensive: tolerate a lone tag delivered as a single object rather
        # than a one-element list.
        if isinstance(tags, dict):
            tags = [tags]
        return tags[0]["name"]
    except (KeyError, IndexError, TypeError):
        # Only "data is missing or malformed" is treated as "no genre";
        # KeyboardInterrupt and other unrelated errors still propagate.
        return None



In [8]:
def get_genre_by_artist(artist):
    """Return the first Spotify genre of the best search match, or None.

    Runs an artist search limited to one result and reads the first entry of
    that result's "genres" list.

    Args:
        artist: Free-text query passed to the Spotify search.

    Returns:
        The first genre string, or None when the search returned no artist
        or the matched artist has no genres.
    """
    search_result = sp.search(q=artist, type='artist', limit=1)

    try:
        top_match = search_result['artists']['items'][0]
        return top_match['genres'][0]
    except IndexError:
        # Either no artist matched or the match carries an empty genre list.
        return None

In [9]:
def get_genre(artista):
    """Resolve a genre for an artist, preferring Spotify over the tag lookup.

    Args:
        artista: Artist name to look up.

    Returns:
        The result of get_genre_by_artist when it is truthy; otherwise
        whatever get_genre_by_name returns (possibly None).
    """
    # `or` short-circuits, so the tag lookup only runs when Spotify has nothing.
    return get_genre_by_artist(artista) or get_genre_by_name(artista)

## Lógica de criação do DF 

In [10]:
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the concatenated frame reproducible between runs and machines.
files = sorted(os.listdir(base_path))

# Keep only the audio streaming-history exports.
history_files = [
    name for name in files
    if name.startswith("Streaming_History_Audio") and name.endswith(".json")
]

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when the folder has no matching export.
if not history_files:
    raise FileNotFoundError(
        f"No 'Streaming_History_Audio*.json' files found in {base_path!r}"
    )

# One DataFrame per export file, stacked into a single frame with a fresh index.
dfs = [pd.read_json(os.path.join(base_path, name)) for name in history_files]

df = pd.concat(dfs, ignore_index=True)

#### ATENÇÃO! A busca de gêneros mais abaixo consulta as APIs uma vez para cada artista e pode demorar.

In [11]:
# Keep rows that have both a track URI and an artist name, parse "ts" into
# datetimes, and discard everything played before 2023-01-01.
df = (
    df
    .dropna(subset=['spotify_track_uri', 'master_metadata_album_artist_name'])
    .assign(ts=lambda plays: pd.to_datetime(plays['ts']))
    .loc[lambda plays: plays['ts'] >= '2023-01-01']
)

In [12]:
# One row per distinct artist name found in the listening history.
df_unico = df[['master_metadata_album_artist_name']].drop_duplicates()


In [13]:
# Build a lookup table with one row per distinct artist and its genre.
# get_genre performs remote API calls, so each artist is resolved exactly
# once here instead of once per play.
# Series.map replaces the row-wise apply(axis=1), and .assign creates the
# column on a new frame rather than writing into one derived from df.
df_unico = (
    df[['master_metadata_album_artist_name']]
    .drop_duplicates()
    .assign(
        genero=lambda artists: artists['master_metadata_album_artist_name'].map(get_genre)
    )
)

display(df_unico)

Unnamed: 0,master_metadata_album_artist_name,genero
2168,Teto,brazilian trap
2170,MC Cabelinho,brazilian trap
2171,Dfideliz,brazilian trap
2172,Aka Rasta,brazilian trap
2173,DNASTY,brazilian trap
...,...,...
24589,Foo Fighters,rock
24591,The Killers,alternative rock
24605,Duncan Laurence,pop
24611,Everybody Loves an Outlaw,USA


In [14]:
# Attach the per-artist genre to every play by joining on the artist name.
df_final = df.merge(
    df_unico,
    on='master_metadata_album_artist_name',
    how='inner',
)

In [15]:
# Render the merged frame for inspection.
# NOTE(review): the rendered output includes the ip_addr column; clear the
# cell output before sharing or committing the notebook.
display(df_final)

Unnamed: 0,ts,platform,ms_played,conn_country,ip_addr,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,episode_name,...,audiobook_chapter_uri,audiobook_chapter_title,reason_start,reason_end,shuffle,skipped,offline,offline_timestamp,incognito_mode,genero
0,2023-01-10 14:08:28+00:00,android,4466,BR,187.122.59.170,Mustang Preto,Teto,Mustang Preto,spotify:track:6Vgn3IEUkWs3d6og029Gtm,,...,,,appload,logout,True,False,False,1.673360e+09,False,brazilian trap
1,2023-01-10 14:11:37+00:00,android,178879,BR,187.122.59.170,Mustang Preto,Teto,Mustang Preto,spotify:track:6Vgn3IEUkWs3d6og029Gtm,,...,,,appload,trackdone,True,False,False,1.673360e+09,False,brazilian trap
2,2023-01-10 14:16:55+00:00,android,217539,BR,187.122.59.170,VALHO NADA,MC Cabelinho,LITTLE LOVE,spotify:track:18XOY618zQH4bFw3TgVT9L,,...,,,trackdone,trackdone,True,False,False,1.673360e+09,False,brazilian trap
3,2023-01-10 14:21:37+00:00,android,161311,BR,187.122.59.170,Progresso,Dfideliz,Progresso,spotify:track:0tLRlU39HHqJ2BX4DN4qnm,,...,,,trackdone,trackdone,True,False,False,1.673360e+09,False,brazilian trap
4,2023-01-10 14:29:10+00:00,android,150932,BR,187.122.59.170,Romântico 2,Aka Rasta,ZIMA,spotify:track:6OPqR2PJZpPlWVZ4IOyljf,,...,,,trackerror,trackdone,True,False,False,1.673361e+09,False,brazilian trap
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22424,2026-02-02 23:45:30+00:00,android,166028,BR,2804:214:8603:1d1f:1:0:3af4:9fdd,Head & Heart (feat. MNEK),Joel Corry,Head & Heart (feat. MNEK),spotify:track:6cx06DFPPHchuUAcTxznu9,,...,,,trackdone,trackdone,True,False,False,1.770076e+09,True,dance pop
22425,2026-02-02 23:47:37+00:00,android,126346,BR,2804:214:8603:1d1f:1:0:3af4:9fdd,Look At Me!,XXXTENTACION,Look At Me!,spotify:track:7floNISpH8VF4z4459Qo18,,...,,,trackdone,trackdone,True,False,False,1.770076e+09,True,emo rap
22426,2026-02-02 23:51:09+00:00,android,212353,BR,2804:214:8603:1d1f:1:0:3af4:9fdd,INDUSTRY BABY (feat. Jack Harlow),Lil Nas X,MONTERO,spotify:track:5Z9KJZvQzH6PFmb8SNkxuk,,...,,,trackdone,trackdone,True,False,False,1.770076e+09,True,Hip-Hop
22427,2026-02-02 23:54:49+00:00,android,219320,BR,2804:214:8603:1d1f:1:0:3af4:9fdd,Work,Rihanna,ANTI,spotify:track:72TFWvU3wUYdUuxejTTIzt,,...,,,trackdone,trackdone,True,False,False,1.770076e+09,True,pop


## Salvando na máquina

In [None]:
# Write the merged frame to spotify_songs.csv (relative path, so it lands in
# the working directory) without the DataFrame index column.
df_final.to_csv('spotify_songs.csv', index=False)