### **Coletor de Gênero**
#### **Objetivo**: Preparar a base para as análises preditivas e descritivas

##### Imports

In [10]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os 
import dotenv
import requests
import pandas as pd


##### ENVs

In [11]:
# Load variables from a local .env file into the process environment.
dotenv.load_dotenv()

# Spotify application credentials, read from the environment.
client_id = os.getenv('CLIENT_ID')
client_secret = os.getenv('CLIENT_SECRET')

# API key and endpoint of the Audioscrobbler (Last.fm) web service.
# The key is sent as a query parameter on every request, so the endpoint
# uses HTTPS to avoid transmitting it in clear text.
API_KEY = os.getenv('API_KEY')
BASE_URL = "https://ws.audioscrobbler.com/2.0/"

# Spotify Web API client authenticated with the credentials above.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

##### Caminho da pasta

In [12]:
# Directory (relative to the working directory) that is scanned for the
# Streaming_History_Audio*.json export files.
base_path = "Spotify Extended Streaming History"

In [13]:
# Single requests.Session shared by every buscar_tags call to BASE_URL.
session = requests.Session()

##### Funções

In [14]:
def buscar_tags(params, timeout=10):
    """Send a GET request to BASE_URL and return the decoded JSON body.

    Args:
        params: Query-string parameters for the request.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout ``requests`` waits indefinitely, which lets
            a single stuck connection stall a long batch of lookups.

    Returns:
        The parsed JSON response, or an empty dict when the request fails
        or the body is not valid JSON.
    """
    try:
        response = session.get(BASE_URL, params=params, timeout=timeout)
        return response.json()
    except (requests.RequestException, ValueError):
        # Network errors and non-JSON bodies are reported as "no data" so
        # that one failed lookup does not abort the whole batch.
        return {}

In [15]:
def get_genre_by_name(artista):
    """Return the name of the first top tag reported for an artist.

    Queries the ``artist.getTopTags`` method through ``buscar_tags``.

    Args:
        artista: Artist name to look up.

    Returns:
        The ``name`` of the first entry under ``toptags.tag`` in the
        response, or None when the response does not have that shape or
        the tag list is empty.
    """
    # Look the artist up by its tags.
    params_artist = {
        "method": "artist.getTopTags",
        "artist": artista,
        "api_key": API_KEY,
        "format": "json"
    }

    data_artist = buscar_tags(params_artist)

    try:
        return data_artist["toptags"]["tag"][0]["name"]
    except (KeyError, IndexError, TypeError):
        # Missing keys, an empty tag list, or an unexpected payload shape
        # all mean "no genre found". Catching only these keeps
        # KeyboardInterrupt and SystemExit propagating normally.
        return None



In [16]:
def get_genre_by_artist(artist):
    """Return the first genre of the top Spotify search hit for an artist.

    Args:
        artist: Artist name used as the search query.

    Returns:
        The first entry of the ``genres`` list of the first artist in the
        search result, or None when there is no hit, the hit has no
        genres, or the response does not have the expected shape.
    """
    artist_info = sp.search(q=artist, type='artist', limit=1)

    try:
        return artist_info['artists']['items'][0]['genres'][0]
    except (IndexError, KeyError, TypeError):
        # IndexError: no hit or empty genre list.
        # KeyError / TypeError: response missing the expected keys or not
        # a dict at all; treat as "not found" so the caller can fall back.
        return None

In [17]:
def get_genre(artista):
    """Resolve a genre for an artist.

    Tries ``get_genre_by_artist`` first; when that yields a falsy value
    (None or an empty string) the result of ``get_genre_by_name`` is
    returned instead.
    """
    return get_genre_by_artist(artista) or get_genre_by_name(artista)

##### Coletando bases

In [18]:
dfs = []

# os.listdir returns names in arbitrary order; sorting makes the row order
# of the concatenated frame reproducible across runs and machines.
files = sorted(os.listdir(base_path))

# Read every audio streaming-history JSON file found in base_path.
for file in files:
    if file.startswith("Streaming_History_Audio") and file.endswith(".json"):
        file_path = os.path.join(base_path, file)

        df_temp = pd.read_json(file_path)
        dfs.append(df_temp)

# Fail with an actionable message instead of the generic error pd.concat
# raises when given an empty list.
if not dfs:
    raise FileNotFoundError(
        f"No Streaming_History_Audio*.json files found in {base_path!r}"
    )

df = pd.concat(dfs, ignore_index=True)

##### Criação do DF 

In [19]:
# Keep only rows that have both a track URI and an artist name, parse the
# `ts` column into datetimes, and retain plays from 2023-01-01 onwards.
df = (
    df
    .dropna(subset=['spotify_track_uri', 'master_metadata_album_artist_name'])
    .assign(ts=lambda frame: pd.to_datetime(frame['ts']))
    .loc[lambda frame: frame['ts'] >= '2023-01-01']
)

In [20]:
# One row per distinct artist, so each artist triggers a single genre lookup.
# The explicit copy makes df_unico independent of df before a column is added.
df_unico = (
    df[['master_metadata_album_artist_name']]
    .drop_duplicates()
    .copy()
)

# get_genre only needs the artist name, so map over that column directly
# rather than building a row object per artist with apply(axis=1).
df_unico['genero'] = df_unico['master_metadata_album_artist_name'].map(get_genre)

display(df_unico)

KeyboardInterrupt: 

In [None]:
# Attach the looked-up genre to every play by joining on the artist name;
# the inner join keeps only rows whose artist is present in both frames.
df_final = df.merge(
    df_unico,
    on='master_metadata_album_artist_name',
    how='inner',
)

In [None]:
# Render the merged frame for a visual check.
# NOTE(review): the rendered table includes the ip_addr column; consider
# clearing this output before sharing the notebook.
display(df_final)

Unnamed: 0,ts,platform,ms_played,conn_country,ip_addr,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,episode_name,...,audiobook_chapter_uri,audiobook_chapter_title,reason_start,reason_end,shuffle,skipped,offline,offline_timestamp,incognito_mode,genero
0,2023-04-04 18:25:30+00:00,android,11451,BR,177.76.148.44,Talk that Talk,TWICE,BETWEEN 1&2,spotify:track:0RDqNCRBGrSegk16Avfzuq,,...,,,playbtn,endplay,False,True,False,1680632717,False,k-pop
1,2023-04-04 18:25:47+00:00,android,16559,BR,177.76.148.44,There's Nothing Holdin' Me Back,Shawn Mendes,Illuminate,spotify:track:7JJmb5XwzOO8jgpou264Ml,,...,,,playbtn,endplay,False,True,False,1680632730,False,pop
2,2023-04-04 18:25:55+00:00,android,4168,BR,177.76.148.44,Woman,Doja Cat,Planet Her,spotify:track:6Uj1ctrBOjOas8xZXGqKk4,,...,,,playbtn,endplay,False,True,False,1680632747,False,pop
3,2023-04-04 19:23:17+00:00,android,80840,BR,177.76.148.44,FLOWER,JISOO,ME,spotify:track:69CrOS7vEHIrhC2ILyEi0s,,...,,,clickrow,logout,False,False,False,1680635513,False,k-pop
4,2023-04-04 22:51:39+00:00,android,67760,BR,201.42.207.140,FLOWER,JISOO,ME,spotify:track:69CrOS7vEHIrhC2ILyEi0s,,...,,,appload,logout,False,False,False,1680647986,False,k-pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25992,2026-01-29 22:55:17+00:00,android,122973,BR,2804:214:801a:7f2d:69b2:36e:d1f0:f2cf,Famoso Machuca Xota,Mc Larissa,Famoso Machuca Xota,spotify:track:5u0iZ81Yf2L90FQLYQUiiG,,...,,,clickrow,endplay,False,True,False,1769727189,False,brega funk
25993,2026-01-29 22:55:20+00:00,android,2639,BR,2804:214:801a:7f2d:69b2:36e:d1f0:f2cf,"Tá Duvidando Mulher, Nós Vareta Vareta",DJ 7W,"Tá Duvidando Mulher, Nós Vareta Vareta",spotify:track:3WSkOgnOcDcZJIROnkCNgW,,...,,,clickrow,endplay,False,True,False,1769727317,False,brazilian funk
25994,2026-01-29 22:56:55+00:00,android,96005,BR,2804:214:801a:7f2d:69b2:36e:d1f0:f2cf,Montagem Ritmo Brutal,MXZI,Montagem Ritmo Brutal,spotify:track:303TM5rMlsvVUSEwGAFbkX,,...,,,clickrow,trackdone,False,False,False,1769727320,False,phonk
25995,2026-01-29 22:57:07+00:00,android,9907,BR,2804:214:801a:7f2d:69b2:36e:d1f0:f2cf,Agudo Mágico 6,MC K.K,Agudo Mágico 6,spotify:track:67wjqWn5uryixWItcRLi62,,...,,,trackdone,endplay,False,True,False,1769727415,False,brazilian funk


#### Salvando na máquina

In [None]:
# Write the merged frame to spotify_songs.csv in the working directory,
# without the index column.
# NOTE(review): the frame still carries the ip_addr column; confirm that it
# should be part of the saved file.
df_final.to_csv('spotify_songs.csv', index=False)