In [0]:
from requests import post, get
import datetime, time ,base64, urllib.parse, webbrowser, requests, json, time
from src.utils.token_and_auth import Auth
from src.utils.auth_and_token_utils import  get_valid_token 

In [0]:
# Notebook setup cell: build the request headers and module-level constants.
# Order matters: the token is obtained first and then turned into headers.
auth = Auth()
token = get_valid_token()
HEADERS = auth.get_auth_header(token)

# Root URL that the endpoint paths used in this file are appended to.
BASE_URL ="https://api.spotify.com/v1"
# 2025-10-01 00:00 expressed in milliseconds since the epoch. The datetime is
# naive, so .timestamp() interprets it in the machine's local timezone.
after_ms = int(datetime.datetime(2025, 10, 1, 0, 0).timestamp() * 1000)
print(after_ms)
# NOTE(review): `params`, `after_ms` and `current_time` are not referenced by
# the functions visible in this file — presumably used by other cells; confirm.
params = {"limit": 50}

current_time = datetime.datetime.now()

In [0]:
def chunk_list(data, size):
    """Yield successive slices of `data`, each holding at most `size` items."""
    # The start offsets are fixed up front from the length at first iteration.
    starts = range(0, len(data), size)
    yield from (data[begin:begin + size] for begin in starts)

# --- EXTRACTION FUNCTIONS (ETL) ---

def get_play_history(headers, unique_track_ids, unique_artist_ids):
    """
    Source A: fetch the user's recently played tracks.

    Sends a single GET to ``{BASE_URL}/me/player/recently-played?limit=50`` and
    flattens every returned item into one row. Only the first listed artist is
    stored on the row, but every artist of the track is registered in
    ``unique_artist_ids``.

    Args:
        headers: HTTP headers forwarded to ``requests.get``.
        unique_track_ids: set updated in place with each track ID seen.
        unique_artist_ids: set updated in place with each artist ID seen.

    Returns:
        A list of dicts with the keys ``played_at``, ``track_id``,
        ``track_name``, ``artist_id``, ``artist_name``, ``album_id``,
        ``album_name``, ``context_type`` and ``duration_ms``. If the request
        fails with an HTTP error, the error is printed and the rows collected
        so far (an empty list) are returned.
    """
    print("Fetching play history...")
    play_history_data = []
    url = f"{BASE_URL}/me/player/recently-played?limit=50"

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        results = response.json()

        # `x.get(key) or default` is used throughout instead of
        # `x.get(key, default)`: the latter still returns None when the key is
        # present with a JSON null, which would crash the chained `.get`.
        for item in results.get('items') or []:
            if not item:
                print("Warning: Am găsit un 'item' nul în istoric. Se omite.")
                continue

            track = item.get('track')
            if not track:
                continue

            # Drop null artist entries before picking the primary artist.
            artists = [art for art in (track.get('artists') or []) if art]
            primary_artist = artists[0] if artists else {}
            album = track.get('album') or {}
            track_id = track.get('id')

            play_history_data.append({
                'played_at': item.get('played_at'),
                'track_id': track_id,
                'track_name': track.get('name'),
                'artist_id': primary_artist.get('id'),
                'artist_name': primary_artist.get('name'),
                'album_id': album.get('id'),
                'album_name': album.get('name'),
                'context_type': (item.get('context') or {}).get('type'),
                'duration_ms': track.get('duration_ms'),
            })

            # Only register real IDs: a None in these sets cannot be looked up
            # later and would break any ','.join(...) over the collected IDs.
            if track_id:
                unique_track_ids.add(track_id)
            for art in artists:
                art_id = art.get('id')
                if art_id:
                    unique_artist_ids.add(art_id)

    except requests.exceptions.HTTPError as e:
        print(f"Error fetching play history: {e}")
        # Guard against an HTTPError that carries no response object.
        if e.response is not None and e.response.status_code == 401:
            print("EROARE: Token-ul a expirat sau este invalid.")

    return play_history_data

def get_playlists_and_tracks(headers, unique_track_ids, unique_artist_ids):
    """
    Source B: fetch the user's playlists and the tracks inside each one.

    Pages through ``{BASE_URL}/me/playlists`` by following the ``next`` URL of
    each response, and for every playlist pages through its tracks the same
    way. An HTTP error while reading one playlist's tracks is printed and the
    remaining playlists are still processed; an HTTP error while listing the
    playlists themselves stops the extraction and returns what was collected.

    Args:
        headers: HTTP headers forwarded to ``requests.get``.
        unique_track_ids: set updated in place with each track ID seen.
        unique_artist_ids: set updated in place with each artist ID seen.

    Returns:
        A tuple ``(playlists_data, playlist_tracks_data)``: one dict per
        playlist and one dict per (playlist, track) pair.
    """
    print("Fetching playlists and their tracks...")
    playlists_data = []
    playlist_tracks_data = []

    next_url = f"{BASE_URL}/me/playlists"

    try:
        # Outer loop: pagination over the playlists.
        while next_url:
            response = requests.get(next_url, headers=headers)
            response.raise_for_status()
            playlists = response.json()

            for playlist in playlists.get('items') or []:
                if not playlist:
                    continue  # skip null entries instead of crashing on .get

                playlist_id = playlist.get('id')

                # `or {}` also covers keys that are present with a JSON null.
                playlists_data.append({
                    'playlist_id': playlist_id,
                    'playlist_name': playlist.get('name'),
                    'owner_name': (playlist.get('owner') or {}).get('display_name'),
                    'followers': (playlist.get('followers') or {}).get('total'),
                    'total_tracks': (playlist.get('tracks') or {}).get('total'),
                    'description': playlist.get('description'),
                    'snapshot_id': playlist.get('snapshot_id'),
                })

                if not playlist_id:
                    continue  # no ID means no tracks URL can be built

                # Isolate failures per playlist so that one unreadable
                # playlist does not abort the extraction of all the others.
                try:
                    _collect_playlist_tracks(
                        headers,
                        playlist_id,
                        playlist_tracks_data,
                        unique_track_ids,
                        unique_artist_ids,
                    )
                except requests.exceptions.HTTPError as e:
                    print(f"Error fetching tracks for playlist {playlist_id}: {e}")

            next_url = playlists.get('next')  # URL of the next playlists page

    except requests.exceptions.HTTPError as e:
        print(f"Error fetching playlists: {e}")

    return playlists_data, playlist_tracks_data


def _collect_playlist_tracks(headers, playlist_id, playlist_tracks_data,
                             unique_track_ids, unique_artist_ids):
    """Page through one playlist's tracks, appending rows and IDs in place.

    Rows are appended directly to ``playlist_tracks_data`` so that pages
    already read are kept if a later page raises ``HTTPError``.
    """
    tracks_url = f"{BASE_URL}/playlists/{playlist_id}/tracks"
    while tracks_url:
        tracks_response = requests.get(tracks_url, headers=headers)
        tracks_response.raise_for_status()
        tracks = tracks_response.json()

        for item in tracks.get('items') or []:
            if not item:
                continue
            track = item.get('track')
            # Entries without a track or without a track ID are skipped.
            if not track or not track.get('id'):
                continue

            # Keep only real artist IDs; null artists or IDs are dropped.
            artist_ids = [
                artist['id']
                for artist in (track.get('artists') or [])
                if artist and artist.get('id')
            ]

            playlist_tracks_data.append({
                'playlist_id': playlist_id,
                'track_id': track.get('id'),
                'track_name': track.get('name'),
                'artist_ids': artist_ids,
                'album_id': (track.get('album') or {}).get('id'),
                'added_at': item.get('added_at'),
                # NOTE(review): `added_by` is presumably null for some old
                # playlists — confirm against the Spotify API reference.
                'added_by': (item.get('added_by') or {}).get('id'),
                'duration_ms': track.get('duration_ms'),
                'popularity': track.get('popularity'),
            })

            unique_track_ids.add(track.get('id'))
            unique_artist_ids.update(artist_ids)

        tracks_url = tracks.get('next')  # URL of the next tracks page

def get_full_metadata(headers, unique_track_ids, unique_artist_ids):
    """
    Step 3: use the collected unique IDs to build the rows for
    `bronze_tracks`, `bronze_artists` and `bronze_audio_features`.

    IDs are sent in comma-separated batches: 50 per request to ``/tracks`` and
    ``/artists``, 100 per request to ``/audio-features``. Each of the three
    sections has its own error handling, so an HTTP error in one section is
    printed and the other sections still run; rows fetched before the error
    are kept.

    Args:
        headers: HTTP headers forwarded to ``requests.get``.
        unique_track_ids: iterable of track IDs; falsy entries are ignored.
        unique_artist_ids: iterable of artist IDs; falsy entries are ignored.

    Returns:
        A tuple ``(tracks_data, artists_data, audio_features_data)`` of lists
        of dicts.
    """
    print(f"Fetching metadata for {len(unique_track_ids)} unique tracks and {len(unique_artist_ids)} unique artists...")

    tracks_data = []
    artists_data = []
    audio_features_data = []

    # Drop None/empty IDs (they would make ','.join raise TypeError) and sort
    # so that the batches are the same from run to run.
    track_ids_list = sorted(tid for tid in unique_track_ids if tid)
    artist_ids_list = sorted(aid for aid in unique_artist_ids if aid)

    # 1. bronze_tracks (batches of 50)
    try:
        for track in _iter_batched(headers, f"{BASE_URL}/tracks", track_ids_list, 50, 'tracks'):
            tracks_data.append({
                'track_id': track.get('id'),
                'track_name': track.get('name'),
                # ... (remaining fields, as in the spotipy example)
            })
    except requests.exceptions.HTTPError as e:
        print(f"Error fetching tracks metadata: {e}")

    # 2. bronze_artists (batches of 50)
    try:
        for artist in _iter_batched(headers, f"{BASE_URL}/artists", artist_ids_list, 50, 'artists'):
            artists_data.append({
                'artist_id': artist.get('id'),
                'artist_name': artist.get('name'),
                'genres': artist.get('genres'),
                # ... (remaining fields)
            })
    except requests.exceptions.HTTPError as e:
        print(f"Error fetching artists metadata: {e}")

    # 3. bronze_audio_features (batches of 100)
    try:
        for features in _iter_batched(headers, f"{BASE_URL}/audio-features", track_ids_list, 100, 'audio_features'):
            audio_features_data.append({
                'track_id': features.get('id'),
                'danceability': features.get('danceability'),
                # ... (remaining fields)
            })
    except requests.exceptions.HTTPError as e:
        print(f"Error fetching audio features: {e}")

    # The field mappings above are shortened for brevity; the extraction
    # logic for the remaining fields is the same as in the spotipy script.

    print(f"Successfully fetched tracks: {len(tracks_data)}, artists: {len(artists_data)}, features: {len(audio_features_data)}")
    return tracks_data, artists_data, audio_features_data


def _iter_batched(headers, url, ids, batch_size, result_key):
    """Query `url` with `ids` in batches and yield each non-null result object.

    Implemented as a generator so the caller keeps the objects already
    yielded if a later batch raises ``HTTPError``.
    """
    for batch in chunk_list(ids, batch_size):
        response = requests.get(url, headers=headers, params={'ids': ','.join(batch)})
        response.raise_for_status()
        # Null entries in the result list are skipped rather than passed on.
        for entry in response.json().get(result_key) or []:
            if entry:
                yield entry

In [0]:

# Sets that accumulate the unique IDs; the extraction functions below
# fill them in place.
unique_track_ids = set()
unique_artist_ids = set()

# Run the extraction functions (both add to the two sets above).
play_history_table = get_play_history(HEADERS, unique_track_ids, unique_artist_ids)
playlists_table, playlist_tracks_table = get_playlists_and_tracks(HEADERS, unique_track_ids, unique_artist_ids)

# Run the enrichment function; it must come after the extraction calls
# because it reads the IDs they collected.
tracks_table, artists_table, audio_features_table = get_full_metadata(HEADERS, unique_track_ids, unique_artist_ids)

# Print a row-count summary for each extracted table.
print("\n--- REZUMAT EXTRAGERE ---")
print(f"Date bronze_play_history:   {len(play_history_table)} rânduri")
print(f"Date bronze_playlists:      {len(playlists_table)} rânduri")
print(f"Date playlist_tracks:       {len(playlist_tracks_table)} rânduri")
print("-----------------------------")
print(f"Date bronze_tracks:         {len(tracks_table)} rânduri")
print(f"Date bronze_artists:        {len(artists_table)} rânduri")
print(f"Date bronze_audio_features: {len(audio_features_table)}")

# These lists of dictionaries can be saved to JSON/CSV files
# or loaded into a database.