Connessione a spotipy

In [19]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import json
import os 

# Configura le tue credenziali Spotify
client_id = os.getenv('YOUR_CLIENT_ID')
client_secret = os.getenv('YOUR_CLIENT_SECRET')
redirect_uri = os.getenv('YOUR_REDIRECT_URI')

scope = "user-library-read playlist-read-private"

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id,
                                               client_secret=client_secret,
                                               redirect_uri=redirect_uri,
                                               scope=scope))

In [26]:
def get_audio_features_and_analysis(track_ids, audio_analysis=False):
    # Maximum 50 track_ids for track info
    track_infos = sp.tracks(track_ids)['tracks']
    
    # Maximum 100 track_ids for audio features
    features = sp.audio_features(track_ids)
    
    track_data = {
        'album': [track['album']['name'] for track in track_infos],
        'artist': [track['artists'][0]['name'] for track in track_infos],
        'duration_s': [track['duration_ms'] / 1000 for track in track_infos],
        'name': [track['name'] for track in track_infos],
        'popularity': [track['popularity'] for track in track_infos],
        'id': [track['id'] for track in track_infos],
        # audio features
        'acousticness': [feature['acousticness'] for feature in features],
        'danceability': [feature['danceability'] for feature in features],
        'energy': [feature['energy'] for feature in features],
        'instrumentalness': [feature['instrumentalness'] for feature in features],
        'key': [feature['key'] for feature in features],
        'liveness': [feature['liveness'] for feature in features],
        'loudness': [feature['loudness'] for feature in features],
        'mode': [feature['mode'] for feature in features],
        'speechiness': [feature['speechiness'] for feature in features],
        'valence': [feature['valence'] for feature in features],
        'tempo': [feature['tempo'] for feature in features],
        'time_signature': [feature['time_signature'] for feature in features],
    }

    if audio_analysis:
        analyses = [sp.audio_analysis(track['id']) for track in track_infos]
        track_data.update({
            'segments': [len(analysis['segments']) for analysis in analyses],
            'bars': [len(analysis['bars']) for analysis in analyses],
            'beats': [len(analysis['beats']) for analysis in analyses],
            'sections': [len(analysis['sections']) for analysis in analyses],
            'tatums': [len(analysis['tatums']) for analysis in analyses]
        })
    
    return track_data



In [None]:
# Initialize an empty list to store the results
results = []

# Set the batch size (max 50 for Spotify API)
batch_size = 5

# Get the total number of saved tracks
total_tracks = sp.current_user_saved_tracks(limit=1)['total']
print(f"Total tracks: {total_tracks}")

total_tracks = 5
# Loop through batches of saved tracks
for offset in range(0, total_tracks, batch_size):
    tracks = sp.current_user_saved_tracks(limit=batch_size, offset=offset)
    track_ids = [item['track']['id'] for item in tracks['items']]
    print(f"Track IDs: {track_ids}")    
    # Apply the get_audio_features_and_analysis function
    track_data = get_audio_features_and_analysis(track_ids)
    # Append the result to the list
    results.append(track_data)

# Print or process the results as needed
for result in results:
    print("---")
    print(json.dumps(result, indent=4))

In [None]:
import pickle
import json
import os

def update_pickle(new_data, pickle_file='spotify_data.pickle'):
    if os.path.exists(pickle_file):
        with open(pickle_file, 'rb') as f:
            existing_data = pickle.load(f)
        for key in new_data:
            if key in existing_data:
                existing_data[key].extend(new_data[key])
            else:
                existing_data[key] = new_data[key]
    else:
        existing_data = new_data
    
    with open(pickle_file, 'wb') as f:
        pickle.dump(existing_data, f)

# Set the batch size (max 50 for Spotify API)
batch_size = 5

# Get the total number of saved tracks
total_tracks = sp.current_user_saved_tracks(limit=1)['total']
print(f"Total tracks: {total_tracks}")

# Uncomment the next line if you want to limit the number of tracks for testing
# total_tracks = 5

# Loop through batches of saved tracks
for offset in range(0, total_tracks, batch_size):
    tracks = sp.current_user_saved_tracks(limit=batch_size, offset=offset)
    track_ids = [item['track']['id'] for item in tracks['items']]
    print(f"Processing track IDs: {track_ids}")    
    
    # Apply the get_audio_features_and_analysis function
    track_data = get_audio_features_and_analysis(track_ids)
    
    # Update the pickle file with this batch of data
    update_pickle(track_data)
    
    print(f"Processed and saved tracks {offset} to {min(offset+batch_size, total_tracks)}")
    
    # Optional: print the data for this batch
    print("---")
    print(json.dumps(track_data, indent=4))

print("All data processed and saved.")

# If you want to read and display all the data at the end:
with open('spotify_data.pickle', 'rb') as f:
    all_data = pickle.load(f)

print("\nAll data from pickle file:")
print(json.dumps(all_data, indent=4))


In [3]:
# Create a directory to store the JSON files if it doesn't exist
if not os.path.exists('temp_data'):
    os.makedirs('temp_data')

# Save each batch of songs to a separate JSON file
def save_batch(batch, batch_number):
    filename = f'temp_data/songs_batch_{batch_number}.json'
    with open(filename, 'w') as f:
        json.dump(batch, f)
    print(f"Saved batch {batch_number} to {filename}")

# In your main loop where you process songs:
batch_size = 20
current_batch = []
batch_number = 1

for item in results['items']:
    # Process the track and get audio features/analysis as before
    track = item['track']
    track_id = track['id']
    audio_info = get_audio_features_and_analysis(track_id)
    
    song_data = {
        "uri": track['uri'],
        "name": track['name'],
        "artist": track['artists'][0]['name'],
        "album": track['album']['name'],
        "popularity": track['popularity'],
        "added_at": item['added_at'],
        **audio_info
    }
    
    current_batch.append(song_data)
    
    if len(current_batch) == batch_size:
        save_batch(current_batch, batch_number)
        current_batch = []
        batch_number += 1

# Save any remaining songs in the last batch
if current_batch:
    save_batch(current_batch, batch_number)

NameError: name 'results' is not defined

In [5]:
from pymongo import MongoClient
from pymongo.server_api import ServerApi
import os

mongo_uri = os.getenv('YOUR_MONGO_URI')
# Connect to MongoDB
client = MongoClient(mongo_uri, server_api=ServerApi('1'))
db = client['portfolio-db']
collection = db['songs']
# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

localhost:27017: [Errno 111] Connection refused (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 667db56d7fc5727dcd1c9a7d, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [Errno 111] Connection refused (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>


In [None]:
# Function to upload a batch of songs
def upload_batch(filename):
    with open(filename, 'r') as f:
        songs = json.load(f)
    
    result = collection.insert_many(songs)
    print(f"Uploaded {len(result.inserted_ids)} songs from {filename}")

# Upload all batches
for filename in os.listdir('temp_data'):
    if filename.endswith('.json'):
        upload_batch(os.path.join('temp_data', filename))

print("All data uploaded to MongoDB")

Le altre cose da importare oltre ai saved tracks

In [None]:
def get_liked_songs(sp):
    results = sp.current_user_saved_tracks()
    liked_songs = []
    while results:
        for item in results['items']:
            track = item['track']
            liked_songs.append(track)
        if results['next']:
            results = sp.next(results)
        else:
            results = None
    return liked_songs

def get_liked_albums(sp):
    results = sp.current_user_saved_albums()
    liked_albums = []
    while results:
        for item in results['items']:
            album = item['album']
            for track in album['tracks']['items']:
                liked_albums.append(track)
        if results['next']:
            results = sp.next(results)
        else:
            results = None
    return liked_albums

def get_playlist_tracks(sp):
    results = sp.current_user_playlists()
    playlist_tracks = []
    while results:
        for playlist in results['items']:
            tracks = sp.playlist_tracks(playlist['id'])
            for item in tracks['items']:
                track = item['track']
                playlist_tracks.append(track)
        if results['next']:
            results = sp.next(results)
        else:
            results = None
    return playlist_tracks

liked_songs = get_liked_songs(sp)
liked_albums = get_liked_albums(sp)
playlist_tracks = get_playlist_tracks(sp)

all_tracks = liked_songs + liked_albums + playlist_tracks

# Rimuovi duplicati basati sull'ID della canzone
unique_tracks = {track['id']: track for track in all_tracks}.values()

# Stampa i titoli delle canzoni
for track in unique_tracks:
    print(track['name'], '-', track['artists'][0]['name'])


## Postgres


In [None]:
import psycopg2
import json
from psycopg2.extras import Json

# Database connection parameters
db_params = {
    "host": "localhost",
    "database": "your_database_name",
    "user": "your_username",
    "password": "your_password"
}

# Assume 'results' is your list of JSON objects, one for each song

# Create a table in your PostgreSQL database (run this once)
def create_table():
    conn = psycopg2.connect(**db_params)
    cur = conn.cursor()
    cur.execute("""
    CREATE TABLE IF NOT EXISTS songs (
        id SERIAL PRIMARY KEY,
        name VARCHAR(255),
        artist VARCHAR(255),
        danceability FLOAT,
        energy FLOAT,
        key INTEGER,
        loudness FLOAT,
        mode INTEGER,
        speechiness FLOAT,
        acousticness FLOAT,
        instrumentalness FLOAT,
        liveness FLOAT,
        valence FLOAT,
        tempo FLOAT,
        duration_ms INTEGER,
        time_signature INTEGER,
        segments INTEGER,
        bars INTEGER,
        beats INTEGER,
        sections INTEGER,
        tatums INTEGER,
        full_data JSONB
    )
    """)
    conn.commit()
    cur.close()
    conn.close()

# Insert data into the database
def insert_songs(songs):
    conn = psycopg2.connect(**db_params)
    cur = conn.cursor()

    for song in songs:
        cur.execute("""
        INSERT INTO songs (
            name, artist, danceability, energy, key, loudness, mode,
            speechiness, acousticness, instrumentalness, liveness,
            valence, tempo, duration_ms, time_signature, segments,
            bars, beats, sections, tatums, full_data
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            song['name'], song['artist'], song['danceability'], song['energy'],
            song['key'], song['loudness'], song['mode'], song['speechiness'],
            song['acousticness'], song['instrumentalness'], song['liveness'],
            song['valence'], song['tempo'], song['duration_ms'], song['time_signature'],
            song['segments'], song['bars'], song['beats'], song['sections'],
            song['tatums'], Json(song)
        ))

    conn.commit()
    cur.close()
    conn.close()

# Main execution
if __name__ == "__main__":
    # Create the table (run this once)
    create_table()

    # Insert the songs
    insert_songs(results)  # 'results' is your list of song JSONs

    print("Data inserted successfully!")
