time signature (Audio)

Valence (Audio)

Danceability: tempo, chroma_stft

Energy: rms, spectral_centroid

Instrumentalness: mfcc (Higher variance in MFCCs might indicate instrumental tracks)

Loudness: rms

Speechiness: zcr, mfcc (Patterns in MFCCs that indicate speech)

Valence, Mood, Emotion: These are subjective and would require complex modeling with labeled data to infer accurately from the extracted features. Machine learning models trained on datasets where songs 
are labeled with these attributes can use these features as input to predict mood

In [10]:
import requests
from dotenv import load_dotenv
import os
import json
import base64   
import pandas as pd
import librosa
import numpy as np
import lyricsgenius
import langdetect
import re
import string
import tempfile

In [11]:
lastfm_api_key = "97d5a64d5ba4a8bc580b752ceff3b87f"
lastfm_secret = "35175090bd61f6f16ac607bd26e5b1de"

In [12]:
def search_deezer_track(track_name):
    search_url = "https://api.deezer.com/search/track"
    params = {"q": track_name}
    response = requests.get(search_url, params=params)
    
    if response.status_code == 200:
        tracks = response.json().get('data', [])
        for track in tracks:
            print(f"Track ID: {track['id']}, Title: {track['title']}, Artist: {track['artist']['name']}")
    else:
        print(f"Failed to search tracks. Status code: {response.status_code}")


def get_deezer_track_info(track_id):
    """
    Fetch track information and MP3 preview file URL from Deezer API.

    Parameters:
    - track_id: The unique identifier for the track on Deezer.

    Returns:
    A dictionary with track information and the preview URL.
    """
    base_url = "https://api.deezer.com/track/"
    response = requests.get(f"{base_url}{track_id}")
    
    if response.status_code == 200:
        data = response.json()
        main_artist = data.get("artist", {}).get("name", "")
        contributors = [contributor['name'] for contributor in data.get("contributors", []) if contributor['name'] != main_artist]
        featured_artists = ", ".join(contributors) if contributors else None

        track_info = {
            "title": data.get("title"),
            "artist": main_artist,
            "featured_artists": featured_artists,
            "duration": data.get("duration"),
            "album": data.get("album", {}).get("title"),
            "preview_url": data.get("preview"),
            "link": data.get("link")
        }
        return track_info
    else:
        print(f"Failed to fetch data for track ID {track_id}. Status code: {response.status_code}")
        return {}


In [13]:
def extract_librosa_features_from_url(url):
    # Fetch the audio file from the URL
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"Failed to download audio file from {url}")
    
    # Create a temporary file and manually manage it
    temp_dir = tempfile.gettempdir()
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3", dir=temp_dir)
    temp_file_path = temp_file.name
    
    try:
        # Write the fetched content to the temp file and close it to release the lock
        temp_file.write(response.content)
        temp_file.close()
        
        # Now, load the audio file from the path
        y, sr = librosa.load(temp_file_path, sr=None)  # Using sr=None to preserve the original sampling rate
        
        # Analyze the audio file
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        harmonic, percussive = librosa.effects.hpss(y)
        rms = np.mean(librosa.feature.rms(y=y))
        spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
        zcr = np.mean(librosa.feature.zero_crossing_rate(y))
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        
        tonnetz = np.mean(librosa.feature.tonnetz(y=y, sr=sr).T, axis=0)
        spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
        spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr).T, axis=0)
        
        harmonic_centroid = librosa.feature.spectral_centroid(y=harmonic, sr=sr)
        percussive_centroid = librosa.feature.spectral_centroid(y=percussive, sr=sr)


        mfcc_var = np.std(librosa.feature.mfcc(y=y, sr=sr).T, axis=0)
        chroma_var = np.std(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)

        features = {
            'mfcc': np.mean(librosa.feature.mfcc(y=y, sr=sr).T, axis=0),
            'mfcc_var': mfcc_var, 
            'chroma': np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0),
            'chroma_var': chroma_var, 
            'rms': rms,
            'spectral_bandwidth': spectral_bandwidth,
            'spectral_contrast': spectral_contrast,
            'spectral_centroid': spectral_centroid,
            'harmonic_centroid': harmonic_centroid,
            'percussive_centroid': percussive_centroid,
            'zcr': zcr,
            'tonnetz': tonnetz,
            'tempo': tempo,
            'beat_times': beat_times
        }
    finally:
        # Ensure the temporary file is removed after processing
        os.remove(temp_file_path)

    return features

In [21]:
def estimate_danceability(features):
    beat_intervals = np.diff(features['beat_times'])
    beat_interval_std = np.std(beat_intervals)
    # Lower standard deviation in beat intervals indicates more consistent rhythm
    rhythmic_stability = 1 - (beat_interval_std / (np.mean(beat_intervals) + 1e-6))
    # Incorporate tempo stability (if available) and chroma features for rhythm
    tempo_confidence = features.get('tempo_confidence', 0.5)  # Assuming a placeholder value if not provided
    return np.mean([rhythmic_stability, tempo_confidence, np.mean(features['chroma'])])

def estimate_loudness(features):
    # Convert RMS to a dB scale for a more perceptual loudness measurement
    rms_db = 20 * np.log10(features['rms'] + 1e-6)
    # Map the dB scale to a 0-1 range considering human hearing sensitivity
    return np.interp(rms_db, [-60, 0], [0, 1])



def calculate_song_features(features):
    scores = {}

    # Danceability
    scores['danceability'] = estimate_danceability(features)

    # Loudness
    scores['loudness'] = estimate_loudness(features)

    return scores



In [33]:
def analyze_artist_tracks(artist_name, song_name):

    def search_deezer_track(track_name):
        search_url = "https://api.deezer.com/search/track"
        query = f'artist:"{artist_name}" track:"{song_name}"'
        params = {"q": query}
        response = requests.get(search_url, params=params)
        if response.status_code == 200:
            tracks_data = response.json().get('data', [])
            for track in tracks_data:
                if artist_name.lower() in track['artist']['name'].lower():
                    # Return the first match immediately
                    return {
                        'id': track['id'],
                        'title': track['title'],
                        'artist': track['artist']['name'],
                        'preview_url': track['preview']
                    }
        return None  # Return None if no match is found

    # Search for the track
    track_name = f"{artist_name} {song_name}"
    track = search_deezer_track(track_name)

    # Initialize DataFrame
    df = pd.DataFrame(columns=['Song Name', 'Artist Name', 'Danceability', 'Loudness'])

    # Process each track found
    if track:
        # Extract features
        features = extract_librosa_features_from_url(track['preview_url'])

        # Calculate scores
        score = calculate_song_features(features)

        # Append to DataFrame directly without for-loop
        df = df.append({
            'Song Name': track['title'],
            'Artist Name': track['artist'],
            'Danceability': score.get('danceability', np.nan),
            'Loudness': score.get('loudness', np.nan)
        }, ignore_index=True)

    return df


In [22]:
analyze_artist_tracks("Psy", "Gangnam Style")

  df = df.append({


Unnamed: 0,Song Name,Artist Name,Danceability,Loudness
0,Gangnam style,Psy,0.645899,0.829844


In [23]:
analyze_artist_tracks("IU", "Blueming")

  df = df.append({


Unnamed: 0,Song Name,Artist Name,Danceability,Loudness
0,Blueming,IU,0.617445,0.830772


In [24]:
analyze_artist_tracks("Ludwig van Beethoven", "Fur Elise")

  df = df.append({


Unnamed: 0,Song Name,Artist Name,Danceability,Loudness
0,Für Elise,Ludwig van Beethoven,0.561579,0.410895
