In [17]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import os
import requests
from io import BytesIO
from pydub import AudioSegment

# Dataset locations: the metadata CSV and the folder that holds the mp3 files.
file_path = '/home/missantroop/SongAnalysis/AudioDataset/JamendoLyrics.csv'
audio_folder = r'/home/missantroop/SongAnalysis/AudioDataset/mp3'

# Load the track metadata table.
df = pd.read_csv(file_path)

# Prefix every Filepath entry with the audio folder.
# Note: os.path.join returns an entry unchanged when that entry is already
# an absolute path, in which case audio_folder is ignored for it.
df['Filepath'] = df['Filepath'].map(
    lambda relative_name: os.path.join(audio_folder, relative_name)
)

# Columns consumed by the batch-processing loops in later cells.
audio_urls = df['URL']
audio_files = df['Filepath']

print(df)

                                                  URL  \
0   https://www.jamendo.com/track/1559261/give-me-...   
1       https://www.jamendo.com/track/1552064/keep-on   
2   https://www.jamendo.com/track/1537288/back-in-...   
3        https://www.jamendo.com/track/1442030/peyote   
4        https://www.jamendo.com/track/1465148/embers   
..                                                ...   
74  https://www.jamendo.com/track/1272521/une-vie-...   
75  https://www.jamendo.com/track/1474051/vente/ly...   
76  https://www.jamendo.com/track/886274/veraender...   
77  https://www.jamendo.com/track/1721215/le-royau...   
78  https://www.jamendo.com/track/141082/en-libert...   

                                             Filepath             Artist  \
0   /home/missantroop/SongAnalysis/jamendolyrics/m...               HILA   
1   /home/missantroop/SongAnalysis/jamendolyrics/m...   Quentin Hannappe   
2   /home/missantroop/SongAnalysis/jamendolyrics/m...        Songwriterz   
3   /home/m

In [18]:
# Preview the first rows. A notebook cell only renders its *last* expression,
# so a bare `df.head()` in the middle of the cell is computed and thrown away;
# print it explicitly so the preview is actually shown.
print(df.head())

# Column dtypes and non-null counts (info() writes its report to stdout itself).
df.info()

# Missing values per column; as the final expression it is rendered as the cell output.
df.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79 entries, 0 to 78
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   URL           79 non-null     object
 1   Filepath      79 non-null     object
 2   Artist        79 non-null     object
 3   Title         79 non-null     object
 4   Genre         79 non-null     object
 5   LicenseType   79 non-null     object
 6   Language      79 non-null     object
 7   LyricOverlap  79 non-null     bool  
 8   Polyphonic    79 non-null     bool  
 9   NonLexical    79 non-null     bool  
dtypes: bool(3), object(7)
memory usage: 4.7+ KB


URL             0
Filepath        0
Artist          0
Title           0
Genre           0
LicenseType     0
Language        0
LyricOverlap    0
Polyphonic      0
NonLexical      0
dtype: int64

In [19]:
def save_spectrogram_with_features(filepath, output_path):
    """Render a five-panel feature overview of an audio file and save it as an image.

    Panels, top to bottom: mel spectrogram in dB, chroma, MFCCs, RMS energy
    and spectral centroid.

    Parameters
    ----------
    filepath
        Path of the audio file; passed to ``librosa.load``.
    output_path
        Destination of the figure; passed to ``Figure.savefig``.

    Raises
    ------
    Exception
        If loading, feature extraction, plotting or saving fails. The message
        names ``filepath`` and the original error is chained as ``__cause__``.
    """
    try:
        y, sr = librosa.load(filepath)

        # Mel spectrogram, converted to dB relative to its own maximum.
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        S_DB = librosa.power_to_db(S, ref=np.max)

        # Additional features.
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        rms = librosa.feature.rms(y=y)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)

        fig, axes = plt.subplots(5, 1, figsize=(15, 10))
        try:
            # Mel spectrogram panel.
            mel_img = librosa.display.specshow(
                S_DB, sr=sr, x_axis='time', y_axis='mel', ax=axes[0]
            )
            fig.colorbar(mel_img, ax=axes[0], format='%+2.0f dB')
            axes[0].set_title('Mel-frequency spectrogram')

            # Chroma panel.
            chroma_img = librosa.display.specshow(
                chroma, sr=sr, x_axis='time', y_axis='chroma', ax=axes[1]
            )
            fig.colorbar(chroma_img, ax=axes[1])
            axes[1].set_title('Chroma feature')

            # MFCC panel.
            mfcc_img = librosa.display.specshow(mfccs, sr=sr, x_axis='time', ax=axes[2])
            fig.colorbar(mfcc_img, ax=axes[2])
            axes[2].set_title('MFCC')

            # RMS energy and spectral centroid share the same layout: a
            # log-scaled curve over frame index with the x ticks hidden.
            curve_panels = (
                (axes[3], rms, 'RMS Energy'),
                (axes[4], spectral_centroid, 'Spectral Centroid'),
            )
            for ax, values, label in curve_panels:
                ax.semilogy(values.T, label=label)
                ax.set_xticks([])
                ax.set_xlim([0, values.shape[-1]])
                ax.legend(loc='upper right')
                ax.set_title(label)

            fig.tight_layout()
            fig.savefig(output_path)
        finally:
            # Always release the figure, even when plotting or saving fails;
            # otherwise a batch run accumulates open figures in memory.
            plt.close(fig)
    except Exception as e:
        raise Exception(f"Error processing audio from {filepath}: {e}") from e

In [20]:
# Folder that receives one feature figure per local audio file.
output_dir = 'spectrograms'
os.makedirs(output_dir, exist_ok=True)

# Best-effort batch run: a failure on one file is reported and the loop moves on.
for idx, audio_file in enumerate(audio_files):
    try:
        output_path = os.path.join(output_dir, f'spectrogram_{idx}.png')
        save_spectrogram_with_features(audio_file, output_path)
    except Exception as exc:
        print(f'Error processing {audio_file}: {exc}')
    else:
        print(f'Saved spectrogram for {audio_file} at {output_path}')

Saved spectrogram for /home/missantroop/SongAnalysis/jamendolyrics/mp3/HILA_-_Give_Me_the_Same.mp3 at spectrograms/spectrogram_0.png
Saved spectrogram for /home/missantroop/SongAnalysis/jamendolyrics/mp3/Quentin_Hannappe_-_Keep_On.mp3 at spectrograms/spectrogram_1.png
Saved spectrogram for /home/missantroop/SongAnalysis/jamendolyrics/mp3/Songwriterz_-_Back_In_Time.mp3 at spectrograms/spectrogram_2.png
Saved spectrogram for /home/missantroop/SongAnalysis/jamendolyrics/mp3/Kinematic_-_Peyote.mp3 at spectrograms/spectrogram_3.png
Saved spectrogram for /home/missantroop/SongAnalysis/jamendolyrics/mp3/Avercage_-_Embers.mp3 at spectrograms/spectrogram_4.png
Saved spectrogram for /home/missantroop/SongAnalysis/jamendolyrics/mp3/Color_Out_-_Falling_Star.mp3 at spectrograms/spectrogram_5.png
Saved spectrogram for /home/missantroop/SongAnalysis/jamendolyrics/mp3/The.madpix.project_-_One_Way_Street.mp3 at spectrograms/spectrogram_6.png
Saved spectrogram for /home/missantroop/SongAnalysis/jamendol

In [33]:
def load_audio_from_url(url, timeout=30):
    """Download an audio file and decode it into a mono float32 waveform.

    Parameters
    ----------
    url
        Address of the audio file to download.
    timeout
        Seconds to wait for the server, forwarded to ``requests.get`` so a
        stalled connection cannot hang a batch run indefinitely.

    Returns
    -------
    tuple
        ``(y, sr)``: ``y`` is a 1-D ``float32`` array scaled by the decoded
        sample width to the range [-1, 1); ``sr`` is the frame rate reported
        by the decoder.

    Raises
    ------
    Exception
        If the HTTP status code is not 200. Errors raised by ``requests`` or
        by the decoder propagate unchanged.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to download audio from {url}")

    audio = AudioSegment.from_file(BytesIO(response.content))
    samples = np.array(audio.get_array_of_samples())

    # Multi-channel audio comes back interleaved (L, R, L, R, ...). Fold it
    # into frames and average the channels so the result is a real mono signal
    # instead of a double-length waveform with the channels mixed together.
    if audio.channels > 1:
        samples = samples.reshape(-1, audio.channels).mean(axis=1)

    # Scale by the actual sample width (bytes per sample) rather than assuming
    # 16-bit audio; for 16-bit input this is the same 2**15 divisor as before.
    full_scale = float(2 ** (8 * audio.sample_width - 1))
    y = samples.astype(np.float32) / full_scale
    return y, audio.frame_rate
    

In [34]:
def save_spectrogram_from_url(url, output_path):
    """Download audio from ``url`` and save its mel spectrogram (in dB) as an image.

    Parameters
    ----------
    url
        Address passed to ``load_audio_from_url``.
    output_path
        Destination of the figure; passed to ``Figure.savefig``.

    Raises
    ------
    Exception
        Whatever ``load_audio_from_url``, librosa or matplotlib raise; errors
        are not wrapped here.
    """
    y, sr = load_audio_from_url(url)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_DB = librosa.power_to_db(S, ref=np.max)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel-frequency spectrogram')
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even if plotting or saving fails, so repeated
        # calls in a loop do not accumulate open figures.
        plt.close(fig)

In [35]:
output_dir = 'spectrograms'
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): the URL column appears to hold Jamendo track *page* addresses
# (https://www.jamendo.com/track/<id>/<slug>), not direct audio files, so the
# downloaded bytes are presumably not decodable audio - confirm before relying
# on this cell.

# Best-effort batch run: a failure on one URL is reported and the loop moves on.
for idx, audio_url in enumerate(audio_urls):
    try:
        # Use a distinct file name: the local-file loop already writes
        # spectrogram_{idx}.png into this folder, and reusing that name would
        # silently overwrite those feature figures.
        output_path = os.path.join(output_dir, f'spectrogram_url_{idx}.png')
        save_spectrogram_from_url(audio_url, output_path)
        print(f'Saved spectrogram for {audio_url} at {output_path}')
    except Exception as e:
        print(f'Error processing {audio_url}: {e}')

Error processing https://www.jamendo.com/track/1559261/give-me-the-same: [WinError 2] O sistema não pode encontrar o arquivo especificado
Error processing https://www.jamendo.com/track/1552064/keep-on: [WinError 2] O sistema não pode encontrar o arquivo especificado
Error processing https://www.jamendo.com/track/1537288/back-in-time: [WinError 2] O sistema não pode encontrar o arquivo especificado
Error processing https://www.jamendo.com/track/1442030/peyote: [WinError 2] O sistema não pode encontrar o arquivo especificado
Error processing https://www.jamendo.com/track/1465148/embers: [WinError 2] O sistema não pode encontrar o arquivo especificado


KeyboardInterrupt: 