In [1]:
! pip install pyaudio
! pip install essentia
! pip install spotipy

import os
import wave

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyaudio




<h1>Using the Spotify API to Extract Timbre Features</h1>

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Spotify API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Set SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel.
# NOTE(review): any credentials that were previously embedded in this cell
# should be treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')

if not client_id or not client_secret:
    # Fail early with an actionable message instead of a late auth error.
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables."
    )

# Client-credentials flow: app-level access, no user login involved.
credentials = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=credentials)


In [3]:
def extract_timbre_features(track_id):
    """Summarise a track's per-segment timbre vectors as means and covariances.

    Fetches the audio analysis for ``track_id`` through the module-level
    ``sp`` client, stacks the ``'timbre'`` vector of every segment into a
    2-D array (one row per segment) and reduces it over the segment axis.

    Parameters
    ----------
    track_id : str
        Track identifier, passed unchanged to ``sp.audio_analysis``.

    Returns
    -------
    avg_timbre : numpy.ndarray
        Mean of each timbre dimension across all segments.
    cov_timbre : numpy.ndarray
        Flattened covariance of the timbre dimensions: first the variances
        (diagonal of the covariance matrix), then the entries strictly above
        the diagonal. For ``d`` dimensions this has ``d + d*(d-1)/2`` values.

    Raises
    ------
    ValueError
        If the analysis does not yield at least two timbre vectors. A
        covariance cannot be estimated from fewer observations, and letting
        NumPy proceed would return NaN features (one segment) or fail with an
        unrelated indexing error (no segments).
    """
    # Fetch audio analysis
    analysis = sp.audio_analysis(track_id)

    # Stack the timbre vector of each segment: rows = segments, cols = dimensions
    timbre_array = np.array(
        [segment['timbre'] for segment in analysis['segments']], dtype=float
    )

    if timbre_array.ndim != 2 or timbre_array.shape[0] < 2:
        raise ValueError(
            f"Need at least 2 timbre vectors to compute covariances for track "
            f"{track_id!r}; got an array of shape {timbre_array.shape}"
        )

    # Per-dimension mean, and covariance between dimensions (columns are variables)
    avg_timbre = np.mean(timbre_array, axis=0)
    # atleast_2d keeps the indexing below valid even for a single timbre dimension,
    # where np.cov returns a 0-d array.
    cov_matrix = np.atleast_2d(np.cov(timbre_array, rowvar=False))

    # The matrix is symmetric, so only the entries above the diagonal are kept
    upper_triangle_indices = np.triu_indices_from(cov_matrix, k=1)
    upper_triangle = cov_matrix[upper_triangle_indices]

    # Variances first, then the off-diagonal covariances
    cov_timbre = np.concatenate([np.diag(cov_matrix), upper_triangle])

    return avg_timbre, cov_timbre


In [4]:
def create_feature_dataframe(tracks):
    """Assemble one row of timbre statistics per track into a DataFrame.

    For every entry in ``tracks`` the track metadata is looked up through the
    module-level ``sp`` client and the timbre summary is computed with
    ``extract_timbre_features``. The label is the first four characters of
    the album's ``release_date`` (kept as a string).

    Tracks for which any step raises are reported with ``print`` and left
    out of the result.

    Parameters
    ----------
    tracks : iterable
        Track identifiers to process.

    Returns
    -------
    pandas.DataFrame
        Columns ``Label``, ``TimbreAvg1``..``TimbreAvg12`` and
        ``TimbreCovariance1``..``TimbreCovariance78``; one row per track
        that was processed successfully.
    """
    # Output schema: label + 12 averages + 78 covariance terms
    columns = ['Label']
    columns += [f'TimbreAvg{n}' for n in range(1, 13)]
    columns += [f'TimbreCovariance{n}' for n in range(1, 79)]

    rows = []
    for track_id in tracks:
        try:
            # Label: release year taken from the album metadata
            track_info = sp.track(track_id)
            release_year = track_info['album']['release_date'][:4]

            # Features: timbre means followed by covariance terms
            avg_timbre, cov_timbre = extract_timbre_features(track_id)
            rows.append([release_year, *avg_timbre, *cov_timbre])
        except Exception as e:
            print(f"Error processing track ID {track_id}: {e}")

    return pd.DataFrame(rows, columns=columns)

In [5]:

# Example usage
track_ids = ['https://open.spotify.com/track/5rb9QrpfcKFHM1EUbSIurX?si=068a06909b1e48a6']  # Replace with your list of Spotify track IDs, URIs, or URLs
df = create_feature_dataframe(track_ids)

# Save the DataFrame to a CSV file; create the output folder first so that
# to_csv does not fail on a fresh checkout where 'Data/' does not exist yet
os.makedirs('Data', exist_ok=True)
df.to_csv('Data/spotify_timbre_features.csv', index=False)

# Display the first few rows of the DataFrame (only the last expression of a
# cell is rendered, so this has to come last)
df.head()

<h1>Recording Audio</h1>

In [11]:
# # Setup channel, rate, and chunk size
# FORMAT = pyaudio.paInt16 #This sets the format of the audio using the PyAudio library. paInt16 is one of the format codes provided by PyAudio, representing 16-bit int audio.
# CHANNELS = 1
# RATE = 44100
# CHUNK = 1024
# RECORD_SECONDS = 5
# FILE_NAME = "recordedFile.wav"

# audio = pyaudio.PyAudio()

<h3>Code Breakdown</h3>

<ul style="font-size: small;">
    <li>Set up the channel count, sample rate, and chunk size</li>
    <li><code>FORMAT = pyaudio.paInt16</code>
        <ul>
            <li>This sets the format of the audio using the PyAudio library.</li>
            <li><code>paInt16</code> is one of the format codes provided by PyAudio, representing 16-bit int audio.</li>
        </ul>
    </li>
</ul>
<ul style="font-size: small;">
    <li><code>CHANNELS = 1</code>
        <ul>
            <li>This sets the number of audio channels. 1 means mono audio (a single channel).</li>
            <li>If you were recording stereo audio (left and right channels), you would set this to 2.</li>
        </ul>
    </li>
</ul>
<ul style="font-size: small;">
    <li><code>RATE = 44100</code>
        <ul>
            <li>This is the sampling rate. 44100 Hz (or 44.1 kHz) is a common sampling rate for audio.</li>
        </ul>
    </li>
</ul>
<ul style="font-size: small;">
    <li><code>RECORD_SECONDS = 5</code>
        <ul>
            <li>This specifies the duration for which the audio recording will last. Here, it's set to 5 seconds.</li>
        </ul>
    </li>
</ul>

In [12]:
# # Start recording
# stream = audio.open(format=FORMAT, channels=CHANNELS,
#                     rate=RATE, input=True,
#                     frames_per_buffer=CHUNK)
# print("Recording...")
# frames = []

# for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
#     data = stream.read(CHUNK)
#     frames.append(data)
# print("Finished recording.")

# # Stop recording
# stream.stop_stream()
# stream.close()
# audio.terminate()

# # Writing to file
# waveFile = wave.open(FILE_NAME, 'wb')
# waveFile.setnchannels(CHANNELS)
# waveFile.setsampwidth(audio.get_sample_size(FORMAT))
# waveFile.setframerate(RATE)
# waveFile.writeframes(b''.join(frames))
# waveFile.close()

Recording...
Finished recording.


In [1]:
# import essentia.standard as es

# # Load the audio file
# loader = es.MonoLoader(filename='recordedFile.wav')
# audio = loader()

# # Initialize the algorithm
# w = es.Windowing(type='hann')
# spectrum = es.Spectrum()
# mfcc = es.MFCC()

# # Compute MFCCs
# mfccs = []
# for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True):
#     mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
#     mfccs.append(mfcc_coeffs)

# # `mfccs` will contain the MFCCs for each frame in the audio file


: 