In [63]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

# Root directory of the dataset; load_audio_files walks its sub-directories, one per genre.
folder_path = 'genres_original'

def load_audio_files(folder_path, genres=('rock',)):
    """
    Collect the paths of the .wav files stored in per-genre sub-directories.

    Parameters:
        folder_path (str): Dataset root; each sub-directory is treated as one genre.
        genres (str, iterable of str, or None): Names of the genre sub-directories
            to include. Defaults to ('rock',), the subset this function used to
            hard-code. Pass None to include every genre.

    Returns:
        audio_files (list of str): Matching .wav paths, ordered by genre name and
            then by file name.
    """
    if isinstance(genres, str):
        # A bare string would otherwise be split into single characters by set().
        genres = (genres,)
    wanted = None if genres is None else set(genres)

    audio_files = []
    # os.listdir returns entries in arbitrary order; sorting makes repeated runs
    # (and any "first N files" selection downstream) reproducible.
    for genre in sorted(os.listdir(folder_path)):
        # Filter on the directory *name* rather than the joined path so the
        # result does not depend on folder_path's spelling or the OS separator.
        if wanted is not None and genre not in wanted:
            continue
        genre_folder = os.path.join(folder_path, genre)
        if not os.path.isdir(genre_folder):
            continue
        for file in sorted(os.listdir(genre_folder)):
            if file.endswith('.wav'):
                audio_files.append(os.path.join(genre_folder, file))
    return audio_files





In [65]:
def calculate_zero_crossing_rate(y):
    """
    Calculate the Zero-Crossing Rate of an audio signal.

    A crossing is counted whenever two consecutive samples lie on opposite
    sides of zero. Samples that are exactly zero are treated as non-negative,
    so a signal that merely touches zero (e.g. 1, 0, 1) or contains digital
    silence does not contribute spurious or fractional crossings.

    Parameters:
        y (numpy.ndarray): Audio time-series signal.

    Returns:
        zcr (float): Number of sign changes divided by the number of samples;
            0.0 for an empty signal.
    """
    y = np.asarray(y)
    # An empty signal has no crossings; avoid dividing by a length of zero.
    if y.size == 0:
        return 0.0
    # Boolean mask instead of np.sign: np.sign maps 0 to 0, which would make
    # every step onto or off an exact zero count as half a crossing.
    is_negative = y < 0
    zero_crossings = np.sum(is_negative[1:] != is_negative[:-1])
    # Normalize by the total number of samples
    zcr = zero_crossings / len(y)
    return zcr


def calculate_temporal_centroid(y, sr):
    """
    Locate the energy-weighted mean time of an audio signal.

    Parameters:
        y (numpy.ndarray): Audio time-series signal.
        sr (int): Sampling rate of the audio.

    Returns:
        temporal_centroid (float): Temporal Centroid in seconds.
    """
    # Per-sample energy is the squared amplitude
    sample_energy = y ** 2
    # Timestamp (seconds) of every sample
    timestamps = np.arange(len(y)) / sr
    # Energy-weighted average of the timestamps
    weighted_time = (timestamps * sample_energy).sum()
    return weighted_time / sample_energy.sum()

def calculate_energy_fft(y, sr):
    """
    Split the spectral power of an audio signal into bass, midrange and treble bands.

    Parameters:
        y (numpy.ndarray): Audio time-series signal.
        sr (int): Sampling rate of the audio.

    Returns:
        energy_distribution (dict): Keys "bass", "midrange" and "treble", each
            holding the summed power of that band divided by the number of FFT bins.
    """
    # Power spectrum of the full FFT and the frequency (Hz) of each bin
    power = np.abs(np.fft.fft(y)) ** 2
    bin_freqs = np.fft.fftfreq(len(y), 1 / sr)
    n_bins = len(power)

    # Half-open [low, high) band edges in Hz; treble stops at the Nyquist frequency.
    # Every lower edge is >= 0, so only bins with non-negative frequency are summed.
    band_edges = (
        ("bass", 0, 250),
        ("midrange", 250, 4000),
        ("treble", 4000, sr / 2),
    )

    energy_distribution = {}
    for band, low, high in band_edges:
        in_band = (bin_freqs >= low) & (bin_freqs < high)
        energy_distribution[band] = np.sum(power[in_band]) / n_bins

    return energy_distribution

def calculate_spectral_bandwidth(y, sr):
    """
    Measure how widely the magnitude spectrum spreads around its centroid.

    Parameters:
        y (numpy.ndarray): Audio time-series signal.
        sr (int): Sampling rate of the audio.

    Returns:
        spectral_bandwidth (float): Spectral Bandwidth in Hz.
    """
    # Keep only the first half of the FFT bins and their frequencies
    half = len(y) // 2
    magnitudes = np.abs(np.fft.fft(y)[:half])
    bin_freqs = np.fft.fftfreq(len(y), d=1/sr)[:half]

    total_magnitude = np.sum(magnitudes)

    # Spectral centroid: magnitude-weighted mean frequency
    centroid = np.sum(bin_freqs * magnitudes) / total_magnitude

    # Bandwidth: magnitude-weighted standard deviation around the centroid
    weighted_variance = np.sum((bin_freqs - centroid) ** 2 * magnitudes) / total_magnitude
    return np.sqrt(weighted_variance)

def calculate_spectral_rolloff(y, sr, rolloff_percent=0.85):
    """
    Find the frequency below which a given share of the spectral magnitude lies.

    Parameters:
        y (numpy.ndarray): Audio time-series signal.
        sr (int): Sampling rate of the audio.
        rolloff_percent (float): Fraction of the total accumulated magnitude that
            must be reached (default: 0.85).

    Returns:
        spectral_rolloff (float): Spectral Rolloff frequency in Hz.
    """
    # Keep only the first half of the FFT bins and their frequencies
    half = len(y) // 2
    magnitudes = np.abs(np.fft.fft(y)[:half])
    bin_freqs = np.fft.fftfreq(len(y), d=1/sr)[:half]

    # Running total of the magnitudes; its last entry is the overall total
    running_total = np.cumsum(magnitudes)
    threshold = rolloff_percent * running_total[-1]

    # First bin at which the running total reaches the threshold
    reached = np.where(running_total >= threshold)[0]
    return bin_freqs[reached[0]]


In [75]:
from scipy.signal import butter, lfilter
from scipy.fft import fft


def extract_features_simple(file_path):
    """
    Load one audio file and compute its feature vector.

    Parameters:
        file_path (str): Path to an audio file stored directly inside a
            directory named after its genre (e.g. genres_original/rock/x.wav).

    Returns:
        features (dict): zero_crossing_rate, tempo (int), spectral_bandwidth,
            spectral_rolloff, the bass/midrange/treble energies, and genre.
    """
    # sr=None asks librosa to keep the file's own sampling rate
    y, sr = librosa.load(file_path, sr=None)

    # The genre is the name of the file's parent directory. os.path handles
    # nested dataset roots and Windows separators, where splitting on '/' and
    # taking element 1 would return the wrong component.
    genre = os.path.basename(os.path.dirname(file_path))

    zero_crossing_rate = calculate_zero_crossing_rate(y)

    # Energy per frequency band, from the Fourier Transform
    energy_fft = calculate_energy_fft(y, sr)

    # Tempo estimate from the onset-strength envelope
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo, _ = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # beat_track returns tempo as a scalar in some librosa versions and as a
    # 1-element array in others; atleast_1d makes indexing safe for both.
    tempo_bpm = int(np.atleast_1d(tempo)[0])

    # Spectral shape descriptors
    spectral_bandwidth = calculate_spectral_bandwidth(y, sr)
    spectral_rolloff = calculate_spectral_rolloff(y, sr, rolloff_percent=0.85)

    # One row of the feature table
    features = {
        "zero_crossing_rate": zero_crossing_rate,
        "tempo": tempo_bpm,
        "spectral_bandwidth": spectral_bandwidth,
        "spectral_rolloff": spectral_rolloff,
        **energy_fft,
        "genre": genre
    }

    return features

    


In [None]:
import os
import pandas as pd


def create_feature_dataset(folder_path, max_files=10):
    """
    Build a feature table with one row per audio file.

    Parameters:
        folder_path (str): Dataset root passed to load_audio_files.
        max_files (int or None): Maximum number of files to process, taken from
            the front of the list returned by load_audio_files. Must be
            non-negative. Defaults to 10, the cap this function used to
            hard-code; pass None to process every file.

    Returns:
        feature_df (pandas.DataFrame): One row per processed file, with the
            columns produced by extract_features_simple.
    """
    audio_files = load_audio_files(folder_path)
    if max_files is not None:
        audio_files = audio_files[:max_files]

    # Collect all rows first and build the DataFrame once
    features = [extract_features_simple(file) for file in audio_files]
    feature_df = pd.DataFrame(features)

    return feature_df

# Build the feature table from the audio files found under folder_path.
folder_path = 'genres_original'  # Replace with your actual path
features_df = create_feature_dataset(folder_path)





TypeError: 'NoneType' object is not iterable

In [79]:
# Show the extracted feature table (one row per audio file).
features_df

Unnamed: 0,zero_crossing_rate,tempo,spectral_bandwidth,spectral_rolloff,bass,midrange,treble,genre
0,0.07182,99,2411.224839,5269.19147,5245.100098,2222.495605,126.123238,rock
1,0.150329,143,2285.82463,5441.281653,1292.20166,2449.677979,191.842484,rock
2,0.143638,151,2081.513734,4477.909818,2512.864746,5834.796875,306.676544,rock
3,0.146839,112,2276.150714,5437.183474,1456.748291,1963.696777,141.885117,rock
4,0.158019,103,2129.473531,4586.994669,2238.630127,5361.081055,275.721375,rock
5,0.136322,117,2259.615544,5156.841404,800.654419,2435.361328,152.379135,rock
6,0.140759,135,2714.492565,6006.335348,9974.370117,6756.904785,603.320129,rock
7,0.048792,112,2006.81415,3604.231453,2079.025146,632.833008,13.944582,rock
8,0.057056,129,2808.626654,5470.568636,4119.938965,1078.186035,54.706669,rock
9,0.114309,135,2438.544763,5622.73434,2159.756104,1001.532959,105.697975,rock


In [None]:
# Cluster the tracks in features_df with KMeans
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

N_CLUSTERS = 10  # upper bound on the number of clusters to fit

# Use every column except the genre label as a clustering feature
X_raw = features_df.drop(columns=["genre"])

# Split BEFORE scaling so the held-out rows do not influence the scaler's
# mean and variance
X_train_raw, X_test_raw = train_test_split(X_raw, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix, kept under its original name
X = scaler.transform(X_raw)

# KMeans refuses to fit more clusters than training samples (a 10-row table
# with an 80/20 split leaves only 8), so cap the cluster count.
n_clusters = min(N_CLUSTERS, len(X_train))

# Train a KMeans clustering model
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(X_train)

# Predict the clusters on the test data
y_pred = kmeans.predict(X_test)

print(y_pred)




