In [10]:
import os
import pandas as pd
import numpy as np
from librosa import load as lr_load
import librosa as lr
from librosa import amplitude_to_db
from librosa.feature import melspectrogram, mfcc, chroma_stft, spectral_flatness, zero_crossing_rate, tempogram
from librosa.beat import tempo, beat_track
from librosa.feature import spectral_contrast as lf_spectral_contrast
from librosa.feature import spectral_rolloff as lf_spectral_rolloff
from librosa import pyin
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import IPython.display as ipd
from librosa import pyin


In [4]:
%reset
import os

import librosa as lr
import numpy as np
import pandas as pd
from IPython.display import display
from librosa import amplitude_to_db
from librosa.feature import chroma_stft, melspectrogram, mfcc, tempogram, zero_crossing_rate
from pydub import AudioSegment
from scipy.io import wavfile
from sklearn.cluster import AgglomerativeClustering, Birch, DBSCAN, KMeans, MeanShift
from sklearn.preprocessing import StandardScaler
class AudioFeatureExtractor:
    """Stateless collection of thin wrappers around librosa feature extractors.

    Every method takes the audio signal (or spectrograms derived from it) as
    an argument and returns whatever the underlying librosa call returns; the
    instance itself stores nothing.
    """

    def __init__(self):
        pass

    def mfccs(self, data, sfreq):
        """Return the mean of each MFCC coefficient over all frames.

        Args:
            data: Audio time series passed to ``mfcc`` as ``y``.
            sfreq: Sampling rate passed to ``mfcc`` as ``sr``.

        Returns:
            dict mapping ``'mfcc<i>_mean'`` (``i`` starting at 1) to the mean
            of the i-th row of the MFCC matrix.
        """
        mfcc_result = mfcc(y=data, sr=sfreq)
        return {
            f'mfcc{idx}_mean': np.mean(coefficient)
            for idx, coefficient in enumerate(mfcc_result, start=1)
        }

    def calculate_spectrograms(self, audio_clips, n_fft=2048, hop_length=512, win_length=None):
        """Compute magnitude and dB-scaled STFT spectrograms for several clips.

        Args:
            audio_clips: Iterable of audio time series.
            n_fft, hop_length, win_length: Forwarded to ``lr.stft``.

        Returns:
            Tuple ``(spectrograms, spectrograms_db)`` of two lists, one entry
            per clip. The dB version is scaled relative to each spectrogram's
            own maximum (``ref=np.max``).
        """
        spectrograms = []
        spectrograms_db = []

        for clip in audio_clips:
            magnitude = np.abs(
                lr.stft(y=clip, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
            )
            spectrograms.append(magnitude)
            spectrograms_db.append(amplitude_to_db(S=magnitude, ref=np.max))

        return spectrograms, spectrograms_db

    def calculate_spectral_features(self, spectrograms, sr=22050):
        """Compute spectral bandwidth and centroid for each spectrogram.

        Args:
            spectrograms: Iterable of magnitude spectrograms.
            sr: Sampling rate of the audio the spectrograms were computed
                from. It is forwarded to librosa so the frequency axis is
                derived from the real rate; the default of 22050 keeps the
                result of existing calls that do not pass it.

        Returns:
            Tuple ``(bandwidths, centroids)`` of two lists, one entry per
            spectrogram.
        """
        bandwidths = []
        centroids = []

        for spectrogram in spectrograms:
            bandwidths.append(lr.feature.spectral_bandwidth(S=spectrogram, sr=sr))
            centroids.append(lr.feature.spectral_centroid(S=spectrogram, sr=sr))

        return bandwidths, centroids

    def calculate_spectral_contrast(self, data, sr, n_fft=2048, hop_length=512):
        """Return ``lr.feature.spectral_contrast`` of the raw signal."""
        return lr.feature.spectral_contrast(y=data, sr=sr, n_fft=n_fft, hop_length=hop_length)

    def calculate_tonnetz(self, data, sr):
        """Return ``lr.feature.tonnetz`` of the raw signal."""
        return lr.feature.tonnetz(y=data, sr=sr)

    def calculate_spectral_rolloff(self, data, sr, roll_percent=0.85, n_fft=2048, hop_length=512):
        """Return ``lr.feature.spectral_rolloff`` of the raw signal."""
        return lr.feature.spectral_rolloff(
            y=data, sr=sr, roll_percent=roll_percent, n_fft=n_fft, hop_length=hop_length
        )

    def calculate_chroma_features(self, data, sr, n_fft=2048, hop_length=512):
        """Return the STFT-based chromagram of the raw signal."""
        return chroma_stft(y=data, sr=sr, n_fft=n_fft, hop_length=hop_length)

    def calculate_mel_spectral_contrast(self, data, sr, n_fft=2048, hop_length=512):
        """Return spectral contrast computed on a mel spectrogram of the signal."""
        mel_spectrogram = melspectrogram(y=data, sr=sr, n_fft=n_fft, hop_length=hop_length)
        # NOTE(review): spectral_contrast receives mel-scaled bins but neither
        # ``sr`` nor ``freq``, so librosa falls back to its own defaults for the
        # frequency axis -- confirm this is the intended feature.
        return lr.feature.spectral_contrast(S=mel_spectrogram)

    def calculate_spectral_flatness(self, data, sr, n_fft=2048, hop_length=512):
        """Return ``lr.feature.spectral_flatness`` of the raw signal.

        ``sr`` is accepted so the signature matches the sibling methods; it is
        not forwarded to librosa.
        """
        return lr.feature.spectral_flatness(y=data, n_fft=n_fft, hop_length=hop_length)

    def zero_crossing_rate_features(self, data, n_fft=2048, hop_length=512):
        """Return the zero-crossing rate, using ``n_fft`` as the frame length."""
        return zero_crossing_rate(y=data, frame_length=n_fft, hop_length=hop_length)

    def calculate_tempogram(self, data, sr):
        """Return the tempogram computed from the signal's onset-strength envelope."""
        onset_env = lr.onset.onset_strength(y=data, sr=sr)
        return lr.feature.tempogram(onset_envelope=onset_env, sr=sr)

    def calculate_polyphonic_pitch(self, data, sr):
        """Estimate the fundamental frequency with pYIN between notes C1 and C8.

        Args:
            data: Audio time series.
            sr: Sampling rate of ``data``; forwarded to pYIN so the estimate
                is based on the real rate of the signal.

        Returns:
            Tuple ``(f0, voiced_flag, voiced_probs)`` as returned by pYIN.
        """
        # Called through the ``lr`` alias so the method only depends on the
        # librosa import made alongside this class.
        f0, voiced_flag, voiced_probs = lr.pyin(
            y=data,
            sr=sr,
            fmin=lr.note_to_hz('C1'),
            fmax=lr.note_to_hz('C8'),
        )
        return f0, voiced_flag, voiced_probs

    def rms_energy_features(self, data):
        """Return the first row of ``lr.feature.rms`` for the signal."""
        return lr.feature.rms(y=data)[0]




In [5]:
# Instantiate the feature extractor. The constructor takes no arguments, so
# the object serves only as a handle for calling the extraction methods.
audio_extractor = AudioFeatureExtractor()

In [11]:
# Paths to the label CSV (must contain a 'filename' column) and the audio directory
csv_path = "labels_new.csv"
audio_directory = "labeled"

# Every clip is loaded at one common sampling rate so that the fixed clip
# length below really corresponds to CLIP_SECONDS, and so that extractor
# methods relying on librosa's default rate see the rate they assume.
SAMPLE_RATE = 22050
CLIP_SECONDS = 30

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_path)

# Create an instance of the AudioFeatureExtractor class
audio_extractor = AudioFeatureExtractor()

# One feature dictionary per audio file, in the same order as the rows of df
audio_features = []

# Number of samples every clip is truncated or zero-padded to
target_length = CLIP_SECONDS * SAMPLE_RATE

# Loop through each row in the DataFrame
for index, row in df.iterrows():
    # Construct the file path for the audio file
    file_path = os.path.join(audio_directory, row['filename'])

    # Load the audio file, resampling to SAMPLE_RATE when the file differs
    data, sfreq = lr.load(file_path, sr=SAMPLE_RATE)

    # Truncate or zero-pad the audio to exactly target_length samples
    if len(data) > target_length:
        data = data[:target_length]
    elif len(data) < target_length:
        padding = target_length - len(data)
        data = np.pad(data, (0, padding), mode='constant')

    # Extract MFCC features (one mean value per coefficient)
    mfcc_features = audio_extractor.mfccs(data, sfreq)

    # Extract other features using the AudioFeatureExtractor class.
    # Several of these are computed but not yet stored in features_dict.
    spectrograms, _ = audio_extractor.calculate_spectrograms([data])
    bandwidths, centroids = audio_extractor.calculate_spectral_features(spectrograms)
    spectral_contrast = audio_extractor.calculate_spectral_contrast(data, sfreq)
    tonnetz = audio_extractor.calculate_tonnetz(data, sfreq)
    spectral_rolloff = audio_extractor.calculate_spectral_rolloff(data, sfreq)
    chroma_features = audio_extractor.calculate_chroma_features(data, sfreq)
    mel_spectral_contrast = audio_extractor.calculate_mel_spectral_contrast(data, sfreq)
    spectral_flatness = audio_extractor.calculate_spectral_flatness(data, sfreq)
    zcr = audio_extractor.zero_crossing_rate_features(data)
    tempogram_result = audio_extractor.calculate_tempogram(data, sfreq)
    f0, _, _ = audio_extractor.calculate_polyphonic_pitch(data, sfreq)
    rms_energy = audio_extractor.rms_energy_features(data)

    # Combine all features into a dictionary.
    # NOTE(review): the last three entries store whole arrays rather than
    # scalars, so those DataFrame cells hold array objects -- confirm that
    # downstream code expects this.
    features_dict = {
        **mfcc_features,
        'mean_bandwidth': np.mean(bandwidths),
        # ... (add other features to the dictionary)
        'tempogram_result': tempogram_result,
        'polyphonic_pitch': f0,
        'rms_energy': rms_energy
    }

    # Append the features dictionary to the list
    audio_features.append(features_dict)

# Convert the list of feature dictionaries to a DataFrame
audio_features_df = pd.DataFrame(audio_features)

# Concatenate the original DataFrame with the new features DataFrame
# (column-wise; rows are matched on their index)
df = pd.concat([df, audio_features_df], axis=1)

# Display the updated DataFrame
print(df.head())

     filename    genre  mfcc1_mean  mfcc2_mean  mfcc3_mean  mfcc4_mean  \
0  m00248.wav    metal  -75.517509   81.911423  -22.081079   69.876999   
1  m00230.wav  country  -89.736382   36.286201   11.469535   37.495590   
2  m00637.wav   hiphop -122.780525   95.061287  -29.363251   46.780045   
3  m00627.wav    metal  -57.683388  101.432320  -41.485245   55.130600   
4  m00138.wav   reggae -198.632797  102.413582  -10.031448   30.802383   

   mfcc5_mean  mfcc6_mean  mfcc7_mean  mfcc8_mean  ...  mfcc15_mean  \
0  -11.740438   25.740246  -18.518965   27.027710  ...   -10.154532   
1    9.199136    3.740519    2.840358    6.811539  ...     1.711515   
2  -15.998563   27.117586  -13.113779   20.258003  ...    -0.753110   
3  -23.349279   28.151098  -12.139105   18.150204  ...   -12.406642   
4   -3.310606   20.276924   -5.637373   28.095631  ...    -3.469470   

   mfcc16_mean  mfcc17_mean  mfcc18_mean  mfcc19_mean  mfcc20_mean  \
0     6.781166   -10.487830     4.372530    -5.466021     

In [None]:
# Clustering by Mark

class Clustering:
    """Run several scikit-learn clustering algorithms over one feature matrix.

    Attributes:
        data: The feature matrix exactly as passed in.
        scaled_data: ``data`` standardised with ``StandardScaler``; used by
            the k-means and agglomerative methods.
        cluster_labels: Labels produced by the most recent clustering call,
            or ``None`` before any call.
    """

    def __init__(self, data):
        """Store ``data`` and a standardised copy of it.

        Args:
            data: 2-D array-like of shape (n_samples, n_features).
        """
        self.data = data
        # cluster_kmeans and cluster_agglomerative read this attribute, so it
        # has to exist as soon as the object is constructed.
        self.scaled_data = StandardScaler().fit_transform(data)
        self.cluster_labels = None

    def _fit_and_append(self, estimator, features):
        """Fit ``estimator`` on ``features`` and append the labels to ``data``."""
        self.cluster_labels = estimator.fit_predict(features)
        return np.column_stack((self.data, self.cluster_labels))

    def cluster_kmeans(self, n_clusters=3):
        """Cluster the standardised data with k-means (fixed ``random_state=42``).

        Returns:
            ``data`` with the cluster label appended as a last column.
        """
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        return self._fit_and_append(kmeans, self.scaled_data)

    def cluster_agglomerative(self, n_clusters=3):
        """Cluster the standardised data with agglomerative clustering.

        Returns:
            ``data`` with the cluster label appended as a last column.
        """
        agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
        return self._fit_and_append(agglomerative, self.scaled_data)

    def cluster_dbscan(self, eps=0.5, min_samples=5):
        """Cluster the unscaled data with DBSCAN.

        Returns:
            ``data`` with the cluster label appended as a last column.
        """
        # NOTE(review): this and the two methods below fit on the raw data,
        # unlike k-means/agglomerative -- confirm that is intended.
        dbscan = DBSCAN(eps=eps, min_samples=min_samples)
        return self._fit_and_append(dbscan, self.data)

    def cluster_birch(self, n_clusters=3):
        """Cluster the unscaled data with Birch.

        Returns:
            ``data`` with the cluster label appended as a last column.
        """
        birch = Birch(n_clusters=n_clusters)
        return self._fit_and_append(birch, self.data)

    def cluster_meanshift(self, bandwidth=0.5):
        """Cluster the unscaled data with MeanShift.

        Returns:
            ``data`` with the cluster label appended as a last column.
        """
        meanshift = MeanShift(bandwidth=bandwidth)
        return self._fit_and_append(meanshift, self.data)