In [1]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix
import pandas as pd
import numpy as np
import os
import librosa
import csv

In [60]:
def extract_features(y, sr):
    """Build the flat feature vector for one audio segment.

    The vector is laid out as: eight (mean, variance) pairs in the order
    chroma STFT, RMS, spectral centroid, spectral bandwidth, roll-off,
    zero-crossing rate, harmonic part, percussive part; then the tempo;
    then every value returned by ``get_mfcc`` (40 values with its current
    settings, giving 57 values in total).

    No normalisation or trimming is done here; ``y`` is used as given.

    Parameters
    ----------
    y : np.ndarray
        Audio samples of the segment.
    sr : int
        Sample rate of ``y``.

    Returns
    -------
    np.ndarray
        One-dimensional feature vector.
    """
    # Each helper reduces one descriptor to a (mean, variance) pair.
    stat_pairs = (
        get_chroma_stft(y, sr),
        get_rms(y),
        get_spectral_centroid(y, sr),
        get_spectral_bandwidth(y, sr),
        get_rolloff(y, sr),
        get_zero_crossing_rate(y),
        get_harmony(y),
        get_perceptr(y),
    )

    # Tempo is estimated here so that the sample rate passed by the caller is
    # always the one used, rather than whatever `sr` happens to be global.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # beat_track may hand the tempo back as a one-element array (depends on
    # the librosa release); reduce it to a scalar so np.array() below gets a
    # homogeneous sequence.
    tempo = float(np.atleast_1d(tempo)[0])

    mfcc = get_mfcc(y, sr)

    summary = [value for pair in stat_pairs for value in pair]
    return np.array([*summary, tempo, *mfcc])

In [61]:
def get_chroma_stft(y, sr):
    """Return ``(mean, variance)`` of ``librosa.feature.chroma_stft(y, sr)``.

    Both statistics are taken over every element of the returned matrix.
    """
    chromagram = librosa.feature.chroma_stft(y=y, sr=sr)
    return np.mean(chromagram), np.var(chromagram)

def get_rms(y):
    """Return ``(mean, variance)`` of the values from ``librosa.feature.rms``.

    Both statistics are taken over every element of the returned array.
    """
    energy = librosa.feature.rms(y=y)
    return np.mean(energy), np.var(energy)

def get_spectral_centroid(y, sr):
    """Return ``(mean, variance)`` of ``librosa.feature.spectral_centroid``.

    Both statistics are taken over every element of the returned array.
    """
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    return np.mean(centroid), np.var(centroid)

def get_spectral_bandwidth(y, sr):
    """Return ``(mean, variance)`` of ``librosa.feature.spectral_bandwidth``.

    Both statistics are taken over every element of the returned array.
    """
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    return np.mean(bandwidth), np.var(bandwidth)

def get_rolloff(y, sr):
    """Return ``(mean, variance)`` of ``librosa.feature.spectral_rolloff``.

    Both statistics are taken over every element of the returned array.
    """
    rolloff_frames = librosa.feature.spectral_rolloff(y=y, sr=sr)
    return np.mean(rolloff_frames), np.var(rolloff_frames)

def get_zero_crossing_rate(y):
    """Return ``(mean, variance)`` of ``librosa.feature.zero_crossing_rate(y)``.

    Both statistics are taken over every element of the returned array.
    """
    zcr_frames = librosa.feature.zero_crossing_rate(y)
    return np.mean(zcr_frames), np.var(zcr_frames)

def get_harmony(y):
    """Return ``(mean, variance)`` of the output of ``librosa.effects.harmonic(y)``.

    Both statistics are taken over every sample of the returned signal.
    """
    harmonic_part = librosa.effects.harmonic(y)
    return np.mean(harmonic_part), np.var(harmonic_part)

def get_perceptr(y):
    """Return ``(mean, variance)`` of the output of ``librosa.effects.percussive(y)``.

    Both statistics are taken over every sample of the returned signal.
    """
    percussive_part = librosa.effects.percussive(y)
    return np.mean(percussive_part), np.var(percussive_part)

def get_tempo(y, sr=22050):
    """Estimate the tempo of ``y`` with ``librosa.beat.beat_track``.

    Parameters
    ----------
    y : np.ndarray
        Audio samples.
    sr : int, optional
        Sample rate of ``y``. Defaults to 22050, the rate ``librosa.load``
        resamples to when no ``sr`` is given, so existing ``get_tempo(y)``
        calls keep working without relying on a global ``sr``.

    Returns
    -------
    float
        Estimated tempo as reported by ``beat_track``.
    """
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # beat_track may return the tempo as a one-element array depending on the
    # librosa release; always hand callers a plain scalar.
    return float(np.atleast_1d(tempo)[0])

def get_mfcc(y, sr):
    """Return a standardised MFCC summary vector for ``y``.

    Twenty MFCCs are computed per frame, each coefficient is reduced to its
    mean and variance across frames, the 40 statistics are interleaved by
    ``_format_mfcc`` and the resulting vector is shifted and scaled to zero
    mean and unit standard deviation.
    """
    coeff_count = 20   # MFCC coefficients per frame
    frame_hop = 512    # samples between consecutive frames
    fft_size = 2048    # FFT window length in samples

    coeffs = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=coeff_count, hop_length=frame_hop, n_fft=fft_size
    )

    # Collapse the time axis: one mean and one variance per coefficient.
    per_coeff_mean = coeffs.mean(axis=1)
    per_coeff_var = coeffs.var(axis=1)
    interleaved = _format_mfcc(coeff_count, per_coeff_mean, per_coeff_var)

    # Standardise across the 40 values of this one vector.
    return (interleaved - np.mean(interleaved)) / np.std(interleaved)

def _format_mfcc(n_mfcc, mfcc_mean, mfcc_var):
    """Interleave per-coefficient means and variances into one 1-D array.

    The result is ``[mean[0], var[0], mean[1], var[1], ...]`` for the first
    ``n_mfcc`` coefficients.
    """
    interleaved = [
        stat
        for idx in range(n_mfcc)
        for stat in (mfcc_mean[idx], mfcc_var[idx])
    ]
    return np.array(interleaved)

In [78]:
# Audio file analysed in this run. `path` is not assigned in any cell shown
# here, so it must already exist in the kernel before this cell executes.
audio_path = f"{path}/dataset/songs/One_more_time.wav"
def extract_features_30s(audio_path, segment_length=3):
    """Load an audio file and extract one feature row per fixed-length segment.

    The file is loaded with ``librosa.load``, passed through
    ``librosa.util.normalize`` and ``librosa.effects.trim``, then cut into
    consecutive chunks of ``segment_length`` seconds (the final chunk may be
    shorter). Each chunk is passed to ``extract_features``; the stacked rows
    are written out with ``write_features_to_csv`` and returned.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to load.
    segment_length : float, optional
        Length of each segment in seconds. Defaults to 3.

    Returns
    -------
    np.ndarray
        Array with one row per segment; shape ``(0, 57)`` when the trimmed
        signal has zero duration.

    Raises
    ------
    ValueError
        If ``segment_length`` is not positive.
    """
    if segment_length <= 0:
        raise ValueError("segment_length must be positive")

    n_features = 57  # width of the empty result; matches the CSV header length

    y, sr = librosa.load(audio_path)
    y = librosa.util.normalize(y)
    y = librosa.effects.trim(y)[0]

    duration = librosa.get_duration(y=y, sr=sr)
    num_segments = int(np.ceil(duration / segment_length))

    # Collect rows in a list and stack once; stacking inside the loop would
    # re-copy the whole array on every iteration.
    rows = []
    for i in range(num_segments):
        start_time = i * segment_length
        end_time = min((i + 1) * segment_length, duration)
        segment = y[int(start_time * sr):int(end_time * sr)]
        rows.append(extract_features(segment, sr))

    features = np.vstack(rows) if rows else np.empty((0, n_features))
    write_features_to_csv(features)
    return features

In [79]:
def write_features_to_csv(features, csv_path=None):
    """Write feature rows to a CSV file under a fixed 57-column header.

    Parameters
    ----------
    features : iterable of row sequences
        One sequence of values per CSV row, in header order.
    csv_path : str, optional
        Destination file. When omitted, the file is written to
        ``{path}/dataset/songs/One_more_time.csv`` using the module-level
        ``path`` variable, which must then be defined.

    The target file is overwritten if it already exists.
    """
    if csv_path is None:
        csv_path = f'{path}/dataset/songs/One_more_time.csv'

    summary_columns = [
        'chroma_stft_mean', 'chroma_stft_var', 'rms_mean', 'rms_var',
        'spectral_centroid_mean', 'spectral_centroid_var',
        'spectral_bandwidth_mean', 'spectral_bandwidth_var', 'rolloff_mean',
        'rolloff_var', 'zero_crossing_rate_mean', 'zero_crossing_rate_var',
        'harmony_mean', 'harmony_var', 'perceptr_mean', 'perceptr_var', 'tempo',
    ]
    # mfcc1_mean, mfcc1_var, ..., mfcc20_mean, mfcc20_var
    mfcc_columns = [
        f'mfcc{k}_{stat}' for k in range(1, 21) for stat in ('mean', 'var')
    ]
    header = summary_columns + mfcc_columns

    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        writer.writerows(features)

In [81]:
# Feature CSV to classify; this is the same location write_features_to_csv
# writes to by default. `path` must already be defined in the kernel.
csv_path = f'{path}/dataset/songs/One_more_time.csv'
# Statistical-mode helper from SciPy.
from scipy.stats import mode
def prediction(csv_path):
    """Classify every row of a feature CSV and return the majority label.

    The CSV is read with pandas, each column is min-max scaled, the scaled
    rows are passed to the module-level classifier ``svm`` and the most
    frequent predicted label is reported. ``svm`` is not defined in this
    function; a fitted model with a ``predict`` method must already exist
    in the kernel.

    Parameters
    ----------
    csv_path : str
        Path of a CSV file whose columns are the model's input features.

    Returns
    -------
    The most frequent predicted label. When several labels tie, the one that
    sorts first is returned.
    """
    segment_features = pd.read_csv(csv_path)

    # NOTE(review): the scaler is fitted on this file's own rows. If the
    # classifier was trained with a scaler fitted on the training set, that
    # fitted scaler should be reused here instead - confirm against training.
    scaler = preprocessing.MinMaxScaler()
    scaled_features = pd.DataFrame(
        scaler.fit_transform(segment_features),
        columns=segment_features.columns,
    )

    # Predict on the scaled frame; the unscaled data must not reach the model
    # or the scaling step above has no effect.
    segment_preds = svm.predict(scaled_features)
    print(segment_preds)

    # Majority vote via np.unique, which works for string labels as well as
    # numeric ones.
    labels, counts = np.unique(segment_preds, return_counts=True)
    majority_label = labels[np.argmax(counts)]
    print(majority_label)
    return majority_label